# Exploratory analysis, part 1 ----
# Package setup, sanity checks of effect-size / rank-correlation helpers on toy
# data, regressions on the results tables, and first normality checks.
#
# Add-on packages must be installed once, outside this script, e.g.
#   install.packages(c("effectsize", "Kendall", "ggpubr"))
library("effectsize") # rank_biserial()
# NOTE(review): Kendall() / summary.Kendall look like the CRAN "Kendall"
# package; the session loaded it through a GUI picker, so confirm the name.
library("Kendall")
library("ggpubr")     # ggdensity(), ggqqplot()

# NOTE(review): absolute, machine-specific path, and what regression.r defines
# is not visible from here. Confirm it is still needed and make it relative.
source("C:\\Sasha\\D\\DGU\\Repos\\Swetagging2021\\regression.r")

# Rank-biserial correlation on toy data ----
A <- c(48, 48, 77, 86, 85, 85)
B <- c(14, 34, 34, 77)
rb_toy <- rank_biserial(A, B)
rb_toy
# Extract the point estimate by its full column name; `$` with an abbreviated
# name would depend on partial matching.
rb_toy[["r_rank_biserial"]]

# Repeated / stepped index vectors ----
rep(c(1, 2, 3, 4, 5, 6), each = 2)
rep(c(1, 2, 3), each = 4)
seq(0.1, 1.2, by = 0.1)

# Kendall rank correlation on toy data (x contains a tie) ----
out <- Kendall(c(1, 3, 5, 5), c(1, 5, 6, 7))
out
# as.numeric() returns tau as a plain number, dropping any attributes.
as.numeric(out$tau)

# Linear models of `diff` on split size and alpha ----
f <- read.csv("results.csv", sep = "\t")
f
# `data = f` is required: without it `diff` resolves to the base function.
summary(lm(diff ~ splitsize * alpha, data = f)) # main effects + interaction
summary(lm(diff ~ splitsize + alpha, data = f)) # main effects only
summary(lm(diff ~ alpha, data = f))
summary(lm(diff ~ splitsize, data = f))
summary(lm(diff ~ splitsize:alpha, data = f))   # interaction term only

f3 <- read.csv("results3.csv", sep = "\t")
f3
# NOTE(review): the predictor is spelled `alpham` every time results3.csv is
# modelled; confirm that is the real column name and not a typo for `alpha`.
summary(lm(diff ~ splitsize * alpham, data = f3))

# Shapiro-Wilk normality tests ----
sdsource <- read.csv("sd_source6;7;8;9;10;11.tsv", sep = "\t")
head(sdsource)
# Selecting by name with `[` fails loudly on a misspelled column, whereas
# `sdsource$typo` silently yields NULL.
normality_cols <- c("average", "X6", "X7", "X8", "X9", "X10", "X11", "vote")
lapply(sdsource[normality_cols], shapiro.test)

# Density plots ----
# print() makes the plots render even when the file is run via source().
for (col_name in c("average", "X6", "vote")) {
  print(ggdensity(sdsource[[col_name]]))
}
# Exploratory analysis, part 2 ----
# Normality diagnostics for several sd_source files, regressions on
# results.csv, a between-annotator comparison, and correlations of two vectors
# of means.
# Relies on `sdsource` and on ggpubr being attached earlier in this file.

# Per-column density and QQ plots for `sdsource` ----
# print() makes the plots render even when the file is run via source().
for (col_name in c("X11", "X10", "X9", "X8", "X7")) {
  print(ggdensity(sdsource[[col_name]]))
}
for (col_name in c("X6", "X11", "X10", "X8", "X7", "X9")) {
  print(ggqqplot(sdsource[[col_name]]))
}

# Normality of `average` across the sd_source files ----
sdsource3 <- read.csv("sd_source7;8;11.tsv", sep = "\t")
sdsource5 <- read.csv("sd_source6;7;8;9;10.tsv", sep = "\t")
sdsource7 <- read.csv("sd_source1;6;7;8;9;10;11.tsv", sep = "\t")

average_sets <- list(
  sdsource = sdsource,
  sdsource3 = sdsource3,
  sdsource5 = sdsource5,
  sdsource7 = sdsource7
)
for (set_name in names(average_sets)) {
  avg <- average_sets[[set_name]][["average"]]
  cat("\n== ", set_name, "$average ==\n", sep = "")
  print(shapiro.test(avg))
  print(ggdensity(avg))
  print(ggqqplot(avg))
}

head(sdsource7)
shapiro.test(sdsource7$X9)

# Linear models of `diff` on split size and an agreement measure ----
f <- read.csv("results.csv", sep = "\t")
f
summary(lm(diff ~ splitsize * alpha, data = f))
summary(lm(diff ~ splitsize * rho, data = f))
summary(lm(diff ~ splitsize * r, data = f))

# Do labels differ between annotators? ----
annot7 <- read.csv("annot7.csv", sep = "\t")
annot7
# NOTE(review): if `annotator` is stored as a number, aov() treats it as a
# continuous predictor; wrap it in factor() if it is an ID. Confirm the type.
res.aov <- aov(label ~ annotator, data = annot7)
summary(res.aov)
# Rank-based counterpart of the one-way ANOVA above.
kruskal.test(label ~ annotator, data = annot7)

# Correlation between the two vectors of means ----
mean16 <- c(3.50, 3.38, 3.75, 3.44, 3.56, 3.69, 3.69)
mean_all <- c(
  2.7014652015, 2.7772675086, 3.0820143885, 2.8843930636,
  2.9183333333, 2.8549382716, 2.9211618257
)
# cor.test() needs paired vectors; fail early with a clear message otherwise.
stopifnot(length(mean16) == length(mean_all))
cor.test(mean16, mean_all, method = "spearman")
cor.test(mean16, mean_all, method = "kendall")
cor.test(mean16, mean_all, method = "pearson")
# Exploratory analysis, part 3 ----
# Compares `annot_r` with `ra_r_mean`, refits the regressions without
# annotator 3, and prints the citations to report.
# Relies on effectsize being attached earlier in this file.

# Regression on r, then the two-sample comparison, using results.csv ----
f <- read.csv("results.csv", sep = "\t")
f
summary(lm(diff ~ splitsize * r, data = f))
wilcox.test(f$annot_r, f$ra_r_mean)
rank_biserial(f$annot_r, f$ra_r_mean)

# Same comparison on results_not_rounded.csv ----
# From here on `f` refers to this file.
f <- read.csv("results_not_rounded.csv", sep = "\t")
f
wilcox.test(f$annot_r, f$ra_r_mean)
rank_biserial(f$annot_r, f$ra_r_mean)

# Refit the regressions without the rows whose annot_id is 3 ----
f1 <- f[f$annot_id != 3, ]
f1
summary(lm(diff ~ splitsize * alpha, data = f1))
summary(lm(diff ~ splitsize * rho, data = f1))
summary(lm(diff ~ splitsize * r, data = f1))

# Citations ----
# The package name must be a quoted string; a bare symbol is looked up as an
# object and fails.
citation("effectsize")
citation()