require 'statsample'
require "rinruby"

# R packages used by readfromfile2: Kendall() for the "kendall" method and
# kripp.alpha() for the "krippendorff" method.
R.eval("library(\"Kendall\")")
R.eval("library(\"irr\")")

#test = ARGV[0]
#auto = ARGV[1]

# Computes the mean, the sample standard deviation (n - 1 denominator) and the
# standard error of the mean of a collection of numbers.
#
# @param input [Hash, Array] numbers to summarise; for a Hash only the values
#   are used
# @param type [String] "hash" or "array", telling how to read +input+
# @return [Array(Float, Float, Float)] mean, standard deviation, standard error
#   (NaN values result when +input+ has fewer than two elements)
# @raise [ArgumentError] if +type+ is neither "hash" nor "array"
def stats(input, type)
  values =
    case type
    when "hash" then input.values
    when "array" then input
    else raise ArgumentError, "unknown type: #{type.inspect}"
    end

  count = values.length
  mean = values.inject(0.0) { |acc, value| acc + value } / count
  sumsq = values.inject(0.0) { |acc, value| acc + (mean - value) * (mean - value) }
  sd = Math.sqrt(sumsq / (count - 1))
  se = sd / Math.sqrt(count)
  [mean, sd, se]
end

# Picks the field separator from the file name ending.
#
# @param filename [String]
# @return [String, nil] "," for names ending in "csv", a tab for "tsv",
#   otherwise nil (String#split then splits on whitespace)
def field_separator(filename)
  if filename.end_with?("csv")
    ","
  elsif filename.end_with?("tsv")
    "\t"
  end
end

# Yields the split fields of every line of +filename+ except the first one
# (treated as a header). The file is read as UTF-8 and is closed even if the
# block raises.
#
# @param filename [String]
# @yieldparam fields [Array<String>] stripped line split on the separator
def each_data_row(filename)
  separator = field_separator(filename)
  File.open(filename, "r:utf-8") do |file|
    file.each_line.with_index do |line, index|
      next if index.zero?

      yield line.strip.split(separator)
    end
  end
end

# Reads two numeric columns from a csv/tsv file, skipping the header line.
#
# @param filename [String] path of the file to read
# @param sentindex [Integer] index of the first column to extract
# @param testindex [Integer] index of the second column to extract
# @return [Array<Array(Float, Float)>] one [sent, test] pair per data line;
#   missing or non-numeric fields become 0.0 (String#to_f / nil.to_f)
def readfromfile(filename, sentindex, testindex)
  pairs = []
  each_data_row(filename) do |fields|
    pairs << [fields[sentindex].to_f, fields[testindex].to_f]
  end
  pairs
end

# Reads a "gold" and a "test" column from a csv/tsv file (header line skipped)
# and returns an agreement/correlation coefficient between them.
#
# @param filename [String] path of the file to read
# @param goldindex [Integer] index of the gold column
# @param testindex [Integer] index of the test column
# @param method [String] "pearson", "spearman" (both via Statsample),
#   "kendall" or "krippendorff" (both via R)
# @return [Numeric, nil] the coefficient, or nil for an unrecognised +method+
def readfromfile2(filename, goldindex, testindex, method)
  test = []
  gold = []
  each_data_row(filename) do |fields|
    test << fields[testindex].to_f
    gold << fields[goldindex].to_f
  end

  case method
  when "pearson"
    Statsample::Bivariate::Pearson.new(Daru::Vector.new(test), Daru::Vector.new(gold)).r
  when "spearman"
    Statsample::Bivariate.spearman(Daru::Vector.new(test), Daru::Vector.new(gold))
  when "kendall"
    R.assign "test", test
    R.assign "gold", gold
    R.eval "out <- Kendall(test,gold)"
    R.pull "as.numeric(out$tau)"
    #p = R.pull "Kendall(test,gold)$s1"
  when "krippendorff"
    R.assign "test", test
    R.assign "gold", gold
    # Two-row matrix: row 1 = test scores, row 2 = gold scores.
    R.eval "m1 <- matrix(c(test,gold),nrow=2,byrow=TRUE)"
    R.pull "kripp.alpha(m1,\"interval\")$value"
  end
end

dirs = ["results5_ca", "results6_ca", "results7_ca", "results8_ca", "results9_ca", "results20_ca"]
mode = "usual"
method = "krippendorff"
annot_hash2 = {1 => "a", 3 => "b", 6 => "e", 7 => "f", 8 => "g", 9 => "h", 10 => "i", 11 => "j"}
# Ids starting with one of these go to folds_rhos; all other ids go to annot_rhos.
digits = ("0".."9").to_a

# Block form guarantees the summary file is flushed and closed, also on error.
File.open("results_new.tsv", "w:utf-8") do |o2|
  # Header is tab-separated so it lines up with the six tab-separated data columns.
  o2.puts "annot_id\tannot_r\tra_r_mean\tra_r_se\tdiff\tdiff_adj"

  dirs.each do |dir|
    STDERR.puts dir
    next if mode == "ttest" # nothing is computed in "ttest" mode

    folds_rhos = []
    annot_rhos = {}

    File.open("#{dir}_#{method}.tsv", "w:utf-8") do |o|
      # Dir.children gives no ordering guarantee; sort so that the element
      # dropped by folds_rhos[0..-2] below is reproducible across platforms.
      Dir.children(dir).sort.each do |filename|
        # File.join instead of a literal backslash keeps the path portable.
        rho = readfromfile2(File.join(dir, filename), 3, 2, method)

        # The id is the tail of the name before the first dot: one character,
        # or two for results20_ca.
        stem = filename.split(".")[0]
        id = dir != "results20_ca" ? stem[-1] : stem[-2..-1]
        o.puts "#{id}\t#{rho}"

        if digits.include?(id[0])
          folds_rhos << rho
        else
          annot_rhos[id] = rho
        end
      end
    end

    next if annot_rhos.empty?

    # These do not depend on the annotator, so compute them once per directory.
    mean, _sd, se = stats(folds_rhos, "array")
    mean_adj = stats(folds_rhos[0..-2], "array")[0] # mean without the last fold value

    annot_rhos.each_pair do |annot, rho|
      annot_id = annot_hash2.key(annot[0])
      diff = mean - rho
      diff_adj = mean_adj - rho
      o2.puts "#{annot_id}\t#{rho}\t#{mean}\t#{se}\t#{diff}\t#{diff_adj}"
    end
  end
end