require 'statsample'
require "rinruby"

# Load the R packages used by readfromfile2: "Kendall" for Kendall's tau and
# "irr" for kripp.alpha.
R.eval("library(\"Kendall\")")
R.eval("library(\"irr\")")

#test = ARGV[0]
#auto = ARGV[1]

# Agreement/correlation measures understood by readfromfile2.
CORRELATION_METHODS = %w[pearson spearman kendall krippendorff].freeze

# Computes the mean, the sample standard deviation (n - 1 denominator) and the
# standard error of the mean of a collection of numbers.
#
# With fewer than two values the n - 1 denominator makes the result NaN; this
# is left as is rather than raised on.
#
# @param input [Hash, Array] the numbers; for a Hash only the values are used
# @param type [String] "hash" when +input+ is a Hash, "array" when it is an Array
# @return [Array(Float, Float, Float)] mean, standard deviation, standard error
# @raise [ArgumentError] if +type+ is neither "hash" nor "array"
def stats(input, type)
  values =
    case type
    when "hash" then input.values
    when "array" then input
    else raise ArgumentError, "unknown type #{type.inspect} (expected \"hash\" or \"array\")"
    end

  count = values.length
  mean = values.sum(0.0) / count
  sumsq = values.sum(0.0) do |value|
    deviation = mean - value
    deviation * deviation
  end
  sd = Math.sqrt(sumsq / (count - 1))
  se = sd / Math.sqrt(count)
  [mean, sd, se]
end

# Picks the field separator from the end of the file name.
#
# @param filename [String]
# @return [String, nil] "," for names ending in "csv", "\t" for names ending in
#   "tsv", otherwise nil (String#split then falls back to whitespace splitting)
def separator_for(filename)
  if filename.end_with?("csv")
    ","
  elsif filename.end_with?("tsv")
    "\t"
  end
end

# Reads the requested columns of a delimited text file as floats, skipping the
# first (header) line.
#
# @param filename [String] path of a UTF-8 csv/tsv file
# @param indices [Array<Integer>] zero-based column indices to extract
# @return [Array<Array<Float>>] one array of values per requested column, in
#   the order of +indices+; missing or non-numeric fields become 0.0
def read_float_columns(filename, indices)
  separator = separator_for(filename)
  columns = indices.map { [] }

  # Block form guarantees the handle is closed even if parsing raises.
  File.open(filename, "r:utf-8") do |file|
    file.each_line.with_index do |line, line_number|
      next if line_number.zero?

      # chomp (not strip): strip would also eat leading/trailing tabs and so
      # shift the column indices of a TSV row whose edge field is empty.
      fields = line.chomp.split(separator)
      indices.each_with_index do |column, slot|
        columns[slot] << fields[column].to_f
      end
    end
  end

  columns
end

# Reads the second column (index 1) of a csv/tsv file as floats, skipping the
# header line.
#
# @param filename [String] path of the file to read
# @return [Array<Float>] the values of column 1, one per data row
def readfromfile(filename)
  read_float_columns(filename, [1]).first
end

# Reads two columns from a csv/tsv file and returns the chosen agreement or
# correlation measure between them.
#
# @param filename [String] path of the file to read (header line is skipped)
# @param goldindex [Integer] zero-based index of the "gold" column
# @param testindex [Integer] zero-based index of the "test" column
# @param method [String] one of "pearson", "spearman", "kendall", "krippendorff"
# @return [Float] Pearson's r, Spearman's rho, Kendall's tau, or Krippendorff's
#   alpha (interval metric, test and gold as the two rows of the matrix)
# @raise [ArgumentError] if +method+ is not one of the supported names
def readfromfile2(filename, goldindex, testindex, method)
  unless CORRELATION_METHODS.include?(method)
    raise ArgumentError,
          "unknown method #{method.inspect} (expected one of #{CORRELATION_METHODS.join(', ')})"
  end

  test, gold = read_float_columns(filename, [testindex, goldindex])

  case method
  when "pearson"
    Statsample::Bivariate::Pearson.new(Daru::Vector.new(test), Daru::Vector.new(gold)).r
  when "spearman"
    Statsample::Bivariate.spearman(Daru::Vector.new(test), Daru::Vector.new(gold))
  when "kendall"
    R.assign "test", test
    R.assign "gold", gold
    R.eval "out <- Kendall(test,gold)"
    R.pull "as.numeric(out$tau)"
    #p = R.pull "Kendall(test,gold)$s1"
  when "krippendorff"
    R.assign "test", test
    R.assign "gold", gold
    R.eval "m1 <- matrix(c(test,gold),nrow=2,byrow=TRUE)"
    R.pull "kripp.alpha(m1,\"interval\")$value"
  end
end

#testarray = readfromfile(test)
#autoarray = readfromfile(auto)

# --- Driver -----------------------------------------------------------------
# For every file in the directory given as the first command-line argument,
# compare column 3 (gold) with column 2 (test) and write "<id>\t<value>" rows
# to "<dir>_<method>.tsv".

dir = ARGV[0]
abort "usage: #{$PROGRAM_NAME} DIRECTORY" if dir.nil?

#dirs = ["results5_ca", "results6_ca", "results7_ca", "results8_ca", "results9_ca"]
correlation_method = "krippendorff"

#o2 = File.open("results_new.tsv","w:utf-8")
#annot_hash2 = {1 => "a", 3 => "b", 6 => "e", 7 => "f", 8 => "g", 9 => "h", 10 => "i", 11 => "j"}
#o2.puts "annot_id annot_r ra_r_mean ra_r_se diff"
#dirs.each do |dir|

# Only regular files are processed; sorting makes the output order
# reproducible (Dir.children returns entries in an unspecified order).
filelist = Dir.children(dir).select { |name| File.file?(File.join(dir, name)) }.sort

# Block form closes (and flushes) the output file even if a computation fails.
File.open("#{dir}_#{correlation_method}.tsv", "w:utf-8") do |o|
  #folds_rhos = []
  #annot_rhos = {}
  filelist.each do |filename|
    # File.join instead of a hard-coded "\\" so the script also runs off Windows.
    rho = readfromfile2(File.join(dir, filename), 3, 2, correlation_method)
    id = filename.split(".")[0]#[-2..-1]
    o.puts "#{id}\t#{rho}"
    #if [0,1,2,3,4,5,6,7,8,9].include?(id)
    #  folds_rhos << rho
    #else
    #  annot_rhos[id] = rho
    #end
  end
  #annot_rhos.each_pair do |annot, rho|
  #  annot_id = annot_hash2.key(annot)
  #  mean = stats(folds_rhos, "array")[0]
  #  se = stats(folds_rhos, "array")[2]
  #  diff = mean - rho
  #  o2.puts "#{annot_id}\t#{rho}\t#{mean}\t#{se}\t#{diff}"
  #end
end
#end