"""Evaluate the voting classifier on a labelled CSV data set.

Every non-blank line of the input file is a comma-separated list of float
features followed by the target label in the last column.  The script prints
a confusion matrix (rows = target class, columns = predicted class), the
per-class recall / precision / F1 and the unweighted average F1.
"""
import sys

import numpy as np

fil = "siwoco-swellex-topic-bigram-compact-nh.csv"

# Class labels in the order used for confusion-matrix rows and columns.
CLASSES = ("A1", "A2", "B1", "B2", "C1")


def cmap(pred):
    """Return the confusion-matrix column of *pred*, or -1 if it is unknown."""
    # Compare with == (not a dict lookup) so that any prediction object that
    # compares equal to a label string keeps working.
    for index, name in enumerate(CLASSES):
        if pred == name:
            return index
    return -1


def f1(p, r):
    """Return the harmonic mean of precision *p* and recall *r*.

    Returns 0.0 when both are zero instead of raising ZeroDivisionError.
    """
    if not (p + r):
        return 0.0
    return (2 * p * r) / (p + r)


def mean(l):
    """Return the arithmetic mean of *l* as a float (0.0 for an empty list)."""
    return float(sum(l)) / max(len(l), 1)


def safe_ratio(numerator, denominator):
    """Return numerator / denominator as a float, or 0.0 if denominator is 0."""
    if not denominator:
        return 0.0
    return float(numerator) / denominator


def load_dataset(path):
    """Read *path* and return ``(data, labels)``.

    ``data`` is a list of float64 arrays of shape (1, n_features) and
    ``labels`` the matching list of label strings.  Blank lines are skipped.
    """
    data = []
    labels = []
    with open(path, "r") as handle:
        for line in handle:
            if not line.strip():
                continue
            # The label is the last column; everything before it is numeric.
            values, target = line.rstrip().rsplit(",", 1)
            features = np.array(values.split(","), dtype="float64")
            data.append(features.reshape(1, -1))
            labels.append(target)
    return data, labels


def build_confusion_matrix(data, labels, classify):
    """Classify every sample and tally the outcomes.

    Returns ``(matrix, unrecognised)``.  ``matrix[target][column]`` counts the
    samples of class *target* that were predicted as ``CLASSES[column]``.
    ``unrecognised[target]`` counts the samples of class *target* whose
    prediction was not one of CLASSES.

    Raises ValueError if a target label is not one of CLASSES.
    """
    matrix = {name: [0] * len(CLASSES) for name in CLASSES}
    unrecognised = {name: 0 for name in CLASSES}
    for features, target in zip(data, labels):
        if target not in matrix:
            raise ValueError("unknown target label: {!r}".format(target))
        column = cmap(classify(features))
        if column < 0:
            # Indexing the row with -1 would silently credit the last class
            # ("C1"); keep such predictions out of every column instead.
            unrecognised[target] += 1
        else:
            matrix[target][column] += 1
    return matrix, unrecognised


def class_scores(matrix, unrecognised=None):
    """Return ``{label: (recall, precision, f1)}`` for every class.

    Unrecognised predictions count towards a class's support (so they lower
    recall) but never towards any class's predicted total.  A class without
    samples or without predictions scores 0.0 rather than dividing by zero.
    """
    missed = unrecognised or {}
    scores = {}
    for index, name in enumerate(CLASSES):
        hits = matrix[name][index]
        support = sum(matrix[name]) + missed.get(name, 0)
        predicted = sum(matrix[row][index] for row in CLASSES)
        recall = safe_ratio(hits, support)
        precision = safe_ratio(hits, predicted)
        scores[name] = (recall, precision, f1(precision, recall))
    return scores


def main():
    """Run the evaluation on ``fil`` and print the report to stdout."""
    # Imported here rather than at module level so the metric helpers above
    # can be imported without the classifier module being available.
    from voting_classifier import predict

    data, labels = load_dataset(fil)
    matrix, unrecognised = build_confusion_matrix(data, labels, predict)
    scores = class_scores(matrix, unrecognised)
    avg_f1 = mean([scores[name][2] for name in CLASSES])

    print("Confusion matrix")
    for name in CLASSES:
        print("\t".join([str(x) for x in matrix[name]]))
    print("*" * 60)
    print("\tRecall\t\tPrecision\tF1")
    for name in CLASSES:
        recall, precision, score = scores[name]
        print("{}\t{} {} {}".format(name, recall, precision, score))
    print("-" * 60)
    print("Average F1: {}".format(avg_f1))

    skipped = sum(unrecognised.values())
    if skipped:
        sys.stderr.write(
            "Warning: {} prediction(s) were not one of {}; "
            "they are counted as misses\n".format(skipped, ", ".join(CLASSES))
        )


if __name__ == "__main__":
    main()