"""Pivot per-level values from a space-separated file into one row per key.

Created on Dec 4, 2016

@author: David

Each usable input line holds exactly four space-separated fields: an ignored
field, a value, a level token, and a key.  The level token's last character
is dropped and the remainder parsed as an integer slot index.  The output
has one line per key: the key followed by one tab-separated column per level,
with ``0`` in every slot that never received a value.
"""
import codecs

INPUT_PATH = "f:/parll data/data/pos_tfidf.txt"
OUTPUT_PATH = "f:/parll data/data/pos_tfidf.dist"

# Number of level slots kept per key (the original script hard-coded five).
NUM_LEVELS = 5


def build_distribution(lines, num_levels=NUM_LEVELS):
    """Collect the value seen for each (key, level) pair.

    Args:
        lines: iterable of text lines (an open file works).
        num_levels: number of slots allocated per key.

    Returns:
        A dict mapping each key to a list of ``num_levels`` entries.  A slot
        holds the value string from the last line seen for that key and
        level, or the integer ``0`` if no line supplied one.

    Malformed lines are skipped rather than aborting the run: wrong field
    count, a level token that is not an integer after dropping its last
    character, or a level outside ``0 .. num_levels - 1``.
    """
    dist_summary = {}
    for line in lines:
        fields = line.rstrip().split(" ")
        if len(fields) != 4:
            continue
        _, value, level_token, key = fields

        try:
            # The final character of the token is a suffix, not part of the
            # number, so it is dropped before parsing.
            level = int(level_token[:-1])
        except ValueError:
            continue

        # Reject negatives explicitly: list indexing would otherwise accept
        # them and silently write into a slot counted from the end.
        if not 0 <= level < num_levels:
            continue

        dist_summary.setdefault(key, [0] * num_levels)[level] = value
    return dist_summary


def format_row(key, values):
    """Return one output line: the key, then each value, tab-separated."""
    return "{}\t{}\n".format(key, "\t".join(str(v) for v in values))


def main(in_path=INPUT_PATH, out_path=OUTPUT_PATH):
    """Read ``in_path``, pivot it, and write the table to ``out_path``."""
    # Context managers guarantee both files are closed (and the output
    # flushed) even if processing fails part-way through.
    with codecs.open(in_path, "r", "utf-8") as src:
        dist_summary = build_distribution(src)

    with codecs.open(out_path, "w", "utf-8") as dst:
        for key, values in dist_summary.items():
            dst.write(format_row(key, values))

    print("Done")


if __name__ == "__main__":
    main()