'''
Created on Jul 5, 2017

@author: David

Copy the first tab-separated field of selected lines of a tf-idf file.

The index file holds one integer per line (blank lines are ignored); each
integer is treated as a 0-based line number of the input file.  For every
listed line, the text before the first tab is written to the output file,
one entry per line.
'''
import codecs

index_file = "f:/parll data/data/compound-indexes.txt"
infile = "f:/parll data/data/compound_tfidf.dist"
outfile = "f:/parll data/data/compound-utile.txt"


def _read_indexes(path):
    '''Return the integers listed one per line in *path*, skipping blank lines.'''
    with open(path, "r") as handle:
        return [int(line.strip()) for line in handle if line.strip()]


def _iter_selected(lines, indexes):
    '''Yield ``(line_number, first_field)`` for each line listed in *indexes*.

    *lines* is any iterable of text lines and *indexes* an iterable of
    0-based line numbers.  The order of *indexes* does not matter and
    duplicates select the line once.  Iteration stops right after the
    largest requested line, so the rest of the input is never read.
    '''
    wanted = set(indexes)
    if not wanted:
        # Nothing requested: do not touch the input at all.
        return
    last = max(wanted)
    for line_no, line in enumerate(lines):
        if line_no > last:
            break
        if line_no in wanted:
            # Drop the line terminator first so that a line without any tab
            # does not carry its newline into the extracted field.
            yield line_no, line.rstrip("\r\n").split("\t")[0]


def extract_indexed_fields(index_path, in_path, out_path):
    '''Write the first field of every indexed line of *in_path* to *out_path*.

    index_path -- text file with one 0-based line number per line
    in_path    -- UTF-8 input file with tab-separated fields
    out_path   -- UTF-8 output file; created or truncated

    Each selected line number is printed to stdout as it is processed.
    Raises ValueError if the index file contains a non-integer line, and
    the usual IOError/OSError if a file cannot be opened.
    '''
    indexes = _read_indexes(index_path)
    # Both files are managed by the same ``with`` so the output is flushed
    # and closed even if reading the input fails part-way through.
    with codecs.open(out_path, "w", encoding="utf-8") as dst, \
            codecs.open(in_path, "r", encoding="utf-8") as src:
        for line_no, field in _iter_selected(src, indexes):
            print(line_no)
            dst.write("{}\n".format(field))


if __name__ == "__main__":
    extract_indexed_fields(index_file, infile, outfile)