def read_tagged_sentence(f):
    """Read one sentence of tab-separated ``word<TAB>tag`` lines from *f*.

    Lines are consumed from the current position up to and including the
    next blank line, or up to end of file, whichever comes first.

    Args:
        f: An open file-like object whose ``readline()`` returns either
            text or UTF-8 encoded bytes.

    Returns:
        A list of ``(word, tag)`` tuples of decoded text. The list is empty
        when the first line read is blank. ``None`` is returned when *f* is
        already at end of file.

    Raises:
        ValueError: If a non-blank line does not consist of exactly two
            tab-separated fields (after trimming surrounding whitespace),
            or if a byte line is not valid UTF-8.
    """
    line = f.readline()
    if not line:
        return None

    sentence = []
    while line:
        # Accept both binary and text streams: only byte strings need
        # decoding (text-mode files on Python 3 already yield str).
        if isinstance(line, bytes):
            line = line.decode("utf-8")
        # Trim ASCII whitespace only.  A bare strip() on text would also
        # remove Unicode spaces such as U+00A0, which can be real tokens.
        line = line.strip(" \t\n\r\x0b\x0c")
        if not line:
            # Blank (or whitespace-only, e.g. "\r\n") line ends the sentence.
            break
        fields = line.split("\t")
        if len(fields) != 2:
            raise ValueError(
                "expected 'word<TAB>tag', got %d field(s): %r"
                % (len(fields), line)
            )
        sentence.append((fields[0], fields[1]))
        line = f.readline()
    return sentence


def read_tagged_corpus(filename):
    """Read every tagged sentence from the UTF-8 file at *filename*.

    Sentences are blocks of ``word<TAB>tag`` lines separated by one or more
    blank lines.

    Args:
        filename: Path of the corpus file to read.

    Returns:
        A list of sentences, each a non-empty list of ``(word, tag)``
        tuples, in file order.

    Raises:
        ValueError: If a non-blank line is not a ``word<TAB>tag`` pair or
            the file is not valid UTF-8.
    """
    # Function-scope import keeps this block self-contained; io.open accepts
    # ``encoding`` on both Python 2 and Python 3.
    import io

    sentences = []
    with io.open(filename, encoding="utf-8") as f:
        sentence = read_tagged_sentence(f)
        # Only None signals end of file.  An empty list just means a run of
        # consecutive blank lines, which must not cut the corpus short.
        while sentence is not None:
            if sentence:
                sentences.append(sentence)
            sentence = read_tagged_sentence(f)
    return sentences