'''
Created on Jul 3, 2017

@author: David

Topic-membership lookups for single words.

At import time this module scans two directories for word-list files,
reads each file as UTF-8 (every line, right-stripped, becomes one entry)
and keeps the results in ``topics`` and ``topics_fn``.  ``get_topics`` and
``get_topics_fn`` then report, for a given word, which of those lists
contain it.
'''
import codecs
from os import listdir
from os.path import isfile, join, splitext

topic_dir = "siwoco/coctaill_topics_lemmatized"
swefn_dir = "siwoco/topic_lists"


def _list_files(directory, suffix):
    '''Return paths of the regular files in *directory* whose name ends with *suffix*.

    Paths are returned in the order ``os.listdir`` yields the names.
    '''
    return [
        join(directory, name)
        for name in listdir(directory)
        if isfile(join(directory, name)) and name.endswith(suffix)
    ]


def _read_word_list(path):
    '''Read the UTF-8 file at *path* and return its lines, right-stripped.'''
    with codecs.open(path, "r", encoding="utf-8") as handle:
        return [line.rstrip() for line in handle]


def _membership_vector(word, word_lists):
    '''Return one int per list in *word_lists*: 1 if *word* is in it, else 0.'''
    return [1 if word in word_list else 0 for word_list in word_lists]


# NOTE(review): os.listdir returns names in arbitrary order, so the position
# of each topic in the vectors below is only as stable as the directory
# listing.  Sorting would make it deterministic, but would also re-index the
# vectors for any existing consumer -- confirm before changing.
topic_files = _list_files(topic_dir, ".list")
swefn_files = _list_files(swefn_dir, ".syn.list")

# One list of entries per file, in the same order as the file lists above.
topics = [_read_word_list(path) for path in topic_files]
topics_fn = [_read_word_list(path) for path in swefn_files]


def get_topics(word):
    '''Return a 0/1 list marking which lists in ``topics`` contain *word*.

    The result has one element per file in ``topic_files``, in the same
    order; an element is 1 when *word* equals an entry of that file's list.
    '''
    return _membership_vector(word, topics)


def get_topics_fn(word):
    '''Return a 0/1 list marking which lists in ``topics_fn`` contain *word*.

    The result has one element per file in ``swefn_files``, in the same
    order; an element is 1 when *word* equals an entry of that file's list.
    '''
    return _membership_vector(word, topics_fn)


# Example usage:
# print(get_topics_fn("gurka"))
# print(get_topics_fn("tomat"))
# print(get_topics("gurka"))
# print(get_topics("tomat"))