#!/usr/bin/env python
import sys
import codecs

# NOTE(review): the XML markup inside the string literals of this section was
# lost (every template had become '' or ' '), which made each __str__ raise
# "TypeError: not all arguments converted during string formatting".  The tag
# text below is a reconstruction based on the class/attribute names and on
# common LMF (ISO 24613) conventions -- confirm it against the DTD actually
# used before relying on the exact output.


class LMF(object):
    """Top-level container: a language code plus a list of lexical entries."""

    def __init__(self, lang):
        """Create an empty resource for the language code *lang*."""
        self.lang = lang
        self.lexical_entries = []

    def add_lexical_entry(self, lexical_entry):
        """Append *lexical_entry*; it is rendered with ``str()`` later."""
        self.lexical_entries.append(lexical_entry)

    def __str__(self):
        """Return the whole resource as newline-joined text."""
        # NOTE(review): prolog/DOCTYPE/dtdVersion values are assumed.
        return "\n".join([
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<!DOCTYPE LexicalResource SYSTEM "DTD_LMF_REV_16.dtd">',
            '<LexicalResource dtdVersion="16">',
            '<GlobalInformation>',
            ' <feat att="languageCoding" val="ISO 639-3"/>',
            '</GlobalInformation>',
            '<Lexicon>',
            ' <feat att="language" val="%s"/>' % self.lang,
            "\n".join([str(e) for e in self.lexical_entries]),
            '</Lexicon>',
            '</LexicalResource>',
        ])


class LexicalEntry(object):
    """One entry holding lemmas, senses and entry-level features."""

    def __init__(self):
        self.features = []
        self.lemmas = []
        self.senses = []

    def add_lemma(self, lemma, pos, features):
        """Record a lemma as a ``(lemma, pos, features)`` triple.

        *features* is an iterable of objects rendered with ``str()``.
        """
        self.lemmas.append((lemma, pos, features))

    def add_sense(self, sense):
        """Append a sense object (rendered with ``str()``)."""
        self.senses.append(sense)

    def add_feature(self, feature):
        """Append an entry-level feature (rendered with ``str()``)."""
        self.features.append(feature)

    def __str__(self):
        """Return the entry as newline-joined text.

        Only the lemma text goes through ``amp``; all other values are
        inserted verbatim, as in the original code.
        """
        # NOTE(review): element and attribute names are reconstructed.
        lemma_blocks = [
            "\n".join([
                '<FormRepresentation>',
                '<feat att="writtenForm" val="%s"/>' % amp(lemma),
                '<feat att="partOfSpeech" val="%s"/>' % pos,
                '\n'.join([str(f) for f in features]),
                '</FormRepresentation>',
            ])
            for (lemma, pos, features) in self.lemmas
        ]
        return "\n".join([
            '<LexicalEntry>',
            '<Lemma>',
            "\n".join(lemma_blocks + ['</Lemma>']),
            '\n'.join([str(s) for s in self.senses]),
            '\n'.join([str(f) for f in self.features] + ['</LexicalEntry>']),
        ])


class Feature(object):
    """An attribute/value pair."""

    def __init__(self, att, val):
        self.att = att
        self.val = val

    def __str__(self):
        """Return the pair as a single element; values are not escaped."""
        # NOTE(review): element name reconstructed from the att/val fields.
        return '<feat att="%s" val="%s"/>' % (self.att, self.val)


class Sense(object):
    """A sense identifier with optional relations to other senses."""

    def __init__(self, sense):
        self.sense = sense
        self.relations = []

    def relation(self, target, relation_types):
        """Add a relation to *target* labelled with each of *relation_types*."""
        self.relations.append((target, relation_types))

    def __str__(self):
        """Return the sense; self-closing when it has no relations."""
        # NOTE(review): element and attribute names are reconstructed.
        if not self.relations:
            return '<Sense id="%s"/>' % (self.sense)
        return "\n".join([
            '<Sense id="%s">' % (self.sense),
            "\n".join([
                "\n".join([
                    '<SenseRelation targets="%s">' % (target),
                    '\n'.join(['<feat att="label" val="%s"/>' % t
                               for t in relation_types]),
                    '</SenseRelation>',
                ])
                for (target, relation_types) in self.relations
            ]),
            '</Sense>',
        ])


def read_csv(num_of_fields, stream=None):
    """Yield the tab-separated fields of every well-formed input line.

    Lines whose field count differs from *num_of_fields* are skipped
    silently.  *stream* is any iterable of lines and defaults to
    ``sys.stdin``.
    """
    if stream is None:
        stream = sys.stdin
    for line in stream:
        # Strip only the line terminator: slicing off the last character
        # unconditionally would eat real data when the final line has no
        # trailing newline, and would leave '\r' behind on CRLF input.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) == num_of_fields:
            yield fields


def amp(s):
    """Return *s* with every '&' replaced by the XML entity '&amp;'."""
    return s.replace('&', '&amp;')


# --- SALDO ------------------------------------------------
# a sense is unique for an entry.
def saldo_data():
    """Read SALDO rows from the input and group them.

    Each accepted row has seven tab-separated columns, unpacked as
    ``(saldo, primary, secondary, lemgram, gf, pos, paradigm)``.

    Returns a pair ``(forms, senses)`` where *forms* maps each ``saldo``
    value to a list of ``(lemgram, gf, pos, paradigm)`` tuples and *senses*
    is the set of distinct ``(saldo, primary, secondary)`` triples.
    """
    forms = {}
    senses = set()
    for (saldo_id, primary, secondary,
         lemgram, gf, pos, paradigm) in read_csv(num_of_fields=7):
        forms.setdefault(saldo_id, []).append((lemgram, gf, pos, paradigm))
        senses.add((saldo_id, primary, secondary))
    return (forms, senses)


def saldo():
    """Build the SALDO resource from the input and return it as a string.

    The first entry contains only the sense 'PRIM..1'.  After that, one
    entry is emitted per distinct ``(saldo, primary, secondary)`` triple,
    carrying every form recorded for that ``saldo`` value, one 'primary'
    relation and -- unless the secondary column is exactly 'PRIM..1' --
    one 'secondary' relation per space-separated item of that column.
    """
    lmf = LMF('swe')
    (forms, senses) = saldo_data()

    root_entry = LexicalEntry()
    root_entry.add_sense(Sense('PRIM..1'))
    lmf.add_lexical_entry(root_entry)

    # Sorted so the output order is reproducible; iterating the bare set
    # gives an arbitrary order that can differ between runs.
    for (saldo_id, primary, secondary) in sorted(senses):
        lexical_entry = LexicalEntry()
        for (lemgram, gf, pos, paradigm) in forms[saldo_id]:
            lexical_entry.add_lemma(gf, pos, [
                Feature('lemgram', lemgram),
                Feature('paradigm', paradigm),
            ])
        sense = Sense(saldo_id)
        sense.relation(primary, ['primary'])
        if secondary != 'PRIM..1':
            for sec in secondary.split(' '):
                sense.relation(sec, ['secondary'])
        lexical_entry.add_sense(sense)
        lmf.add_lexical_entry(lexical_entry)
    return str(lmf)


# --- SWEFN ------------------------------------------------

def swefn_data():
    """Return ``str()`` of the list of first-column values of the input.

    Accepted rows have 14 tab-separated columns: fr, fr_st, fr_domain,
    fr_example, fe_core_list, fe_noncore_list, fe_cmp_list, fe_cmp_example,
    lus_saldo, lus_new, notes, created_by, createdate, modifdate.  Only
    ``fr`` is used so far.
    """
    frames = [row[0] for row in read_csv(num_of_fields=14)]
    return str(frames)


def swefn():
    """Return the SweFN output; currently just the ``swefn_data()`` string."""
    # TODO: build LMF/LexicalEntry objects here; for now the raw frame list
    # is returned unchanged.
    return swefn_data()


# ----------------------------------------------------------

if __name__ == '__main__':
    if len(sys.argv) > 1:
        resource = sys.argv[1]
        # print(...) with one argument behaves the same on Python 2 and 3;
        # the bare print statement is a syntax error on Python 3.
        if resource == 'saldo':
            print(saldo())
        elif resource == 'swefn':
            print(swefn())
        elif resource in ('swesaurus', 'saldoe', 'lwt'):
            pass  # recognised, but no converter is implemented yet