#!/usr/bin/env python
import sys
import codecs
class LMF:
    """Top-level LMF (Lexical Markup Framework) document for one language.

    Collects lexical entries and serialises them, via ``str()``, as an XML
    document containing a single lexicon.

    NOTE(review): the markup literals in ``__str__`` had been stripped down
    to empty strings (leaving e.g. ``' ' % self.lang``, which raises
    ``TypeError``).  They are restored here following LMF DTD rev. 16
    conventions -- confirm the DOCTYPE and element names against the DTD
    that downstream consumers validate with.
    """

    def __init__(self, lang):
        # Language code written into the lexicon header (e.g. 'swe').
        self.lang = lang
        # Entries in insertion order; each one is rendered with str().
        self.lexical_entries = []

    def add_lexical_entry(self, lexical_entry):
        """Append one entry (anything whose str() is its XML) to the lexicon."""
        self.lexical_entries.append(lexical_entry)

    def __str__(self):
        """Return the whole document as one newline-joined XML string."""
        return "\n".join([
            '<?xml version="1.0" encoding="UTF-8"?>',
            '<!DOCTYPE LexicalResource SYSTEM "DTD_LMF_REV_16.dtd">',
            '<LexicalResource dtdVersion="16">',
            '<GlobalInformation>',
            ' <feat att="languageCoding" val="ISO 639-3"/>',
            '</GlobalInformation>',
            '<Lexicon>',
            ' <feat att="language" val="%s"/>' % self.lang,
            "\n".join([str(e) for e in self.lexical_entries]),
            '</Lexicon>',
            '</LexicalResource>'])
class LexicalEntry:
    """One LMF lexical entry: its lemma forms, senses and entry-level features.

    NOTE(review): the markup literals in ``__str__`` had been stripped down
    to empty strings (``'' % amp(lemma)`` raises ``TypeError``).  The element
    and attribute names below are restored following LMF conventions --
    confirm them against the DTD that downstream consumers validate with.
    """

    def __init__(self):
        # Entry-level features, rendered after the senses.
        self.features = []
        # (lemma, pos, features) triples, one per form representation.
        self.lemmas = []
        # Senses, rendered with str() after the lemma block.
        self.senses = []

    def add_lemma(self, lemma, pos, features):
        """Record one written form with its part of speech and form features."""
        self.lemmas.append((lemma, pos, features))

    def add_sense(self, sense):
        """Attach a sense (anything whose str() is its XML) to this entry."""
        self.senses.append(sense)

    def add_feature(self, feature):
        """Attach an entry-level feature (anything whose str() is its XML)."""
        self.features.append(feature)

    def __str__(self):
        """Return the entry as newline-joined XML.

        Only the lemma text is passed through amp(); part of speech and
        feature values are written as given.  An empty sense or feature
        list still contributes an (empty) line, as in the original layout.
        """
        return "\n".join([
            '<LexicalEntry>',
            '<Lemma>',
            "\n".join([
                "\n".join([
                    '<FormRepresentation>',
                    '<feat att="writtenForm" val="%s"/>' % amp(lemma),
                    '<feat att="partOfSpeech" val="%s"/>' % pos,
                    '\n'.join([str(f) for f in features]),
                    '</FormRepresentation>'
                ]) for (lemma, pos, features) in self.lemmas
            ] + ['</Lemma>']),
            '\n'.join([str(s) for s in self.senses]),
            '\n'.join([str(f) for f in self.features] + ['</LexicalEntry>'])
        ])
class Feature:
    """An attribute/value pair rendered as an LMF ``feat`` element.

    NOTE(review): the format string in ``__str__`` had been stripped to
    ``''``, which raises ``TypeError`` when formatted with two arguments.
    It is restored to the usual LMF ``feat`` element -- confirm against
    the DTD in use.
    """

    def __init__(self, att, val):
        self.att = att
        self.val = val

    def __str__(self):
        """Return the feature as XML; att and val are written unescaped."""
        return '<feat att="%s" val="%s"/>' % (self.att, self.val)
class Sense:
    """A sense identifier together with its relations to other senses.

    NOTE(review): the markup literals in ``__str__`` had been stripped down
    to empty strings (``'' % (self.sense)`` raises ``TypeError``).  The
    element and attribute names are restored following LMF conventions --
    confirm them against the DTD that downstream consumers validate with.
    """

    def __init__(self, sense):
        # Identifier written into the sense element.
        self.sense = sense
        # (target, relation_types) pairs in insertion order.
        self.relations = []

    def relation(self, target, relation_types):
        """Add a relation to `target` labelled with each of `relation_types`."""
        self.relations.append((target, relation_types))

    def __str__(self):
        """Return the sense as XML.

        A sense without relations is written as a single self-closing
        element; otherwise every relation becomes a nested element holding
        one label feature per relation type.
        """
        if self.relations:
            return "\n".join([
                '<Sense id="%s">' % (self.sense),
                "\n".join([
                    "\n".join([
                        '<SenseRelation targets="%s">' % (target),
                        '\n'.join(['<feat att="label" val="%s"/>' % t
                                   for t in relation_types]),
                        '</SenseRelation>'
                    ])
                    for (target, relation_types) in self.relations
                ]),
                '</Sense>'
            ])
        else:
            return '<Sense id="%s"/>' % (self.sense)
def read_csv(num_of_fields):
    """Yield the tab-separated fields of each well-formed line on stdin.

    num_of_fields -- exact number of columns a line must have; lines with
    any other column count are skipped silently.

    Each yielded value is a list of ``num_of_fields`` strings.
    """
    for line in sys.stdin:
        # Strip only the line terminator.  Slicing off the last character
        # unconditionally would eat a data character when the final line
        # has no trailing newline; tabs are kept so that empty trailing
        # columns still count.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) == num_of_fields:
            yield fields
def amp(s):
    """Return `s` with every ampersand replaced by the XML entity ``&amp;``.

    Only '&' is handled; '<', '>' and quotes are passed through unchanged.
    The previous replacement string was a bare '&', which made this a no-op.
    """
    return s.replace('&', '&amp;')
# --- SALDO ------------------------------------------------
# A sense is unique to an entry.
def saldo_data():
    """Read the seven-column SALDO table via read_csv() and index it.

    Returns a pair ``(forms, senses)``:
      forms  -- dict mapping the first column to a list of
                (lemgram, gf, pos, paradigm) tuples, in input order
      senses -- set of (first column, primary, secondary) triples
    """
    forms = {}
    senses = set()
    for row in read_csv(num_of_fields=7):
        sense_id, primary, secondary, lemgram, gf, pos, paradigm = row
        # Group every form under the sense identifier it belongs to.
        forms.setdefault(sense_id, []).append((lemgram, gf, pos, paradigm))
        senses.add((sense_id, primary, secondary))
    return (forms, senses)
def saldo():
    """Convert the SALDO table on stdin to LMF and return it as a string.

    The lexicon starts with one lemma-less entry holding the sense
    'PRIM..1'; after that comes one entry per (sense, primary, secondary)
    triple returned by saldo_data(), carrying all forms recorded for that
    sense.
    """
    lmf = LMF('swe')
    forms, senses = saldo_data()

    # Entry for 'PRIM..1' itself: a bare sense without lemma or relations.
    root_entry = LexicalEntry()
    root_entry.add_sense(Sense('PRIM..1'))
    lmf.add_lexical_entry(root_entry)

    for sense_id, primary, secondary in senses:
        entry = LexicalEntry()
        for lemgram, gf, pos, paradigm in forms[sense_id]:
            form_features = [
                Feature('lemgram', lemgram),
                Feature('paradigm', paradigm),
            ]
            entry.add_lemma(gf, pos, form_features)

        sense = Sense(sense_id)
        sense.relation(primary, ['primary'])
        # A secondary column equal to 'PRIM..1' yields no secondary
        # relations; otherwise it is a space-separated list of targets.
        if secondary != 'PRIM..1':
            for target in secondary.split(' '):
                sense.relation(target, ['secondary'])

        entry.add_sense(sense)
        lmf.add_lexical_entry(entry)

    return str(lmf)
# --- SWEFN ------------------------------------------------
def swefn_data():
    """Return str() of the list of first-column values of the SWEFN table.

    The table is read through read_csv() and must have 14 columns per
    line; only the first column (the frame) is kept, the other thirteen
    are ignored.
    """
    frame_names = [row[0] for row in read_csv(num_of_fields=14)]
    return str(frame_names)
def swefn():
    """Return the SWEFN frame listing produced by swefn_data().

    Work in progress: the result is the string form of a list of frame
    names read from stdin; no LMF document is built yet.
    """
    # An LMF('swe') object used to be created here and was never used; it
    # is dropped until the LMF conversion is actually implemented.
    return swefn_data()
# ----------------------------------------------------------
if __name__ == '__main__':
    # Usage: <script> RESOURCE < table.tsv
    # The first command-line argument selects the resource to convert; the
    # input table is read from stdin and the result is written to stdout.
    # With no argument, or an unrecognised one, nothing is done.
    if len(sys.argv) > 1:
        resource = sys.argv[1]
        # print(x) with a single argument behaves the same under Python 2
        # and is also valid Python 3, unlike the print statement.
        if resource == 'saldo':
            print(saldo())
        elif resource == 'swefn':
            print(swefn())
        elif resource in ('swesaurus', 'saldoe', 'lwt'):
            # Accepted resource names with no conversion implemented yet.
            pass