#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys
import re
import codecs
from xml.etree.ElementTree import ElementTree
from xml.etree.ElementTree import fromstring
class HTML:
# Page container: collects entry objects and renders them as one string.
# __init__ starts with an empty entry list; add_entry() appends to it.
def __init__(self):
self.entries = []
# Render the page: the fixed literals below, then every entry formatted with
# "%s" (one per line), then two closing literals, all joined with newlines.
def __str__(self):
return "\n".join([
'',
'',
# NOTE(review): the next literal is opened on one physical line and closed on
# the following one, which a single-quoted Python string cannot do. Text seems
# to be missing from it (and possibly from the empty literals around it) --
# confirm against the original file.
'
',
'Ett svenskt konstruktikon, utvecklingsversion',
'',
'',
'',
'',
"\n".join(["%s" % (e) for e in self.entries]),
'',
''])#.encode('utf-8')
# Append one entry; it is formatted with "%s" when the page is rendered.
def add_entry(self, entry):
self.entries.append(entry)
class KonstruktikonEntry:
# One entry of the output page. Plain string fields default to "", the two
# Example-valued fields (definition, comment) default to None, and the three
# list fields start empty.
def __init__(self):
self.typee = ""
self.cat = ""
self.inheritance = ""
self.evokes = ""
self.definition = None
self.structure = ""
self.cee = ""
self.coll = ""
self.construction_elements_internal = []
self.construction_elements_external = []
self.examples = []
self.comment = None
self.reference = ""
self.id = ""
# Wrap the given part list in an Example and append it to self.examples.
def add_example(self, contents):
self.examples.append(Example(contents))
# Replace the definition with an Example built from the given part list.
def set_definition(self, contents):
self.definition = Example(contents)
# Replace the comment with an Example built from the given part list.
def set_comment(self, contents):
self.comment = Example(contents)
# Append a ConstructionElement(tagname, contents) to the internal list.
def add_internal_construction_element(self, tagname, contents):
self.construction_elements_internal.append(ConstructionElement(tagname, contents))
# Append a ConstructionElement(tagname, contents) to the external list.
def add_external_construction_element(self, tagname, contents):
self.construction_elements_external.append(ConstructionElement(tagname, contents))
# Render the entry as newline-joined rows. The examples / internal / external
# sections are only built when their list is non-empty; the simple fields go
# through cond(), and inheritance goes through linked_cond().
# NOTE(review): several literals in this method are closed on the physical line
# after the one they open on, and several are formatted with more arguments
# than they have %s placeholders (e.g. the 'examples' literal and the two ''
# literals formatted with self.id). Text appears to be missing from them --
# confirm against the original file before relying on this method.
# NOTE(review): definition and comment are formatted with "%s" unconditionally,
# so a value left at None would render as the text "None".
def __str__(self):
examples = ""
if len(self.examples) != 0:
#print self.examples[1]
examples = 'examples | |
' % ("".join( [ ('%s' % (example)) for example in self.examples]))
internal = ""
if len(self.construction_elements_internal) != 0:
internal = 'internal construction elements | |
' % ("".join( [ ('%s' % (elem)) for elem in self.construction_elements_internal]))
external = ""
if len(self.construction_elements_external) != 0:
external = 'external construction elements | |
' % ("".join( [ ('%s' % (elem)) for elem in self.construction_elements_external]))
s = "\n".join([
'' % (self.id, self.id) ,
'' % (self.id),
cond("type", self.typee),
cond("category", self.cat),
cond("evokes", self.evokes),
'definition | %s |
' % (self.definition),
cond("structure", self.structure),
linked_cond("inheritance", self.inheritance),
cond("cee", self.cee),
cond("coll", self.coll),
internal,
external,
examples,
'comment | %s |
' % (self.comment),
cond("reference", self.reference),
'
'])
return s#.encode('utf-8')
def linked_cond(label, item):
# Build one labelled row for item, or return '' when item is "" or None.
# Every substring of item matching \w*\.\.\d+ is first rewritten by sensify().
if(item != "" and item != None):
regexp = re.compile(r'\w*\.\.\d+', re.UNICODE)
item = re.sub(regexp, sensify, item)
# NOTE(review): the literal below is closed on the following physical line and
# shows two %s placeholders for three arguments (label, item, item). Text seems
# to be missing from it -- confirm against the original file.
return '%s | %s |
' % (label, item, item)
else:
return ''
def cond(label, item):
# Build one labelled row for item, or return '' when item is "" or None.
# Every substring of item matching \w*\.\.\d+ is first rewritten by sensify().
if(item != "" and item != None):
regexp = re.compile(r'\w*\.\.\d+', re.UNICODE)
item = re.sub(regexp, sensify, item)
# NOTE(review): the literal below is closed on the following physical line,
# which a single-quoted Python string cannot do. Text may be missing from it --
# confirm against the original file.
return '%s | %s |
' % (label, item)
else:
return ''
def sensify(matchobj):
    """Replacement callback for ``re.sub``: rewrite one matched sense id.

    The matched text is split on ".."; when that gives exactly two pieces
    they are formatted back together, otherwise the matched text is
    returned unchanged.
    """
    matched = matchobj.group(0)
    pieces = matched.split("..")
    if len(pieces) != 2:
        # Not a simple "head..tail" shape: leave the text as it was.
        return matched
    head, tail = pieces
    return '%s%s' % (head, tail)
class ConstructionElement:
    """A construction element: a tag name plus its (attribute, value) pairs."""

    def __init__(self, tagname, contents):
        # contents is a list of pairs, e.g. [("name", "Activity"), ("cat", "vb")]
        self.contents = contents
        self.tagname = tagname

    def __str__(self):
        """Return ``tagname: att=value att=value ...``.

        Every substring of a value that matches ``\\w*\\.\\.\\d+`` is
        rewritten by ``sensify`` before it is emitted.
        """
        sense_pattern = re.compile(r'\w*\.\.\d+', re.UNICODE)
        rendered = [
            att + "=" + sense_pattern.sub(sensify, value)
            for (att, value) in self.contents
        ]
        return self.tagname + ": " + " ".join(rendered)
class Example:
    """Render a list of parsed text parts as bracketed, labelled text.

    ``contents`` is a list of tuples. Two families of shapes are accepted:

    * ``("/freetext", text)`` -- plain text, emitted unchanged;
    * ``(attrs, text)`` -- a labelled node, ``attrs`` being a dict;
    * ``(attrs, "/branch", children)`` -- a labelled group (older shape);
    * ``("/leaf", attrs, text)`` and ``("/branch", attrs, children)`` --
      the tagged shapes returned by ``handle_example``.

    The tagged shapes used to fall into the three-element branch of
    ``__str__`` and fail with AttributeError, because their first element
    is a string rather than the attribute dict; both families are now
    handled, and nested groups are rendered recursively.
    """

    def __init__(self, contents):
        self.contents = contents

    def __str__(self):
        """Return all rendered parts joined by single spaces."""
        return " ".join(self._render(self.contents))

    def _node(self, attrs, text):
        # A labelled node: the text in brackets followed by its "name".
        return '[' + text + ']' + attrs.get("name", "")

    def _render(self, parts):
        # Flatten a part list into the list of output tokens.
        out = []
        for p in parts:
            if len(p) == 2:
                if p[0] == "/freetext":
                    out.append(p[1])
                else:
                    out.append(self._node(p[0], p[1]))
            elif len(p) == 3:
                if p[0] == "/leaf":
                    out.append(self._node(p[1], p[2]))
                    continue
                # Tagged branches carry the attributes second, the older
                # shape carries them first; children are last in both.
                attrs = p[1] if p[0] == "/branch" else p[0]
                out.append('[')
                out.extend(self._render(p[2]))
                out.append(']' + attrs.get("name", ""))
        return out
#class Constructicon_Element:
#def styleForNode(node):
#############################
# INPUT #
#############################
def handle_example(example):
    """Flatten an example element into a list of tagged parts.

    Returns a list containing, in document order:

    * ``("/freetext", text)`` for the element's leading text and for the
      tail text after each child (skipped when blank after stripping);
    * ``("/leaf", attrib, text)`` for a child without ``e`` children;
    * ``("/branch", attrib, parts)`` for a child that has ``e`` children,
      where ``parts`` is the result of recursing into that child.

    A leaf with no text at all (for instance a self-closing element) now
    yields an empty string instead of raising AttributeError on ``None``.
    """
    example_parts = []
    lead = (example.text or "").strip()
    if lead:
        example_parts.append(("/freetext", lead))
    for part in example:
        # findall() returns a (possibly empty) list, never None.
        if part.findall("e"):
            example_parts.append(("/branch", part.attrib, handle_example(part)))
        else:
            example_parts.append(("/leaf", part.attrib, (part.text or "").strip()))
        tail = (part.tail or "").strip()
        if tail:
            example_parts.append(("/freetext", tail))
    return example_parts
def take(entry, tagname, resort):
    """Return the stripped text of the first ``tagname`` child of ``entry``.

    ``resort`` is returned instead when there is no such child or when the
    child has no text.
    """
    child = entry.find(tagname)
    text = None if child is None else child.text
    return resort if text is None else text.strip()
def list_to_dict(alist):
    """Build a dict mapping ``item[0]`` to ``item[1]`` for each item.

    Later items win when the same key occurs more than once.
    """
    return dict((item[0], item[1]) for item in alist)
def _mixed_content_parts(element):
    """Flatten an element's mixed content into a part list for Example.

    The leading text and each child's tail become ``("/freetext", text)``
    pairs (skipped when blank after stripping); each child that has text
    becomes an ``(attribute-dict, stripped-text)`` pair. A missing element,
    or one without leading text, is handled instead of raising.
    """
    parts = []
    if element is None:
        return parts
    lead = (element.text or "").strip()
    if lead:
        parts.append(("/freetext", lead))
    for atom in element:
        if atom.text is not None:
            parts.append((list_to_dict(atom.items()), atom.text.strip()))
        tail = (atom.tail or "").strip()
        if tail:
            parts.append(("/freetext", tail))
    return parts


if __name__ == '__main__':
    # Script entry point (Python 2): read the constructicon XML from a fixed
    # file name and write the rendered page to stdout as UTF-8.
    tree = ElementTree()
    tree.parse("constructiconRec.xml")
    html = HTML()
    for entry in tree.findall("entry"):
        e = KonstruktikonEntry()
        e.id = entry.get("{http://www.w3.org/XML/1998/namespace}id")
        e.typee = take(entry, "type", "")
        e.cat = take(entry, "cat", "")
        e.inheritance = take(entry, "inheritance", "")
        e.evokes = take(entry, "evokes", "")
        # NOTE(review): definition and comment are set for every entry, even
        # when the element is absent (empty part list) -- this nesting was
        # reconstructed; confirm it matches the intended behaviour.
        e.set_definition(_mixed_content_parts(entry.find("definition")))
        e.structure = take(entry, "structure", "")
        e.cee = take(entry, "cee", "")
        e.coll = take(entry, "coll", "")
        c_e = entry.find("construction_elements")
        if c_e is not None:
            internal = c_e.find("internal")
            if internal is not None:
                for el in internal:
                    e.add_internal_construction_element(el.tag, el.items())
            external = c_e.find("external")
            if external is not None:
                for el in external:
                    e.add_external_construction_element(el.tag, el.items())
        examples = entry.find("examples")
        # An entry without <examples> simply contributes no examples.
        if examples is not None:
            for example in examples:
                example_parts = handle_example(example)
                # Debug dump goes to stderr so it cannot end up inside the
                # HTML document that is written to stdout below.
                sys.stderr.write("%r\n---\n" % (example_parts,))
                e.add_example(example_parts)
        e.set_comment(_mixed_content_parts(entry.find("comment")))
        e.reference = take(entry, "reference", "")
        html.add_entry(e)
    a = unicode(html)
    sys.stdout.write(a.encode("utf-8") + "\n")
def old_handle_example(example):
    """Older, single-level example parser.

    Returns a list containing, in document order:

    * ``("/freetext", text)`` for the leading text and for each child's
      tail (skipped when blank after stripping);
    * ``(attribute-dict, text)`` for a child without ``e`` children;
    * ``(attribute-dict, "/branch", sub_parts)`` for a child with ``e``
      children, where ``sub_parts`` covers only those direct ``e`` children
      and their tails (no recursion, and the child's own leading text is
      not included).

    The result list is now created locally and returned; before, the
    function appended to a name it never defined and returned nothing.
    Missing text is treated as an empty string.
    """
    # Would be nicer if this were recursive but it might be unnecessary so
    # we won't waste time changing it.
    example_parts = []
    first_text = (example.text or "").strip()
    if first_text:
        example_parts.append(("/freetext", first_text))
    for atom in example:
        subatoms = atom.findall("e")
        if not subatoms:
            example_parts.append((list_to_dict(atom.items()), (atom.text or "").strip()))
        else:
            subex = []
            for subatom in subatoms:
                subex.append((list_to_dict(subatom.items()), (subatom.text or "").strip()))
                extratext = (subatom.tail or "").strip()
                if extratext:
                    subex.append(("/freetext", extratext))
            example_parts.append((list_to_dict(atom.items()), "/branch", subex))
        thetail = (atom.tail or "").strip()
        if thetail:
            example_parts.append(("/freetext", thetail))
    return example_parts