#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Convert tab-separated lexicon lines on stdin into paradigm entries.

The mapping file ``dalin_paradigm.txt`` (UTF-8, two tab-separated columns)
translates a source paradigm name into the name to emit.  Each stdin line
(UTF-8) must hold exactly six tab-separated fields; the first is the word,
the fourth the source paradigm and the fifth the entry id.  For the first
occurrence of every id, one line of the form

    <paradigm> "<word>" {id("<id>")} ;

is printed, where <paradigm> falls back to ``prim`` when the source
paradigm is not in the mapping.  Words of a single character are skipped.

This script targets Python 2 (byte-string stdin, ``str.decode``).
"""

import codecs
import sys
import urllib

# NOTE(review): cjson and urllib are not used by the code visible here; the
# imports are kept in case other tooling relies on them.
import cjson

dalin_paradigm = {}
with codecs.open('dalin_paradigm.txt', 'r', 'utf-8') as f:
    for mapping_line in f:
        # Strip only the line terminator; slicing off the last character
        # would eat real data when the final line has no trailing newline.
        columns = mapping_line.rstrip(u'\r\n').split('\t')
        try:
            (source_name, target_name) = columns
        except ValueError:
            # Blank line or wrong number of columns: skip it.
            continue
        dalin_paradigm[source_name] = target_name

seen = set()
# Iterate the stream directly instead of loading all input with readlines().
for raw_line in sys.stdin:
    try:
        fields = raw_line.decode('UTF-8').rstrip(u'\r\n').split('\t')
        (w, w_norm, pos, p, entry_id, typ) = fields
    except ValueError:
        # Blank line, wrong field count, or undecodable bytes
        # (UnicodeDecodeError is a ValueError): skip the line.  Anything
        # else, e.g. an output error or Ctrl-C, is allowed to propagate.
        continue
    if entry_id in seen:
        continue
    # The id is marked as seen even when the word is skipped below.
    seen.add(entry_id)
    paradigm = dalin_paradigm.get(p, 'prim')
    if len(w) > 1:  # ignore letters
        print(('%s "%s" {id("%s")} ;' % (paradigm, w, entry_id)).encode('UTF-8'))