#!/usr/bin/python # -*- coding: utf-8 -*- import cgitb cgitb.enable() import sys, os, cgi, json from HitEx.matching_set2 import MatchingSet from partial_tree_kernel import calculate_similarity #activate_this = os.path.join(os.path.dirname(__file__), 'venv/bin/activate_this.py') #execfile(activate_this, dict(__file__=activate_this)) def create_mset_json(parameters, criteria): ms = MatchingSet() ms.initialize(parameters, criteria) ms.create_set() url = ms.get_url() j = ms.to_json() return (ms, j, url) def main(): """Main CGI handler.""" # Print JSON header print_header() # Convert form fields to regular dictionary form = dict((field, cgi.FieldStorage().getvalue(field)) for field in cgi.FieldStorage().keys()) default_parameters = { #"query_type": "lemma", # wordform or cqp or lemma #"query_w" : u'bröd', # u"huset", [deprel = "SS" & lemma contains "språk"].decode("utf-8") #"query_w" : '[ref = "01" & pos = "NN" & lemma contains "dörr"]'.decode("utf-8"), #"query_w" :'[msd = "VB.INF.AKT" &]'.decode("utf-8"), "query_pos" : "any", #"corpus_list":["ROM99","GP2012","LASBART"], #randomly pick one of them? "BLOGGMIX2012" "corpus_list":["rom99","gp2010","gp2011","gp2012","gp2013","gp2d","attasidor","lasbart","suc3","talbanken"], #"wikipedia-sv" "sweachum","sweacsam" "max_kwics": 300, # nr KWICs to process (limited for efficiency reasons) "maxhit": 10, # maximum number of matches to return "target_edge" : "end", # to which edge the keyword should be close to "proportion" : 50, # within which percentage of the sent the keyword should appear "target_cefr" : "B1", "voc_thr" : 0, # percentage of words above the target CEFR level "min_len" : 6, "max_len" : 20, "non_alpha_thr": 30, "non_lemmatized_thr": 30, "lex_to_func_thr": "", #0.8, "sensitive_voc_cats": ["all"], # ["sex", "violence", "other", "religion", "secretion"], "preserve_bad":True} default_criteria = { "well_formedness":{"root":"filter", "sent_tokenization":"filter", "elliptic":"filter", "non_alpha":"filter", "non_lemmatized":"filter"}, "isolability":{"struct_conn":"filter", "yn_answer":"filter", "anaphora-PN":"filter", "anaphora-AB":"filter"}, "readability":"filter", "typicality": "ranker", "sensitive_voc": "filter", "other_criteria":{"length":"filter", "proper_name":"filter", "repkw":"filter", "kw_position":"", "modal_verb":"", "participle":"", # same as korp "sverb":"", # same as korp "interrogative":"filter", "neg_form":"filter", "abbrev":"filter", "direct_speech":"filter", "diff_voc_kelly":"rank", "svalex_fr":"filter", "out_of_svalex":"filter"}} # Get input values + analyze text if form.get("command", "") == "hitex": if "query_type" in form: query_type = form["query_type"] else: json.dumps({"Error":"Missing 'query_type'"}) if "query_w" in form: word = form["query_w"] else: json.dumps({"Error":"Missing 'query_w'"}) #default_parameters["target_cefr"] = form.get("target_cefr", "any") parameters = {} criteria = {} print "started" if form["use_defaults"]: parameters = default_parameters parameters["query_type"] = query_type parameters["query_w"] = word criteria = default_criteria else: for p in default_parameters: if p in form: parameters[p] = form[p] for cr in default_criteria: if type(default_criteria[cr]) == dict: for sub_cr in default_criteria[cr]: if sub_cr in form: parameters[sub_cr] = form[sub_cr] elif cr in form: parameters[cr] = form[cr] mset, json_ms, url = create_mset_json(parameters, criteria) result = json_ms else: result = json.dumps({"Error":"MMX Unknown or missing command. Command option(s): complexity, log","code":501}) print_object(json.loads(result), form) def print_header(): """Prints the JSON header.""" print "Content-Type: application/json" print "Access-Control-Allow-Origin: *" print "Access-Control-Allow-Methods: POST, GET, OPTIONS" print "Access-Control-Allow-Headers: Content-Type" print def print_object(obj, form): """Prints an object in JSON format. The CGI form can contain optional parameters 'callback' and 'indent' which change the output format. """ callback = form.get("callback") if callback: print callback + "(", try: indent = int(form.get("indent")) print json.dumps(obj, sort_keys=True, indent=indent), except: print json.dumps(obj, separators=(",",":")) if callback: print ")", print if __name__ == "__main__": main()