# -*- coding: utf-8 -*-
"""
Default parameter settings for the HitEx (sentence selection) web-service,
tuned for detecting exercise items.

Two module-level dictionaries are defined:

* ``default_parameters`` -- query settings and numeric thresholds.
* ``default_criteria``   -- per-criterion setting; the values used in this
  file are "filter", "ranker" or "" (an empty string presumably leaves the
  criterion unused -- confirm against the code that consumes this dict).
"""
# TODO: add dictionary example setup

default_parameters = {
    # --- Query, filled in from the GUI ---
    # One of: wordform, cqp, lemma.
    "query_type": "",
    # e.g. u"huset", [deprel = "SS" & lemma contains "språk"].decode("utf-8")
    "query_w": "",
    # e.g. "NN"
    "query_pos": "",

    # --- Corpora to search ---
    # Additional options: ["attasidor", "lasbart", "talbanken", "rom99",
    #                      "familjeliv-allmanna-fritid", "flashback-mat",
    #                      "flashback-resor", "wikipedia-sv", "sweachum",
    #                      "sweacsam", "BLOGGMIX2012"]
    "corpus_list": [
        "rom99",
        "flashback-resor",
        "gp2013",
        "gp2d",
        "attasidor",
        "lasbart",
        "suc3",
        "talbanken",
    ],

    # --- Result size ---
    # Number of KWICs to process (limited for efficiency reasons).
    "max_kwics": 100,
    # Maximum number of matches to return.
    "maxhit": 10,

    # --- Keyword position ---
    # Edge of the sentence the keyword should be close to.
    "target_edge": "end",
    # Percentage of the sentence within which the keyword should appear.
    "proportion": 50,

    # --- Readability / vocabulary ---
    # 'any' is not supported - omit readability to obtain the same effect.
    "target_cefr": "B1",
    # Percentage of words above the target CEFR level.
    "voc_thr": 0,

    # --- Length and well-formedness thresholds ---
    "min_len": 6,
    "max_len": 20,
    "non_alpha_thr": 30,
    "non_lemmatized_thr": 30,
    # Left empty here; the original carried a commented-out value of 0.8.
    "lex_to_func_thr": "",

    # --- Sensitive vocabulary ---
    # Options: ["sex", "violence", "other", "religion", "secretion"]
    "sensitive_voc_cats": ["all"],

    "preserve_bad": True,
}

default_criteria = {
    "well_formedness": {
        "root": "filter",
        "sent_tokenization": "filter",
        "elliptic": "filter",
        "non_alpha": "filter",
        "non_lemmatized": "filter",
    },
    "isolability": {
        "struct_conn": "filter",
        "yn_answer": "filter",
        "anaphora-PN": "filter",
        "anaphora-AB": "filter",
    },
    "readability": "filter",
    "typicality": "ranker",
    "sensitive_voc": "filter",
    "other_criteria": {
        "length": "filter",
        "proper_name": "ranker",
        "repkw": "filter",
        "kw_position": "",
        "modal_verb": "",
        "participle": "",  # same as korp
        "sverb": "",       # same as korp
        "interrogative": "filter",
        "neg_form": "",
        "abbrev": "filter",
        "direct_speech": "filter",
        "diff_voc_kelly": "filter",
        "svalex_fr": "",
        "out_of_svalex": "filter",
    },
}