# -*- coding: utf-8 -*-

"""
Default parameter settings for the HitEx (sentence selection)
web-service, tuned for detecting exercise items.
"""
# TODO: add dictionary example setup

default_parameters = { 
    "query_type": "", # from GUI (wordform or cqp or lemma)
    "query_w" : "",   # from GUI (e.g. u"huset", [deprel = "SS" & lemma contains "språk"].decode("utf-8"))
    "query_pos" : "", # from GUI (e.g.# "NN")
    "corpus_list":["rom99","flashback-resor","gp2013","gp2d","attasidor","lasbart","suc3","talbanken"],
    # additional options: ["attasidor","lasbart","talbanken","rom99",
    #                      "familjeliv-allmanna-fritid","flashback-mat",
    #                      "flashback-resor","wikipedia-sv" "sweachum","sweacsam","BLOGGMIX2012"]
    "max_kwics": 100,      # nr KWICs to process (limited for efficiency reasons)
    "maxhit": 10,          # maximum number of matches to return
    "target_edge" : "end", # to which edge the keyword should be close to
    "proportion" : 50,     # within which percentage of the sent the keyword should appear
    "target_cefr" : "B1",  # 'any' not supported - omit readability to obtain the same effect
    "voc_thr" : 0,         # percentage of words above the target CEFR level
    "min_len" : 6,
    "max_len" : 20, 
    "non_alpha_thr": 30,
    "non_lemmatized_thr": 30,
    "lex_to_func_thr": "", #0.8,
    "sensitive_voc_cats": ["all"], # Options: ["sex", "violence", "other", "religion", "secretion"],
    "preserve_bad":True}

# Default mode for each selection criterion. Every criterion is mapped to
# "filter", "ranker" or "" (NOTE(review): "" presumably means the criterion
# is not applied - confirm against the code that consumes this dict).
default_criteria = {
    "well_formedness": {
        "root": "filter",
        "sent_tokenization": "filter",
        "elliptic": "filter",
        "non_alpha": "filter",
        "non_lemmatized": "filter",
    },
    "isolability": {
        "struct_conn": "filter",
        "yn_answer": "filter",
        "anaphora-PN": "filter",
        "anaphora-AB": "filter",
    },
    "readability": "filter",
    "typicality": "ranker",
    "sensitive_voc": "filter",
    "other_criteria": {
        "length": "filter",
        "proper_name": "ranker",
        "repkw": "filter",
        "kw_position": "",
        "modal_verb": "",
        "participle": "",  # same as korp
        "sverb": "",  # same as korp
        "interrogative": "filter",
        "neg_form": "",
        "abbrev": "filter",
        "direct_speech": "filter",
        "diff_voc_kelly": "filter",
        "svalex_fr": "",
        "out_of_svalex": "filter",
    },
}