#!/usr/bin/python
# -*- coding: utf-8 -*-

import cgitb
cgitb.enable()
import sys, os, cgi, json
from HitEx.matching_set import MatchingSet

#activate_this = os.path.join(os.path.dirname(__file__), 'venv/bin/activate_this.py')
#execfile(activate_this, dict(__file__=activate_this))

def create_mset_json(parameters, criteria):
    """Build a MatchingSet and return it with its JSON form and URL.

    A MatchingSet is constructed from `parameters` and `criteria`, its
    create_set() method is run, and then its URL and JSON representation
    are fetched (in that order).

    Returns a 3-tuple: (the MatchingSet instance, the value of to_json(),
    the value of get_url()).
    """
    matching_set = MatchingSet(parameters, criteria)
    matching_set.create_set()

    # The URL is requested before the JSON, same as the call order callers
    # have always relied on.
    set_url = matching_set.get_url()
    set_json = matching_set.to_json()

    return (matching_set, set_json, set_url)

def main():
    """Main CGI handler.

    Prints the response header, reads the request fields, validates them
    and prints either the JSON returned by create_mset_json() or a JSON
    error object of the form {"Error": <message>, "code": <int>}.
    """
    # Print JSON header
    print_header()

    # Convert form fields to regular dictionary. FieldStorage is created
    # exactly once: it may consume standard input, so a second instance
    # would not see the fields of a POST request.
    storage = cgi.FieldStorage()
    form = dict((field, storage.getvalue(field)) for field in storage.keys())
    #form = {"command":"hitex", "query_type":"lemma", "query_w":"hund", "use_defaults":"1"} # for testing

    if form.get("command", "") != "hitex":
        print_object({"Error": "Unknown or missing command. Command "
                               "option(s): hitex, complexity, log",
                      "code": 404}, form)
        return

    try:
        parameters, criteria = _build_search_input(form)
    except _BadRequest as err:
        # Report the problem to the client instead of running the search
        # with incomplete or invalid input.
        print_object({"Error": str(err), "code": 400}, form)
        return

    # Analyze sentences for match
    json_ms = create_mset_json(parameters, criteria)[1]
    print_object(json.loads(json_ms), form)


class _BadRequest(Exception):
    """Raised for a missing or invalid request field; str() is the message."""


def _default_parameters():
    """Return a fresh dict with the default search parameters."""
    return {
        #"query_type": "lemma", # wordform or cqp or lemma
        #"query_w" : u'bröd',   # u"huset", [deprel = "SS" & lemma contains "språk"].decode("utf-8")
        #"query_pos" : "", # "NN",
        #"corpus_list":["ROM99","GP2012","LASBART"], #randomly pick one of them? "BLOGGMIX2012"
        "corpus_list": ["rom99", "flashback-resor", "gp2013", "gp2d",
                        "attasidor", "lasbart", "suc3", "talbanken"],  # "wikipedia-sv" "sweachum","sweacsam"
        #"corpus_list":["attasidor","lasbart","talbanken","rom99",
        #               "familjeliv-allmanna-fritid","flashback-mat",
        #               "flashback-resor"],
        "max_kwics": 100,       # 300, nr KWICs to process (limited for efficiency reasons)
        "maxhit": 10,           # maximum number of matches to return
        "target_edge": "end",   # to which edge the keyword should be close to
        "proportion": 50,       # within which percentage of the sent the keyword should appear
        "target_cefr": "B1",    # 'any' not supported - omit readability to obtain the same effect
        "voc_thr": 0,           # percentage of words above the target CEFR level
        "min_len": 6,
        "max_len": 20,
        "non_alpha_thr": 30,
        "non_lemmatized_thr": 30,
        "lex_to_func_thr": "",  # 0.8,
        "sensitive_voc_cats": ["all"],  # ["sex", "violence", "other", "religion", "secretion"],
        "preserve_bad": True,
    }


def _default_criteria():
    """Return a fresh dict with the default search criteria.

    Top-level values are either a mode string or a dict mapping
    sub-criterion names to mode strings.
    """
    #default_criteria = {"readability":"ranker"}
    # TO DO: change to {"filters":["readability"], "rankers":[]}
    return {
        "well_formedness": {"root": "filter", "sent_tokenization": "filter",
                            "elliptic": "filter", "non_alpha": "filter",
                            "non_lemmatized": "filter"},
        "isolability": {"struct_conn": "filter", "yn_answer": "filter",
                        "anaphora-PN": "filter", "anaphora-AB": "filter"},
        "readability": "filter",
        "typicality": "ranker",
        "sensitive_voc": "filter",
        "other_criteria": {"length": "filter",
                           "proper_name": "ranker",
                           "repkw": "filter",
                           "kw_position": "",
                           "modal_verb": "",
                           "participle": "",  # same as korp
                           "sverb": "",       # same as korp
                           "interrogative": "filter",
                           "neg_form": "",
                           "abbrev": "filter",
                           "direct_speech": "filter",
                           "diff_voc_kelly": "filter",
                           "svalex_fr": "",
                           "out_of_svalex": "filter"},
    }


def _build_search_input(form):
    """Validate the 'hitex' request fields and assemble the search input.

    `form` is the plain dict of request fields. Returns the tuple
    (parameters, criteria) to hand to create_mset_json(). Raises
    _BadRequest, carrying a user-facing message, for the first missing or
    invalid field found.
    """
    parameters = {}
    criteria = {}

    # Get input values
    if "query_type" not in form:
        raise _BadRequest("Missing 'query_type', choose what TYPE of term "
                          "to search for (lemma, wordform or cqp).")
    parameters["query_type"] = form["query_type"]

    if "query_w" not in form:
        raise _BadRequest("Missing 'query_w', choose what term to search "
                          "for.")
    parameters["query_w"] = form["query_w"]

    #if "query_pos" in form: #when not specified, any POS will be matched
    #    parameters["query_pos"] = form["query_pos"]
    if "target_cefr" in form:
        if form["target_cefr"] not in ["A1", "A2", "B1", "B2", "C1"]:
            raise _BadRequest("Argument 'target_cefr' has to be a CEFR "
                              "level (A1, A2, B1, B2 or C1)")
    elif "readability" in form:
        raise _BadRequest("To use 'readability' as a criteria, specify "
                          "'target_cefr' (A1, A2, B1, B2 or C1)")

    if "random_seed" in form:
        try:
            parameters["random_seed"] = int(form["random_seed"])
        except (TypeError, ValueError):
            # TypeError covers a repeated field, which arrives as a list.
            raise _BadRequest("The argument 'random_seed' has to be an "
                              "integer value")

    default_parameters = _default_parameters()
    default_criteria = _default_criteria()
    #default_parameters["target_cefr"] = form.get("target_cefr", "any")

    if form.get("use_defaults") in ["true", True, 1, "1"]:
        parameters.update(default_parameters)
        criteria.update(default_criteria)
    elif "use_defaults" in form:
        # Present, but not one of the accepted "on" values above.
        raise _BadRequest("'use_defaults' should be set to 'true' for using "
                          "default parameters and criteria for the search.")
    else:
        # NOTE(review): this list is empty, so the "mandatory" error below
        # can never fire although its message names three parameters --
        # confirm whether they should be listed here.
        mandatory_params = []
        for name in default_parameters:
            if name in form:
                parameters[name] = form[name]
            elif name in mandatory_params:
                raise _BadRequest("The arguments 'corpus_list', 'maxhit' "
                                  "and 'max_kwics' are mandatory. Set "
                                  "'use_defaults' to 'true' if you don't "
                                  "want to specify these.")
        # NOTE(review): criteria supplied in the request are stored in
        # `parameters` and `criteria` stays empty on this path; kept as-is,
        # verify against what MatchingSet expects.
        for name, setting in default_criteria.items():
            if isinstance(setting, dict):
                for sub_name in setting:
                    if sub_name in form:
                        parameters[sub_name] = form[sub_name]
            elif name in form:
                parameters[name] = form[name]

    return parameters, criteria

def print_header():
    """Prints the JSON header.

    Writes the Content-Type and CORS header lines to standard output,
    followed by the blank line that ends the header section.
    """
    header_lines = (
        "Content-Type: application/json",
        "Access-Control-Allow-Origin: *",
        "Access-Control-Allow-Methods: POST, GET, OPTIONS",
        "Access-Control-Allow-Headers: Content-Type",
    )
    # sys.stdout.write is used instead of the print statement so this
    # function is valid syntax on both Python 2 and Python 3.
    sys.stdout.write("\n".join(header_lines) + "\n\n")


def print_object(obj, form):
    """Prints an object in JSON format.

    The CGI form can contain optional parameters 'callback' and 'indent'
    which change the output format:

    * 'indent': when it converts to an int, the JSON is written with
      sorted keys and that indentation; otherwise the compact form
      (separators "," and ":") is written.
    * 'callback': when non-empty, the JSON is wrapped in "callback( ... )".
    """
    callback = form.get("callback")
    # NOTE(review): 'callback' comes from the request and is written out
    # verbatim; consider restricting it to a JavaScript identifier.

    try:
        indent = int(form.get("indent"))
    except (TypeError, ValueError):
        # Missing (None), repeated (list) or non-numeric 'indent'.
        indent = None

    pretty = indent is not None
    if pretty:
        payload = json.dumps(obj, sort_keys=True, indent=indent)
    else:
        # The compact form has always been followed by its own newline.
        payload = json.dumps(obj, separators=(",", ":")) + "\n"

    if callback:
        # The spaces match the separators that the former chained print
        # statements put between the pieces, keeping the output unchanged.
        payload = callback + "( " + payload + (" )" if pretty else ")")

    # sys.stdout.write keeps this valid on both Python 2 and Python 3.
    sys.stdout.write(payload + "\n")

# Run the handler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()