# scikit-learn
from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import Perceptron
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import LinearSVC


def train_sk(ldocs, clname='perc'):
    """Train a scikit-learn classifier on labelled bag-of-words documents.

    Args:
        ldocs: Iterable of ``(label, doc)`` pairs, where ``doc`` is an
            iterable of hashable tokens (presumably word strings). It is
            consumed in a single pass, so a generator is acceptable.
        clname: Which classifier to train: ``'perc'`` (perceptron),
            ``'nb'`` (multinomial Naive Bayes) or ``'svm'`` (linear
            support vector machine).

    Returns:
        A ``(vec, classifier)`` tuple: the fitted ``DictVectorizer`` and the
        fitted classifier. Pass this tuple to ``classify_sk``.

    Raises:
        ValueError: If ``clname`` is not one of the supported names.
    """
    # Pick the classifier first so that an unknown name fails fast, before
    # any vectorization work is done.
    if clname == 'perc':
        # perceptron
        # The old ``n_iter`` keyword no longer exists in scikit-learn;
        # ``max_iter=20`` with ``tol=None`` keeps the fixed 20 epochs
        # (no early stopping).
        classifier = Perceptron(max_iter=20, tol=None)
    elif clname == 'nb':
        # Naive Bayes
        classifier = MultinomialNB()
    elif clname == 'svm':
        # linear support vector machine
        classifier = LinearSVC()
    else:
        raise ValueError(
            "unknown classifier name %r (expected 'perc', 'nb' or 'svm')"
            % (clname,)
        )

    # Split labels and documents in one pass so ldocs may be a one-shot
    # iterator.
    lbls = []
    docs = []
    for label, doc in ldocs:
        lbls.append(label)
        docs.append(bow_to_avmap(doc))

    vec = DictVectorizer()
    encoded_docs = vec.fit_transform(docs)
    classifier.fit(encoded_docs, lbls)
    return vec, classifier


def bow_to_avmap(doc):
    """Map every token in ``doc`` to ``True`` (attribute-value dict).

    Duplicate tokens collapse to a single key, so the result records only
    the presence of a token, not its count.
    """
    return {w: True for w in doc}


def classify_sk(v_c, doc):
    """Predict the label of one document with a model from ``train_sk``.

    Args:
        v_c: The ``(vec, classifier)`` tuple returned by ``train_sk``.
        doc: Iterable of tokens, in the same form used for training.

    Returns:
        Whatever ``classifier.predict`` returns for a one-row input; for
        scikit-learn classifiers this should be an array holding a single
        predicted label.
    """
    vec, classifier = v_c
    # Wrap the single mapping in a list: one sample, one row.
    encoded_doc = vec.transform([bow_to_avmap(doc)])
    return classifier.predict(encoded_doc)