# Natural Language Toolkit: Evaluation
#
# Copyright (C) 2001-2017 NLTK Project
# Author: Edward Loper
#         Steven Bird
# URL:
# For license information, see LICENSE.TXT

from __future__ import print_function, division

from math import fabs
import operator
from random import shuffle
from functools import reduce

from six.moves import range, zip

try:
    from scipy.stats.stats import betai
except ImportError:
    # scipy is optional; users of ``betai`` must check for None first.
    betai = None

from nltk.util import LazyConcatenation, LazyMap


# ---------------------------------------------------------------------------
# NOTE(review): DAMAGED SPAN -- restore from a pristine copy of this file.
#
# The text between the BEGIN/END markers below is incomplete.  The docstring
# of ``accuracy`` breaks off in the middle of the expression that starts
# with ``0``; what follows it is the end of a different routine (it
# increments ``c``, computes ``significance`` and returns
# ``(significance, c, shuffles)``) whose ``def`` line and opening statements
# are not present.  Nothing has been reconstructed or guessed: the surviving
# text is kept verbatim, one statement per line, commented out so that the
# rest of the module parses.  The original indentation is unknown, so none
# is shown.
#
# BEGIN damaged span
# def accuracy(reference, test):
# """
# Given a list of reference values and a corresponding list of test
# values, return the fraction of corresponding values that are equal.
# In particular, return the fraction of indices ``0= actual_stat:
# c += 1
# if verbose and i % 10 == 0:
# print('pseudo-statistic: %f' % pseudo_stat)
# print('significance: %f' % ((c + 1) / (i + 1)))
# print('-' * 60)
# significance = (c + 1) / (shuffles + 1)
# if verbose:
# print('significance: %f' % significance)
# if betai:
# for phi in [0.01, 0.05, 0.10, 0.15, 0.25, 0.50]:
# print("prob(phi<=%f): %f" % (phi, betai(c, shuffles, phi)))
# return (significance, c, shuffles)
# END damaged span
# ---------------------------------------------------------------------------


def demo():
    """
    Print a small worked example of the scoring functions.

    Two hard-coded tag sequences are compared with ``accuracy``; the sets
    of their distinct tags are then compared with ``precision``, ``recall``
    and ``f_measure``.  Everything is written to standard output and
    nothing is returned.

    NOTE(review): this function needs ``accuracy``, ``precision``,
    ``recall`` and ``f_measure`` to be defined at module level; none of
    them is defined in the text visible here.
    """
    rule = '-' * 75

    # Sequence comparison: position-by-position agreement.
    print(rule)
    reference = 'DET NN VB DET JJ NN NN IN DET NN'.split()
    test = 'DET VB VB DET NN NN NN IN DET NN'.split()
    print('Reference =', reference)
    print('Test =', test)
    print('Accuracy:', accuracy(reference, test))

    # Set comparison: duplicates and ordering are discarded by set().
    print(rule)
    reference_set = set(reference)
    test_set = set(test)
    print('Reference =', reference_set)
    print('Test = ', test_set)
    print('Precision:', precision(reference_set, test_set))
    print(' Recall:', recall(reference_set, test_set))
    print('F-Measure:', f_measure(reference_set, test_set))
    print(rule)


if __name__ == '__main__':
    demo()