'''
Created on Dec 5, 2016

@author: David

Print the frequency-weighted average of each line of a distribution file.

Every line of the input is treated as one record: its first and last
characters are discarded (presumably enclosing brackets -- confirm against
the data file) and the remainder is read as comma-separated
``value:frequency`` integer pairs.
'''
import codecs

INPUT_PATH = "f:/parll data/data/syllable_len_dist_nz.txt"


def average_length(line):
    """Return the frequency-weighted mean of one distribution record.

    Args:
        line: One raw line of the input file. Trailing whitespace is
            stripped, then the first and last characters are dropped
            before the ``value:frequency`` pairs are parsed.

    Returns:
        ``sum(value * frequency) / sum(frequency)``, or ``None`` when the
        record holds no pairs or its frequencies sum to zero.

    Raises:
        ValueError: If a pair is not exactly two integers separated by
            a colon.
    """
    body = line.rstrip()[1:-1]
    # Blank lines and empty records would otherwise reach int("") below.
    if not body.strip():
        return None

    total_freq = 0
    weighted_sum = 0
    for pair in body.split(","):
        value, freq = (int(part.strip()) for part in pair.split(":"))
        total_freq += freq
        weighted_sum += value * freq

    # An all-zero record has no defined mean; avoid ZeroDivisionError.
    if total_freq == 0:
        return None
    return weighted_sum / total_freq


def main(path=INPUT_PATH):
    """Print the weighted average for every non-empty record in *path*."""
    # The context manager closes the file even if a record fails to parse.
    with codecs.open(path, "r", "utf-8") as handle:
        for line in handle:
            avg_len = average_length(line)
            if avg_len is not None:
                print(avg_len)


if __name__ == "__main__":
    main()