from nltk.corpus import cmudict
import sys
import re

# The single command-line argument names the text to analyse; the value "-"
# is treated further down as "read the text from standard input".
if len(sys.argv) < 2:
    # Exit with a usage line instead of an IndexError traceback.
    sys.exit("usage: %s FILE|-" % sys.argv[0])
filename = sys.argv[1]

# CMU Pronouncing Dictionary: maps a lower-cased word to a list of
# pronunciations, each pronunciation being a list of phoneme strings.
d = cmudict.dict()
# Words grouped by syllable count: {syllable_count: [word, ...]}.
syllables = {}
# Sum of syllables over every word that was found in the dictionary.
total_syllables = 0

def nsyl(word):
    """Return the syllable count of every known pronunciation of ``word``.

    The word is lower-cased and looked up in the module-level dictionary
    ``d``. For each pronunciation, the phonemes whose last character is a
    digit are counted; CMUdict attaches a stress digit to vowel phonemes,
    so this gives one count per syllable.

    Returns a list with one integer per pronunciation, in dictionary order.
    Raises ``KeyError`` when the word is not present in ``d``.
    """
    pronunciations = d[word.lower()]
    counts = []
    for phonemes in pronunciations:
        stressed = sum(1 for phoneme in phonemes if phoneme[-1].isdigit())
        counts.append(stressed)
    return counts

# Load the whole input text: "-" selects standard input, anything else is
# treated as a path to a text file.
if filename == '-':
    data = sys.stdin.read()
else:
    # The context manager closes the file handle as soon as it has been read.
    with open(filename) as infile:
        data = infile.read()

# Drop parentheses, then split on whitespace and basic punctuation.
data = re.sub(r"[\(\)]", "", data)
# re.split yields empty strings when the text starts or ends with a
# delimiter. Filtering them out (rather than blindly dropping the last item)
# keeps the final word of a text that has no trailing newline/punctuation and
# stops empty tokens from being reported as errors or counted as words.
data = [token for token in re.split(r"[\s:.,\-\?\"]+", data) if token]

# Group every word under the syllable count of its first listed pronunciation.
for word in data:
    try:
        count = nsyl(word)[0]
    except (KeyError, IndexError):
        # Word is missing from the dictionary (or has no pronunciation):
        # report it and carry on; it still counts towards len(data) below.
        print("Error with %s" % word)
        continue
    syllables.setdefault(count, []).append(word)

# Print one line per syllable count: number of words, share of all words,
# and the words themselves when there are fewer than ten of them.
word_count = len(data)
for key, values in syllables.items():
    share = 100 * len(values) / word_count
    if len(values) < 10:
        print("%d: %d (%f%%) (%r)" % (key, len(values), share, values))
    else:
        print("%d: %d (%f%%)" % (key, len(values), share))
    total_syllables += key * len(values)

# Average syllables per word over all tokens; unrecognised words contribute
# zero syllables. Guard against empty input to avoid a ZeroDivisionError.
if word_count:
    print(total_syllables / word_count)
else:
    print("No words found in input")
