# # # # # # # # # # # # # # # # # # # # # # # # # # # #
#  Framework provided by Daan de Graaf(UvA)           #
#  For Automaten en Formele Talen                     #
#  Under Guidance of Inge Bethke(UvA)                 #
#  Last edited 2018-04-03 by Bas van den Heuvel (UvA) #
# # # # # # # # # # # # # # # # # # # # # # # # # # # #

from Automaton import FA
import string
import pickle
import sys

# Character classes shared by the transition table and by _type_:
# every ASCII letter (both cases) and every decimal digit.
characters = set(string.ascii_letters)
numbers = set(string.digits)


def create_lexer():
    '''Builds and returns the FA object that drives the lexer.

    The automaton is described by the usual five components handed to FA:
    the states, the input alphabet, the transition table, the start state
    and the accepting states. Letters and digits are represented by the
    class symbols '<character>' and '<digit>' instead of by each individual
    character.
    '''
    states = ['START', 'ID', 'INT', 'ADD', 'SUB', 'MUL', 'DIV', 'LBT', 'RBT',
              'EQU', 'WHITE', 'EOF']
    alphabet = ['<digit>', '<character>', '/', '=', '(', ')', '+', '*', '-',
                '\n', ' ', '\t']

    # Transitions leaving START: the first symbol decides the token kind.
    from_start = {'<character>': 'ID', '<digit>': 'INT'}
    from_start.update([('+', 'ADD'), ('-', 'SUB'), ('*', 'MUL'),
                       ('/', 'DIV'), ('(', 'LBT'), (')', 'RBT'),
                       ('=', 'EQU')])
    from_start['\n'] = 'EOF'
    from_start[' '] = 'WHITE'
    from_start['\t'] = 'WHITE'

    # Only identifiers and integers can grow beyond a single symbol.
    transitions = {
        'START': from_start,
        'ID': {'<character>': 'ID', '<digit>': 'ID'},
        'INT': {'<digit>': 'INT'},
    }

    start_state = states[0]
    # Every state except START is accepting.
    accepting = states[1:]
    return FA(states, alphabet, transitions, start_state, accepting)


def lexer(M, source):
    ''' Splits source into a list of (lexeme, state name) tuples.

    Each character is mapped to its alphabet symbol with _type_ and fed to
    the automaton. As long as M.move() reports success the character is
    added to the current lexeme; when it fails, the lexeme collected so far
    is emitted together with the name of the state M is in, M is reset and
    the failing character starts the next lexeme.

    M      -- automaton providing move(symbol), reset() and
              current_state.name; it is expected to be in its start state
              when this function is called (the caller resets it).
    source -- the text to tokenize, normally one line of the program.

    Returns the list of tuples, always terminated by ('\\n', 'EOF').

    A lexeme that is still pending when the input runs out (which happens
    when source does not end in a newline, e.g. the last line of a file) is
    emitted as well instead of being silently dropped.
    '''
    tokens = []
    pending = ""
    for char in source:
        symbol = _type_(char)
        if M.move(symbol):
            pending += str(char)
        else:
            # No transition for this symbol: close the current lexeme and
            # restart the automaton on the character that did not fit.
            tokens.append((pending, M.current_state.name))
            M.reset()
            M.move(symbol)
            pending = char

    # Flush the last lexeme. When the input ended in a newline the pending
    # lexeme is that newline (state 'EOF'), which is covered by the marker
    # appended below, so it must not be emitted twice.
    if pending and M.current_state.name != 'EOF':
        tokens.append((pending, M.current_state.name))

    tokens.append(('\n', 'EOF'))
    return tokens


def _type_(source):
    ''' Maps a source character to the symbol used in the transition table.

    Digits become '<digit>' and ASCII letters become '<character>'; any
    other character is returned unchanged.
    '''
    # Checked in order; the first class that contains the character wins.
    symbol_classes = (('<digit>', numbers), ('<character>', characters))
    for symbol, members in symbol_classes:
        if source in members:
            return symbol
    return source


def main(program):
    ''' Tokenizes the file named by program and stores the result.

    The automaton is created (and plotted under the name 'lexer'), the file
    is read and every line is passed to the lexer separately. The list with
    one token list per line is printed and pickled to "<program>.lex".

    program -- path of the source file to tokenize.

    Exits with a 'FileError' message when the file cannot be opened or
    read. Other errors are no longer reported as a missing file; they
    propagate with their own traceback.
    '''
    Lexer = create_lexer()
    Lexer.plot(filename='lexer')
    try:
        with open(program) as f:
            content = f.readlines()
    except OSError:
        # Only I/O failures are reported here; a bare except would also
        # swallow KeyboardInterrupt and mislabel unrelated bugs.
        sys.exit('FileError: file "' + str(program) + '" not found')

    lexemes = []
    for line in content:
        lexemes.append(lexer(Lexer, line))
        # Put the automaton back in its start state for the next line.
        Lexer.reset()

    print('Succesfully tokenized!')
    print(lexemes)
    # The with-block makes sure the output is flushed and closed.
    with open(program + ".lex", "wb") as lex_file:
        pickle.dump(lexemes, lex_file)

if __name__ == '__main__':
    # The path of the program to tokenize is the first command line
    # argument; without it there is nothing to do.
    if len(sys.argv) < 2:
        sys.exit('RuntimeError: Use lexer.py [prog]')

    main(sys.argv[1])
