# Integer token types for tokens that are not keywords.
EXPRESSION, END_OF_INPUT = 0, 1

# Word keywords; each keyword string is also used as the token type.
SOLVE, FOR, AND = "solve", "for", "and"

# Single-character keywords.
EQUALS, COMMA = "=", ","

# Every keyword the tokenizer recognises as a standalone token.
keyword_tokens = [
    SOLVE,
    FOR,
    AND,
    EQUALS,
    COMMA,
]


class Token:
    def __init__(self, type: int|str, value: str = None):
        self.type = type
        self.value = value

    def __repr__(self):
        if self.value == None:
            return f"{self.type}"
        return f"{self.type}|'{self.value}'"


def tokenize(expression: str) -> list[Token]:
    """Split a math instruction into keyword and expression tokens.

    Each keyword in ``keyword_tokens`` is matched case-insensitively against
    whole whitespace-delimited words and becomes its own token whose type is
    the lowercase keyword. The words between two keywords are collected into
    one ``EXPRESSION`` token whose value is those words joined by single
    spaces. The list always ends with an ``END_OF_INPUT`` token.

    Example:
        "solve x + 1 = 5 and y = 2*x for x, y"
    yields tokens with these types/values, in order:
        "solve", "x + 1", "=", "5", "and", "y", "=", "2*x",
        "for", "x", ",", "y", END_OF_INPUT

    Note:
        Every "=" and "," is isolated before splitting, so a multi-character
        operator that contains "=" (for example "<=") is broken apart too.

    Args:
        expression: The raw instruction text.

    Returns:
        The tokens in input order, terminated by an ``END_OF_INPUT`` token.
        Empty or whitespace-only input yields only that terminator.
    """
    # Pad the single-character keywords so they become standalone words.
    padded = expression.replace(",", " , ").replace("=", " = ")

    tokens: list[Token] = []
    pending: list[str] = []  # words of the expression being collected

    # split() with no separator collapses whitespace runs and never yields
    # empty strings, so expressions carry no stray spaces and no empty
    # EXPRESSION tokens are produced by the padding above.
    for word in padded.split():
        keyword = word.lower()
        if keyword not in keyword_tokens:
            pending.append(word)
            continue

        # A keyword closes whatever expression was being collected.
        if pending:
            tokens.append(Token(EXPRESSION, " ".join(pending)))
            pending = []
        tokens.append(Token(keyword))

    # Flush a trailing expression that was not followed by a keyword.
    if pending:
        tokens.append(Token(EXPRESSION, " ".join(pending)))

    tokens.append(Token(END_OF_INPUT))
    return tokens