llama/math_lexer.py



								# Token type ids for the two token kinds that are not keywords.
								EXPRESSION, END_OF_INPUT = 0, 1

								# Keyword and punctuation tokens. Each string serves both as the token
								# type and as the (lower-cased) text that tokenize() compares against.
								SOLVE, FOR, AND = "solve", "for", "and"
								EQUALS, COMMA = "=", ","

								# Every token that tokenize() recognises literally.
								keyword_tokens = [SOLVE, FOR, AND, EQUALS, COMMA]


								class Token:

								    def __init__(self, type: int|str, value: str = None):

								        self.type = type

								        self.value = value


								    def __repr__(self):

								        if self.value == None:

								            return f"{self.type}"

								        return f"{self.type}|'{self.value}'"


								def tokenize(expression: str) -> list[Token]:
								    """
								    Split a math instruction into keyword and expression tokens.

								    Commas and equals signs are padded with spaces so they separate from
								    the surrounding text, then the input is split on whitespace. A word
								    that matches an entry of ``keyword_tokens`` (compared lower-cased)
								    becomes a keyword token; each run of other words between two keywords
								    is joined with single spaces into one EXPRESSION token.

								    example:
								        "solve x + 1 = 5 and y = 2*x for x, y"
								    result (shown as token type or expression value):
								        ["solve", "x + 1", "=", "5", "and", "y", "=", "2*x", "for", "x", ",", "y", END_OF_INPUT]

								    Args:
								        expression: The raw instruction text.

								    Returns:
								        The tokens in input order, always terminated by an END_OF_INPUT
								        token. Empty or whitespace-only input yields only that token.
								    """
								    tokens = []

								    # split() without an argument drops the empty strings produced by the
								    # padding above (and by repeated spaces), so expression values carry
								    # no leading/trailing blanks and no whitespace-only tokens are emitted.
								    words = expression.replace(",", " , ").replace("=", " = ").split()

								    pending = []  # words of the expression currently being collected
								    for word in words:
								        lowered = word.lower()
								        if lowered in keyword_tokens:
								            # A keyword ends the expression collected so far.
								            if pending:
								                tokens.append(Token(EXPRESSION, " ".join(pending)))
								                pending = []
								            tokens.append(Token(lowered))
								        else:
								            # Non-keywords keep their original casing.
								            pending.append(word)

								    # Flush an expression that runs up to the end of the input.
								    if pending:
								        tokens.append(Token(EXPRESSION, " ".join(pending)))

								    tokens.append(Token(END_OF_INPUT))
								    return tokens