EXPRESSION = 0 END_OF_INPUT = 1 SOLVE = "solve" FOR = "for" AND = "and" EQUALS = "=" COMMA = "," keyword_tokens = [SOLVE, FOR, AND, EQUALS, COMMA] class Token: def __init__(self, type: int|str, value: str = None): self.type = type self.value = value def __repr__(self): if self.value == None: return f"{self.type}" return f"{self.type}|'{self.value}'" def tokenize(expression: str) -> list[Token]: """ this splits a math instruction into tokens. example: "solve x + 1 = 5 and y = 2*x for x, y" result: ["solve", "x + 1", "=", "5", "and", "y", "=", "2*x", "for", "x", "and", "y", "end_of_input"] """ tokens = [] # output list of tokens symbols = expression.replace(",", " , ").replace("=", " = ").split(" ") current_token = [] # everything that is not directly in math_keyword_tokens gets binned here for s in symbols: found = False for keyword in keyword_tokens: if s.lower() == keyword: if len(current_token) != 0: tokens.append(Token(EXPRESSION, " ".join(current_token))) current_token = [] tokens.append(Token(keyword)) found = True break if found == False: current_token.append(s) if len(current_token) != 0: tokens.append(Token(EXPRESSION, " ".join(current_token))) current_token = [] tokens.append(Token(END_OF_INPUT)) return tokens