You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

61 lines
1.5 KiB

# Integer tags for the token kinds that are not keywords.
EXPRESSION, END_OF_INPUT = 0, 1

# Keyword token types; each value is also the literal text matched in the input.
SOLVE, FOR, AND = "solve", "for", "and"
EQUALS, COMMA = "=", ","

# Every keyword that is emitted as a token of its own.
keyword_tokens = [
    SOLVE,
    FOR,
    AND,
    EQUALS,
    COMMA,
]
class Token:
def __init__(self, type: int|str, value: str = None):
self.type = type
self.value = value
def __repr__(self):
if self.value == None:
return f"{self.type}"
return f"{self.type}|'{self.value}'"
def tokenize(expression: str) -> list[Token]:
    """Split a math instruction into keyword and expression tokens.

    Each word that matches an entry of ``keyword_tokens`` (compared
    case-insensitively) becomes a token of its own. The words between two
    keywords are collected into one EXPRESSION token, joined by single
    spaces. The returned list always ends with an END_OF_INPUT token.

    Example:
        "solve x + 1 = 5 and y = 2*x for x, y"
    yields tokens equivalent to:
        solve, "x + 1", =, "5", and, "y", =, "2*x", for, "x", ",", "y",
        END_OF_INPUT

    Note:
        Every "=" and "," character is split off as a keyword, including
        ones that are part of operators such as "==" or "<=".

    Args:
        expression: The raw instruction text.

    Returns:
        The tokens in input order, terminated by an END_OF_INPUT token.
    """
    tokens = []
    # Pad the single-character keywords so they separate from adjacent text,
    # then split on runs of whitespace. Calling split() without an argument
    # discards the empty strings that the padding, repeated spaces, or
    # leading/trailing whitespace would otherwise leave behind (these used to
    # produce stray spaces in expressions and empty EXPRESSION tokens).
    symbols = expression.replace(",", " , ").replace("=", " = ").split()
    pending = []  # words of the expression currently being collected

    for symbol in symbols:
        keyword = symbol.lower()
        if keyword in keyword_tokens:
            # A keyword terminates whatever expression preceded it.
            if pending:
                tokens.append(Token(EXPRESSION, " ".join(pending)))
                pending = []
            tokens.append(Token(keyword))
        else:
            pending.append(symbol)

    # Flush an expression that runs up to the end of the input.
    if pending:
        tokens.append(Token(EXPRESSION, " ".join(pending)))
    tokens.append(Token(END_OF_INPUT))
    return tokens