-
Notifications
You must be signed in to change notification settings - Fork 1
/
lexer.py
114 lines (96 loc) · 3.93 KB
/
lexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from enum import Enum, unique, auto
from typing import List
class TokenType(Enum):
"""Enum of possible types for the token class
"""
INTEGER = auto()
NAME = auto()
EQUAL = auto()
PARANTHESE_OPEN = auto()
PARANTHESE_CLOSE = auto()
BRACE_OPEN = auto()
BRACE_CLOSE = auto()
FUNCTION_ARROW = auto()
COMMA = auto()
MINUS = auto()
class Token:
"""Language token to distinguish language elements
This class represents a single token in this programming language, like integers or certain symbols.
"""
token_type: TokenType
token_string: str
def __init__(self, token_type, token_string):
self.token_type = token_type
self.token_string = token_string
def appendToToken(self, char):
"""Appends a character to this Token
Adds a new character to the existing Token
"""
self.token_string += char
def __str__(self):
return f'Token({self.token_type}, "{self.token_string}")'
__repr__ = __str__
class InvalidSyntaxException(Exception):
"""Exception raised for errors in the syntax of the provided code.
Attributes:
expression -- input expression in which the error occurred
message -- explanation of the error
"""
def __init__(self, expression, message):
self.expression = expression
self.message = message
def tokenize(input: str) -> List[Token]:
"""Function to tokenize a given string with instructions
Takes a string with valid instructions, and outputs a sequence of tokens of type "Token".
Can raise an InvalidSyntaxException
"""
result_tokens = []
last_token = None
for char in input:
if char.isdecimal():
if last_token == None:
last_token = Token(TokenType.INTEGER, char)
elif last_token.token_type in [TokenType.NAME, TokenType.INTEGER]:
last_token.appendToToken(char)
elif last_token.token_type == TokenType.MINUS:
last_token.token_type = TokenType.INTEGER
last_token.appendToToken(char)
else:
raise InvalidSyntaxException(
char, f"Invalid character {char} after {last_token.token_string}. Excpected to be of type {last_token.token_type}.")
elif char.isalpha():
if last_token == None:
last_token = Token(TokenType.NAME, char)
elif last_token.token_type == TokenType.NAME:
last_token.appendToToken(char)
else:
raise InvalidSyntaxException(
char, f"Invalid character {char} after {last_token.token_string}. Excpected to be of type {last_token.token_type}.")
elif char == ">":
if last_token.token_type == TokenType.MINUS:
last_token = None
result_tokens.append(Token(TokenType.FUNCTION_ARROW, "->"))
else:
if (last_token != None):
result_tokens.append(last_token)
last_token = None
if char == "(":
result_tokens.append(Token(TokenType.PARANTHESE_OPEN, char))
elif char == "-":
last_token = Token(TokenType.MINUS, char)
elif char == ")":
result_tokens.append(Token(TokenType.PARANTHESE_CLOSE, char))
elif char == "{":
result_tokens.append(Token(TokenType.BRACE_OPEN, char))
elif char == "}":
result_tokens.append(Token(TokenType.BRACE_CLOSE, char))
elif char == "=":
result_tokens.append(Token(TokenType.EQUAL, char))
elif char == ",":
result_tokens.append(Token(TokenType.COMMA, char))
elif not char.isspace():
assert False
# Make sure the last token is also added, if there is no space / newline at the end.
if last_token != None:
result_tokens.append(last_token)
return result_tokens