mini-compiler / src /lexer.py
tareque101's picture
Upload 11 files
753d525 verified
Raw
History Blame Contribute Delete
1.99 kB
import re
# Ordered table of (token kind, regular expression) pairs.
#
# Lexer.tokenize() joins these into one alternation of named groups, so the
# order matters: at each position the first alternative that matches wins.
TOKEN_SPEC = [
    # --- literals and names ---
    ('NUMBER', r'\d+(\.\d+)?'),        # integers (10) and floats (10.5)
    ('ID', r'[a-zA-Z_][a-zA-Z0-9_]*'),  # identifiers; keywords are split out by the lexer

    # --- operators and punctuation ---
    ('ASSIGN', r'='),
    ('OP', r'[+\-*/]'),                # one of + - * /
    ('SEMI', r';'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),

    # --- text the lexer discards ---
    ('COMMENT', r'#.*'),               # '#' through the end of the line
    ('SKIP', r'[ \t\n\r]+'),           # spaces, tabs and line breaks

    # --- catch-all, must stay last ---
    ('MISMATCH', r'.'),                # any other single character
]
class Lexer:
    """Regex-driven lexer producing a flat list of ``(kind, value)`` tuples.

    Creating a ``Lexer`` tokenizes ``code`` right away. Afterwards:

    * ``tokens`` holds the token tuples, terminated by ``('EOF', None)``.
    * ``errors`` holds one message string per unexpected character.
    """

    # Reserved words. An identifier found in this set is emitted as
    # ('KEYWORD', word) rather than ('ID', word).
    KEYWORDS = {
        'int', 'float', 'char', 'bool',
        'if', 'else', 'while', 'for',
        'print', 'true', 'false', 'return',
    }

    def __init__(self, code):
        """Tokenize ``code`` and keep the results on the instance."""
        self.tokens = []
        self.errors = []
        self.tokenize(code)

    def tokenize(self, code):
        """Scan ``code`` and return the resulting token list.

        Each call starts from empty ``tokens`` and ``errors`` lists, so the
        result always describes exactly the text passed in.

        Token shapes:

        * ``('NUMBER', n)`` -- ``n`` is a ``float`` when the literal contains
          a ``'.'``, otherwise an ``int``.
        * ``('KEYWORD', word)`` / ``('ID', name)`` -- identifiers, split by
          membership in ``KEYWORDS``.
        * ``(kind, text)`` -- every other kind from ``TOKEN_SPEC`` with the
          matched text.
        * ``('EOF', None)`` -- always the final element.

        Comments and whitespace produce no token. An unexpected character
        produces no token either; a message is appended to ``errors`` and
        scanning continues with the next character.

        Returns:
            The same list object that is stored in ``self.tokens``.
        """
        # __init__ already runs tokenize() once. Without this reset a second
        # call would append to the earlier results, leaving a stray EOF in the
        # middle of the stream and stale messages in ``errors``. Fresh lists
        # (rather than clear()) leave previously returned lists untouched.
        self.tokens = []
        self.errors = []

        # One alternation of named groups; ``lastgroup`` then names the
        # TOKEN_SPEC entry that produced each match.
        tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in TOKEN_SPEC)
        for mo in re.finditer(tok_regex, code):
            kind = mo.lastgroup
            value = mo.group()
            if kind == 'NUMBER':
                # Convert to float if '.' exists, otherwise int
                num_value = float(value) if '.' in value else int(value)
                self.tokens.append(('NUMBER', num_value))
            elif kind == 'ID':
                # Reserved words are reported under their own kind.
                if value in self.KEYWORDS:
                    self.tokens.append(('KEYWORD', value))
                else:
                    self.tokens.append(('ID', value))
            elif kind in ('COMMENT', 'SKIP'):
                # Comments and whitespace carry no meaning for later stages.
                continue
            elif kind == 'MISMATCH':
                # Record the problem but keep scanning so that every bad
                # character in the input is reported.
                self.errors.append(f"Lexical Error: Unexpected character '{value}'")
            else:
                self.tokens.append((kind, value))
        self.tokens.append(('EOF', None))
        return self.tokens