import re

# Lexical grammar as (token kind, regular expression) pairs.
# Lexer.tokenize joins these into a single alternation of named groups, so
# when more than one pattern could match at a position the earlier entry wins.
TOKEN_SPEC = [
    # --- literals and names ---
    ('NUMBER', r'\d+(\.\d+)?'),  # integer (10) or float (10.5)
    ('ID', r'[a-zA-Z_][a-zA-Z0-9_]*'),

    # --- operators and punctuation ---
    ('ASSIGN', r'='),
    ('OP', r'[+\-*/]'),
    ('SEMI', r';'),
    ('LBRACE', r'\{'),
    ('RBRACE', r'\}'),
    ('LPAREN', r'\('),
    ('RPAREN', r'\)'),

    # --- text the lexer matches but does not emit ---
    ('COMMENT', r'#.*'),  # from '#' up to (not including) the newline
    ('SKIP', r'[ \t\n\r]+'),

    # --- catch-all: any other single character; must stay last ---
    ('MISMATCH', r'.'),
]

class Lexer:
    """Regex-driven lexer producing a flat list of ``(kind, value)`` tuples.

    The token patterns come from the module-level ``TOKEN_SPEC`` table.
    Constructing a ``Lexer`` tokenizes ``code`` immediately; afterwards the
    results are available on two attributes:

    * ``tokens`` -- the emitted tokens, always ending with ``('EOF', None)``.
    * ``errors`` -- one message string per character that matched no real
      token pattern. Such characters are reported here instead of raising,
      and they produce no token.
    """

    # Reserved words: an identifier spelled exactly like one of these is
    # emitted as a KEYWORD token instead of an ID token.
    KEYWORDS = {
        'int', 'float', 'char', 'bool',
        'if', 'else', 'while', 'for',
        'print', 'true', 'false', 'return',
    }

    def __init__(self, code: str) -> None:
        """Create the lexer and tokenize ``code`` right away."""
        self.tokens = []
        self.errors = []
        self.tokenize(code)

    def tokenize(self, code: str) -> list:
        """Tokenize ``code`` and return the resulting token list.

        Each call starts from empty ``tokens`` and ``errors`` lists, so the
        method can be called again on the same instance without the output
        of an earlier run (including its EOF marker) leaking into the new
        result.

        Args:
            code: Source text to scan.

        Returns:
            ``self.tokens``: a list of ``(kind, value)`` tuples. NUMBER
            values are converted to ``int`` or ``float``; every other value
            is the matched text. The final element is ``('EOF', None)``.
        """
        # Rebind rather than clear(): a caller still holding the list from a
        # previous run keeps that list intact.
        self.tokens = []
        self.errors = []

        # One alternation of named groups; Match.lastgroup then names the
        # TOKEN_SPEC entry that matched. TOKEN_SPEC is read on every call.
        master_pattern = '|'.join(
            f'(?P<{name}>{pattern})' for name, pattern in TOKEN_SPEC
        )

        for match in re.finditer(master_pattern, code):
            kind = match.lastgroup
            text = match.group()

            if kind in ('COMMENT', 'SKIP'):
                # Matched only so the scanner can step over it.
                continue

            if kind == 'MISMATCH':
                # Record the problem and keep scanning so that every bad
                # character in the input gets reported, not just the first.
                self.errors.append(f"Lexical Error: Unexpected character '{text}'")
            elif kind == 'NUMBER':
                # A decimal point means float; otherwise keep it an int.
                number = float(text) if '.' in text else int(text)
                self.tokens.append(('NUMBER', number))
            elif kind == 'ID':
                # Keywords share the identifier pattern; split them here.
                tag = 'KEYWORD' if text in self.KEYWORDS else 'ID'
                self.tokens.append((tag, text))
            else:
                # Operators and punctuation pass through unchanged.
                self.tokens.append((kind, text))

        # Sentinel marking the end of the token stream.
        self.tokens.append(('EOF', None))
        return self.tokens