import ast
import os
import logging
import tempfile
from typing import List, Dict, Any, Tuple, Optional
from clang import cindex
import javalang
import javalang.tree as T
import esprima
from bs4 import BeautifulSoup
import tree_sitter_rust as ts_rust
from tree_sitter import Language, Parser
import re
from .utils.path_utils import generate_entity_aliases

LOGGER_NAME = "AST_ENTITY_EXTRACTOR"
logger = logging.getLogger(LOGGER_NAME)


class BaseASTEntityExtractor:
    """
    Common interface for the entity extractors defined in this module.

    Both methods raise NotImplementedError here; a concrete extractor is
    expected to override them.
    """

    def extract_entities(self, code: str, file_path: Optional[str] = None) -> Tuple[List[Dict[str, Any]], List[str]]:
        """
        Extract entities from source code.

        Args:
            code: Source code as string
            file_path: Optional path to the source file (for better context
                and include resolution)

        Returns:
            Tuple of (declared_entities, called_entities): a list of dicts
            for the declared entities and a list of strings for the called
            entities.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError

    # Reset contract: lets callers reuse one extractor instance safely.
    def reset(self) -> None:
        """
        Reset internal state so the extractor instance can be reused.

        Concrete extractors should override this to clear their buffers.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError


class HTMLEntityExtractor(BaseASTEntityExtractor):
    """
    Hybrid HTML AST-based entity extractor.

    Responsibilities:
    • Parse HTML into a tree
    • Extract declared DOM entities (ids, names, classes)
    • Extract JavaScript calls from inline event handlers
    • Extract JS entities from