Spaces:
Sleeping
Sleeping
| """AST-based parsing helpers for Python code review.""" | |
| from __future__ import annotations | |
| import ast | |
| from dataclasses import dataclass, field | |
| from typing import Any | |
| class _StructureVisitor(ast.NodeVisitor): | |
| """Collect lightweight structural signals from Python source.""" | |
| imports: set[str] = field(default_factory=set) | |
| route_decorators: set[str] = field(default_factory=set) | |
| function_names: list[str] = field(default_factory=list) | |
| class_names: list[str] = field(default_factory=list) | |
| code_smells: list[str] = field(default_factory=list) | |
| branch_count: int = 0 | |
| max_loop_depth: int = 0 | |
| max_nesting_depth: int = 0 | |
| current_loop_depth: int = 0 | |
| current_nesting_depth: int = 0 | |
| recursive_functions: set[str] = field(default_factory=set) | |
| current_function: str | None = None | |
| docstring_total: int = 0 | |
| docstring_with_docs: int = 0 | |
| backward_calls: int = 0 | |
| optimizer_step_calls: int = 0 | |
| container_builds: int = 0 | |
| def visit_Import(self, node: ast.Import) -> None: # noqa: N802 | |
| for alias in node.names: | |
| self.imports.add(alias.name.split(".")[0]) | |
| self.generic_visit(node) | |
| def visit_ImportFrom(self, node: ast.ImportFrom) -> None: # noqa: N802 | |
| if node.module: | |
| self.imports.add(node.module.split(".")[0]) | |
| self.generic_visit(node) | |
| def _push_nesting(self) -> None: | |
| self.current_nesting_depth += 1 | |
| self.max_nesting_depth = max(self.max_nesting_depth, self.current_nesting_depth) | |
| def _pop_nesting(self) -> None: | |
| self.current_nesting_depth = max(0, self.current_nesting_depth - 1) | |
| def _visit_loop(self, node: ast.AST) -> None: | |
| self.branch_count += 1 | |
| self.current_loop_depth += 1 | |
| self.max_loop_depth = max(self.max_loop_depth, self.current_loop_depth) | |
| self._push_nesting() | |
| self.generic_visit(node) | |
| self._pop_nesting() | |
| self.current_loop_depth = max(0, self.current_loop_depth - 1) | |
| def visit_For(self, node: ast.For) -> None: # noqa: N802 | |
| self._visit_loop(node) | |
| def visit_AsyncFor(self, node: ast.AsyncFor) -> None: # noqa: N802 | |
| self._visit_loop(node) | |
| def visit_While(self, node: ast.While) -> None: # noqa: N802 | |
| self._visit_loop(node) | |
| def visit_If(self, node: ast.If) -> None: # noqa: N802 | |
| self.branch_count += 1 | |
| self._push_nesting() | |
| self.generic_visit(node) | |
| self._pop_nesting() | |
| def visit_Try(self, node: ast.Try) -> None: # noqa: N802 | |
| self.branch_count += 1 | |
| self._push_nesting() | |
| self.generic_visit(node) | |
| self._pop_nesting() | |
| def visit_With(self, node: ast.With) -> None: # noqa: N802 | |
| self._push_nesting() | |
| self.generic_visit(node) | |
| self._pop_nesting() | |
| def visit_AsyncWith(self, node: ast.AsyncWith) -> None: # noqa: N802 | |
| self._push_nesting() | |
| self.generic_visit(node) | |
| self._pop_nesting() | |
| def visit_comprehension(self, node: ast.comprehension) -> None: # noqa: N802 | |
| self._visit_loop(node) | |
| def visit_FunctionDef(self, node: ast.FunctionDef) -> None: # noqa: N802 | |
| self.function_names.append(node.name) | |
| self.docstring_total += 1 | |
| if ast.get_docstring(node): | |
| self.docstring_with_docs += 1 | |
| prior = self.current_function | |
| self.current_function = node.name | |
| for decorator in node.decorator_list: | |
| decorator_name = self._decorator_name(decorator) | |
| if decorator_name in {"get", "post", "put", "patch", "delete"}: | |
| self.route_decorators.add(decorator_name) | |
| self.generic_visit(node) | |
| self.current_function = prior | |
| def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: # noqa: N802 | |
| self.visit_FunctionDef(node) | |
| def visit_ClassDef(self, node: ast.ClassDef) -> None: # noqa: N802 | |
| self.class_names.append(node.name) | |
| self.generic_visit(node) | |
| def visit_Call(self, node: ast.Call) -> None: # noqa: N802 | |
| dotted_name = self._call_name(node.func) | |
| if dotted_name.endswith(".backward") or dotted_name == "backward": | |
| self.backward_calls += 1 | |
| if dotted_name.endswith(".step") or dotted_name == "step": | |
| if "optimizer" in dotted_name: | |
| self.optimizer_step_calls += 1 | |
| if dotted_name in {"list", "dict", "set", "tuple"}: | |
| self.container_builds += 1 | |
| if self.current_function and dotted_name == self.current_function: | |
| self.recursive_functions.add(self.current_function) | |
| self.generic_visit(node) | |
| def _call_name(node: ast.AST) -> str: | |
| if isinstance(node, ast.Name): | |
| return node.id | |
| if isinstance(node, ast.Attribute): | |
| left = _StructureVisitor._call_name(node.value) | |
| return f"{left}.{node.attr}" if left else node.attr | |
| return "" | |
| def _decorator_name(node: ast.AST) -> str: | |
| if isinstance(node, ast.Call): | |
| return _StructureVisitor._decorator_name(node.func) | |
| if isinstance(node, ast.Attribute): | |
| return node.attr.lower() | |
| if isinstance(node, ast.Name): | |
| return node.id.lower() | |
| return "" | |
| def _line_smells(lines: list[str]) -> tuple[int, list[int], bool]: | |
| long_lines = sum(1 for line in lines if len(line) > 88) | |
| trailing_whitespace_lines = [index + 1 for index, line in enumerate(lines) if line.rstrip() != line] | |
| tabs_used = any("\t" in line for line in lines) | |
| return long_lines, trailing_whitespace_lines, tabs_used | |
| def parse_code_structure(code: str) -> dict[str, Any]: | |
| """Extract deterministic syntax, import, and structure signals from Python code.""" | |
| normalized_code = code or "" | |
| lines = normalized_code.splitlines() | |
| long_lines, trailing_whitespace_lines, tabs_used = _line_smells(lines) | |
| result: dict[str, Any] = { | |
| "syntax_valid": True, | |
| "syntax_error": "", | |
| "line_count": len(lines), | |
| "imports": [], | |
| "function_names": [], | |
| "class_names": [], | |
| "long_lines": long_lines, | |
| "trailing_whitespace_lines": trailing_whitespace_lines, | |
| "tabs_used": tabs_used, | |
| "docstring_ratio": 0.0, | |
| "uses_recursion": False, | |
| "max_loop_depth": 0, | |
| "max_nesting_depth": 0, | |
| "route_decorators": [], | |
| "code_smells": [], | |
| "uses_pandas": False, | |
| "uses_numpy": False, | |
| "uses_torch": False, | |
| "uses_sklearn": False, | |
| "uses_fastapi": False, | |
| "uses_flask": False, | |
| "uses_pydantic": False, | |
| "calls_backward": False, | |
| "calls_optimizer_step": False, | |
| "branch_count": 0, | |
| "container_builds": 0, | |
| } | |
| try: | |
| tree = ast.parse(normalized_code or "\n") | |
| except SyntaxError as exc: | |
| result["syntax_valid"] = False | |
| result["syntax_error"] = f"{exc.msg} (line {exc.lineno}, column {exc.offset})" | |
| result["code_smells"] = ["Code does not parse.", "Fix syntax before deeper review."] | |
| return result | |
| visitor = _StructureVisitor() | |
| visitor.visit(tree) | |
| imports = sorted(visitor.imports) | |
| uses_pandas = "pandas" in imports or "pd" in normalized_code | |
| uses_numpy = "numpy" in imports or "np." in normalized_code | |
| uses_torch = "torch" in imports or "torch." in normalized_code | |
| uses_sklearn = "sklearn" in imports | |
| uses_fastapi = "fastapi" in imports | |
| uses_flask = "flask" in imports | |
| uses_pydantic = "pydantic" in imports or "BaseModel" in normalized_code | |
| code_smells = list(visitor.code_smells) | |
| if visitor.max_loop_depth >= 2: | |
| code_smells.append("Nested loops may create avoidable performance pressure.") | |
| if long_lines: | |
| code_smells.append("Long lines reduce readability and reviewability.") | |
| if trailing_whitespace_lines: | |
| code_smells.append("Trailing whitespace suggests style drift.") | |
| if visitor.docstring_total and visitor.docstring_with_docs == 0: | |
| code_smells.append("Public functions are missing docstrings.") | |
| if not visitor.function_names: | |
| code_smells.append("Encapsulate behavior in functions for testability.") | |
| result.update( | |
| { | |
| "imports": imports, | |
| "function_names": visitor.function_names, | |
| "class_names": visitor.class_names, | |
| "docstring_ratio": round( | |
| visitor.docstring_with_docs / max(visitor.docstring_total, 1), | |
| 4, | |
| ), | |
| "uses_recursion": bool(visitor.recursive_functions), | |
| "max_loop_depth": visitor.max_loop_depth, | |
| "max_nesting_depth": visitor.max_nesting_depth, | |
| "route_decorators": sorted(visitor.route_decorators), | |
| "code_smells": code_smells, | |
| "uses_pandas": uses_pandas, | |
| "uses_numpy": uses_numpy, | |
| "uses_torch": uses_torch, | |
| "uses_sklearn": uses_sklearn, | |
| "uses_fastapi": uses_fastapi, | |
| "uses_flask": uses_flask, | |
| "uses_pydantic": uses_pydantic, | |
| "calls_backward": visitor.backward_calls > 0, | |
| "calls_optimizer_step": visitor.optimizer_step_calls > 0, | |
| "branch_count": visitor.branch_count, | |
| "container_builds": visitor.container_builds, | |
| } | |
| ) | |
| return result | |