File size: 9,431 Bytes
019e7db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
"""AST-based parsing helpers for Python code review."""

from __future__ import annotations

import ast
from dataclasses import dataclass, field
from typing import Any


@dataclass(slots=True)
class _StructureVisitor(ast.NodeVisitor):
    """Collect lightweight structural signals from Python source."""

    imports: set[str] = field(default_factory=set)
    route_decorators: set[str] = field(default_factory=set)
    function_names: list[str] = field(default_factory=list)
    class_names: list[str] = field(default_factory=list)
    code_smells: list[str] = field(default_factory=list)
    branch_count: int = 0
    max_loop_depth: int = 0
    max_nesting_depth: int = 0
    current_loop_depth: int = 0
    current_nesting_depth: int = 0
    recursive_functions: set[str] = field(default_factory=set)
    current_function: str | None = None
    docstring_total: int = 0
    docstring_with_docs: int = 0
    backward_calls: int = 0
    optimizer_step_calls: int = 0
    container_builds: int = 0

    def visit_Import(self, node: ast.Import) -> None:  # noqa: N802
        for alias in node.names:
            self.imports.add(alias.name.split(".")[0])
        self.generic_visit(node)

    def visit_ImportFrom(self, node: ast.ImportFrom) -> None:  # noqa: N802
        if node.module:
            self.imports.add(node.module.split(".")[0])
        self.generic_visit(node)

    def _push_nesting(self) -> None:
        self.current_nesting_depth += 1
        self.max_nesting_depth = max(self.max_nesting_depth, self.current_nesting_depth)

    def _pop_nesting(self) -> None:
        self.current_nesting_depth = max(0, self.current_nesting_depth - 1)

    def _visit_loop(self, node: ast.AST) -> None:
        self.branch_count += 1
        self.current_loop_depth += 1
        self.max_loop_depth = max(self.max_loop_depth, self.current_loop_depth)
        self._push_nesting()
        self.generic_visit(node)
        self._pop_nesting()
        self.current_loop_depth = max(0, self.current_loop_depth - 1)

    def visit_For(self, node: ast.For) -> None:  # noqa: N802
        self._visit_loop(node)

    def visit_AsyncFor(self, node: ast.AsyncFor) -> None:  # noqa: N802
        self._visit_loop(node)

    def visit_While(self, node: ast.While) -> None:  # noqa: N802
        self._visit_loop(node)

    def visit_If(self, node: ast.If) -> None:  # noqa: N802
        self.branch_count += 1
        self._push_nesting()
        self.generic_visit(node)
        self._pop_nesting()

    def visit_Try(self, node: ast.Try) -> None:  # noqa: N802
        self.branch_count += 1
        self._push_nesting()
        self.generic_visit(node)
        self._pop_nesting()

    def visit_With(self, node: ast.With) -> None:  # noqa: N802
        self._push_nesting()
        self.generic_visit(node)
        self._pop_nesting()

    def visit_AsyncWith(self, node: ast.AsyncWith) -> None:  # noqa: N802
        self._push_nesting()
        self.generic_visit(node)
        self._pop_nesting()

    def visit_comprehension(self, node: ast.comprehension) -> None:  # noqa: N802
        self._visit_loop(node)

    def visit_FunctionDef(self, node: ast.FunctionDef) -> None:  # noqa: N802
        self.function_names.append(node.name)
        self.docstring_total += 1
        if ast.get_docstring(node):
            self.docstring_with_docs += 1
        prior = self.current_function
        self.current_function = node.name
        for decorator in node.decorator_list:
            decorator_name = self._decorator_name(decorator)
            if decorator_name in {"get", "post", "put", "patch", "delete"}:
                self.route_decorators.add(decorator_name)
        self.generic_visit(node)
        self.current_function = prior

    def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None:  # noqa: N802
        self.visit_FunctionDef(node)

    def visit_ClassDef(self, node: ast.ClassDef) -> None:  # noqa: N802
        self.class_names.append(node.name)
        self.generic_visit(node)

    def visit_Call(self, node: ast.Call) -> None:  # noqa: N802
        dotted_name = self._call_name(node.func)
        if dotted_name.endswith(".backward") or dotted_name == "backward":
            self.backward_calls += 1
        if dotted_name.endswith(".step") or dotted_name == "step":
            if "optimizer" in dotted_name:
                self.optimizer_step_calls += 1
        if dotted_name in {"list", "dict", "set", "tuple"}:
            self.container_builds += 1
        if self.current_function and dotted_name == self.current_function:
            self.recursive_functions.add(self.current_function)
        self.generic_visit(node)

    @staticmethod
    def _call_name(node: ast.AST) -> str:
        if isinstance(node, ast.Name):
            return node.id
        if isinstance(node, ast.Attribute):
            left = _StructureVisitor._call_name(node.value)
            return f"{left}.{node.attr}" if left else node.attr
        return ""

    @staticmethod
    def _decorator_name(node: ast.AST) -> str:
        if isinstance(node, ast.Call):
            return _StructureVisitor._decorator_name(node.func)
        if isinstance(node, ast.Attribute):
            return node.attr.lower()
        if isinstance(node, ast.Name):
            return node.id.lower()
        return ""


def _line_smells(lines: list[str]) -> tuple[int, list[int], bool]:
    """Scan source lines for simple formatting smells in a single pass.

    Returns a triple of: how many lines are longer than 88 characters, the
    1-based numbers of the lines that end in whitespace, and whether any
    line contains a tab character.
    """
    overlong = 0
    ragged: list[int] = []
    saw_tab = False
    for number, text in enumerate(lines, start=1):
        if len(text) > 88:
            overlong += 1
        if text != text.rstrip():
            ragged.append(number)
        if "\t" in text:
            saw_tab = True
    return overlong, ragged, saw_tab


def parse_code_structure(code: str) -> dict[str, Any]:
    """Extract deterministic syntax, import, and structure signals from Python code.

    Args:
        code: Python source text. ``None`` or an empty string is treated as
            empty source.

    Returns:
        A dict that always carries the same keys. Line-based signals
        (``line_count``, ``long_lines``, ``trailing_whitespace_lines``,
        ``tabs_used``) are filled in even when the code does not parse. In
        that case ``syntax_valid`` is ``False``, ``syntax_error`` describes
        the failure, and every AST-derived key keeps its default.
    """
    import re  # function-scope: only the usage heuristics below need it

    normalized_code = code or ""
    lines = normalized_code.splitlines()
    long_lines, trailing_whitespace_lines, tabs_used = _line_smells(lines)

    result: dict[str, Any] = {
        "syntax_valid": True,
        "syntax_error": "",
        "line_count": len(lines),
        "imports": [],
        "function_names": [],
        "class_names": [],
        "long_lines": long_lines,
        "trailing_whitespace_lines": trailing_whitespace_lines,
        "tabs_used": tabs_used,
        "docstring_ratio": 0.0,
        "uses_recursion": False,
        "max_loop_depth": 0,
        "max_nesting_depth": 0,
        "route_decorators": [],
        "code_smells": [],
        "uses_pandas": False,
        "uses_numpy": False,
        "uses_torch": False,
        "uses_sklearn": False,
        "uses_fastapi": False,
        "uses_flask": False,
        "uses_pydantic": False,
        "calls_backward": False,
        "calls_optimizer_step": False,
        "branch_count": 0,
        "container_builds": 0,
    }

    try:
        # ``ast.parse`` is given at least a newline so empty input still
        # yields an (empty) module.
        tree = ast.parse(normalized_code or "\n")
    except SyntaxError as exc:
        result["syntax_valid"] = False
        result["syntax_error"] = f"{exc.msg} (line {exc.lineno}, column {exc.offset})"
        result["code_smells"] = ["Code does not parse.", "Fix syntax before deeper review."]
        return result

    visitor = _StructureVisitor()
    visitor.visit(tree)

    def mentions(pattern: str) -> bool:
        """Return whether ``pattern`` occurs anywhere in the source text."""
        return re.search(pattern, normalized_code) is not None

    # Library usage is detected from imports first. The textual fallbacks
    # catch snippets that omit the import; they are anchored on a word
    # boundary so that names such as ``update`` or ``inp.`` no longer count
    # as pandas / numpy usage.
    imports = sorted(visitor.imports)
    uses_pandas = "pandas" in imports or mentions(r"\bpd\.")
    uses_numpy = "numpy" in imports or mentions(r"\bnp\.")
    uses_torch = "torch" in imports or mentions(r"\btorch\.")
    uses_sklearn = "sklearn" in imports
    uses_fastapi = "fastapi" in imports
    uses_flask = "flask" in imports
    uses_pydantic = "pydantic" in imports or mentions(r"\bBaseModel\b")

    code_smells = list(visitor.code_smells)
    if visitor.max_loop_depth >= 2:
        code_smells.append("Nested loops may create avoidable performance pressure.")
    if long_lines:
        code_smells.append("Long lines reduce readability and reviewability.")
    if trailing_whitespace_lines:
        code_smells.append("Trailing whitespace suggests style drift.")
    # Only raised when there are functions and none of them has a docstring.
    if visitor.docstring_total and visitor.docstring_with_docs == 0:
        code_smells.append("Public functions are missing docstrings.")
    if not visitor.function_names:
        code_smells.append("Encapsulate behavior in functions for testability.")

    result.update(
        {
            "imports": imports,
            "function_names": visitor.function_names,
            "class_names": visitor.class_names,
            # ``max(..., 1)`` avoids dividing by zero when there are no functions.
            "docstring_ratio": round(
                visitor.docstring_with_docs / max(visitor.docstring_total, 1),
                4,
            ),
            "uses_recursion": bool(visitor.recursive_functions),
            "max_loop_depth": visitor.max_loop_depth,
            "max_nesting_depth": visitor.max_nesting_depth,
            "route_decorators": sorted(visitor.route_decorators),
            "code_smells": code_smells,
            "uses_pandas": uses_pandas,
            "uses_numpy": uses_numpy,
            "uses_torch": uses_torch,
            "uses_sklearn": uses_sklearn,
            "uses_fastapi": uses_fastapi,
            "uses_flask": uses_flask,
            "uses_pydantic": uses_pydantic,
            "calls_backward": visitor.backward_calls > 0,
            "calls_optimizer_step": visitor.optimizer_step_calls > 0,
            "branch_count": visitor.branch_count,
            "container_builds": visitor.container_builds,
        }
    )
    return result