Spaces:
Sleeping
Sleeping
"""
codegraph/graph.py — CodeGraph V2
The innovation that makes SecureCodeEnv unique.
Structured in-memory database of everything the agent has written this episode.
Persisted in Redis between steps via pickle.
V2 changes:
- tree-sitter replaces ast module — supports Python, JS, TS, TSX
- 60% threshold for style detection (was 50%) — prevents false penalties
- "mixed" state added — no penalty when codebase has no clear dominant style
- compress_graph() added (implemented as CodeGraph.to_slim_dict) — semantic compression for inference context
"""
| from dataclasses import dataclass, field | |
| from collections import Counter | |
| from typing import Dict, Any | |
@dataclass
class CodeGraph:
    """In-memory record of the components written during one episode.

    The class must be a dataclass: its attributes are declared with
    ``field(default_factory=dict)``, which only yields a fresh dict per
    instance when ``@dataclass`` processes the class. Without the decorator
    the attributes are ``dataclasses.Field`` objects and ``update()`` fails
    on item assignment.
    """

    # Seed identifying the episode; stored only, no method here reads it.
    episode_seed: int = 0
    # Component key (basename without extension) -> metadata dict from update().
    components: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    # Derived summary with keys "naming", "error_handling", "uses_type_hints".
    conventions: Dict[str, Any] = field(default_factory=dict)

    def update(self, filename: str, metadata: Dict[str, Any]) -> None:
        """Add or replace a file's metadata in the graph, then re-derive conventions.

        Args:
            filename: Path of the file. Its basename without extension is the
                component key, so same-named files in different directories
                replace each other.
            metadata: Description of the file. It is stored by reference and
                tagged in place with a ``"file"`` key holding ``filename``.

        Metadata whose ``"status"`` is ``"syntax_error"`` is ignored entirely.
        """
        if metadata.get("status") == "syntax_error":
            return  # Don't pollute graph with broken code
        metadata["file"] = filename
        self.components[_file_to_key(filename)] = metadata
        self._infer_conventions()

    def _infer_conventions(self) -> None:
        """Derive the dominant codebase style from all components.

        Naming: the most common style wins only when it covers at least 60%
        of all function names; a bare majority (51%) wrongly penalises mixed
        codebases. Without a clear winner the value is ``"mixed"`` (the
        consistency grader is expected to award full marks in that case), and
        with no functions at all it is ``"unknown"``.

        Error handling and type hints: decided by whether at least half of
        the components set the corresponding flag under ``"conventions"``.
        """
        fn_names = [
            fn["name"]
            for comp in self.components.values()
            for fn in comp.get("functions", [])
        ]
        if fn_names:
            style_counts = Counter(_naming_style(n) for n in fn_names)
            top, count = style_counts.most_common(1)[0]
            self.conventions["naming"] = (
                top if count / len(fn_names) >= 0.60 else "mixed"
            )
        else:
            self.conventions["naming"] = "unknown"

        # max(..., 1) keeps the ratios defined for an empty graph.
        total = max(len(self.components), 1)

        uses_try = sum(
            1 for comp in self.components.values()
            if comp.get("conventions", {}).get("uses_try_catch", False)
        )
        self.conventions["error_handling"] = (
            "try_catch" if uses_try / total >= 0.5 else "none"
        )

        uses_hints = sum(
            1 for comp in self.components.values()
            if comp.get("conventions", {}).get("uses_type_hints", False)
        )
        self.conventions["uses_type_hints"] = uses_hints / total >= 0.5

    def to_slim_dict(self, limit: int = 6000) -> str:
        """Return a compressed JSON view of the graph (the "compress_graph()" step).

        Keeps the conventions plus, per component: file, language (default
        ``"py"``), up to 20 function names, up to 15 top-level import names
        and the two convention flags. Everything else in the stored metadata
        is dropped.

        Args:
            limit: Maximum length of the returned string, in characters.

        Returns:
            Indented JSON text. If it exceeds ``limit`` the per-component
            imports are dropped; if it is still too long the text is cut at
            ``limit`` characters, in which case it may no longer parse as JSON.

        V1 blindly truncated at 2000 chars — agents couldn't see patterns
        they needed.
        """
        import json

        slim = {
            "conventions": self.conventions,
            "components": {
                name: {
                    "file": comp.get("file", ""),
                    "language": comp.get("language", "py"),
                    "functions": [f["name"] for f in comp.get("functions", [])][:20],
                    # Only the top-level package of each import is kept.
                    "imports": [i.split(".")[0] for i in comp.get("imports", [])][:15],
                    "uses_try_catch": comp.get("conventions", {}).get("uses_try_catch", False),
                    "uses_type_hints": comp.get("conventions", {}).get("uses_type_hints", False),
                }
                for name, comp in self.components.items()
            },
        }
        result = json.dumps(slim, indent=2)
        if len(result) > limit:
            # Further trim: drop imports when still over limit
            for entry in slim["components"].values():
                entry.pop("imports", None)
            result = json.dumps(slim, indent=2)[:limit]
        return result
# ── helpers ──────────────────────────────────────────────────────────────────
| def _file_to_key(filename: str) -> str: | |
| """Convert 'src/auth/UserAuth.py' β 'UserAuth'""" | |
| base = filename.split("/")[-1] | |
| for ext in (".py", ".js", ".ts", ".tsx", ".jsx"): | |
| base = base.replace(ext, "") | |
| return base | |
| def _naming_style(name: str) -> str: | |
| if "_" in name: | |
| return "snake_case" | |
| if name and name[0].isupper(): | |
| return "PascalCase" | |
| if any(c.isupper() for c in name[1:]): | |
| return "camelCase" | |
| return "snake_case" # all-lowercase defaults to snake | |