"""
codegraph/graph.py — CodeGraph V2

The innovation that makes SecureCodeEnv unique.
Structured in-memory database of everything the agent has written this episode.
Persisted in Redis between steps via pickle.

V2 changes:
  - tree-sitter replaces ast module → supports Python, JS, TS, TSX
  - 60% threshold for style detection (was 50%) → prevents false penalties
  - "mixed" state added → no penalty when codebase has no clear dominant style
  - compress_graph() added → semantic compression for inference context
    (implemented in this file as CodeGraph.to_slim_dict)
"""
from dataclasses import dataclass, field
from collections import Counter
from typing import Dict, Any


@dataclass
class CodeGraph:
    """
    In-memory record of every file the agent has written during one episode.

    Metadata keys read by this class (all optional unless noted):
      - "status": the value "syntax_error" makes update() ignore the file
      - "functions": list of dicts, each with a required "name" key
      - "imports": list of dotted module path strings
      - "language": language tag, reported as "py" when absent
      - "conventions": dict with boolean "uses_try_catch" / "uses_type_hints"
    """

    # Episode identifier; only stored here, never read by this class.
    episode_seed: int = 0
    # File key (see _file_to_key) → metadata dict for that file.
    components: Dict[str, Dict[str, Any]] = field(default_factory=dict)
    # Codebase-wide style summary, rebuilt by _infer_conventions().
    conventions: Dict[str, Any] = field(default_factory=dict)

    def update(self, filename: str, metadata: Dict[str, Any]) -> None:
        """
        Add or replace a file's metadata in the graph, then re-derive conventions.

        Files whose metadata reports status "syntax_error" are ignored so that
        broken code does not pollute the graph. The stored entry is a shallow
        copy of `metadata` tagged with its filename under "file"; the caller's
        dict is left untouched.
        """
        if metadata.get("status") == "syntax_error":
            return   # Don't pollute graph with broken code
        entry = dict(metadata)
        entry["file"] = filename
        self.components[_file_to_key(filename)] = entry
        self._infer_conventions()

    def _infer_conventions(self) -> None:
        """
        Derive dominant codebase style from all components.

        Naming: the most common style wins only when it covers at least 60% of
        all function names; otherwise the result is "mixed" ("unknown" when
        there are no functions at all). A bare majority (51%) would wrongly
        penalise mixed codebases.
        Error handling / type hints: decided per file, with a 50% threshold
        over the number of components.
        """
        all_fns = [
            fn["name"]
            for comp in self.components.values()
            for fn in comp.get("functions", [])
        ]
        if all_fns:
            styles = [_naming_style(n) for n in all_fns]
            top, count = Counter(styles).most_common(1)[0]
            self.conventions["naming"] = top if count / len(styles) >= 0.60 else "mixed"
        else:
            self.conventions["naming"] = "unknown"

        # max(..., 1) avoids division by zero when the graph is empty.
        total = max(len(self.components), 1)

        uses_try = sum(
            1 for c in self.components.values()
            if c.get("conventions", {}).get("uses_try_catch", False)
        )
        self.conventions["error_handling"] = "try_catch" if uses_try / total >= 0.5 else "none"

        uses_hints = sum(
            1 for c in self.components.values()
            if c.get("conventions", {}).get("uses_type_hints", False)
        )
        self.conventions["uses_type_hints"] = uses_hints / total >= 0.5

    def to_slim_dict(self, limit: int = 6000) -> str:
        """
        compress_graph() — semantic compression for inference.py context.

        Returns an indented JSON string holding the conventions plus, for each
        component: file, language, the first 20 function names, the top-level
        package of the first 15 imports, and the two per-file convention flags.

        When the result is longer than `limit` characters it is shrunk in
        stages so the output stays parseable JSON:
          1. drop every component's "imports" list;
          2. drop whole components, most recently added first, recording how
             many were removed under the top-level key "omitted_components".
        Only if the text is still too long with no components left is it cut
        at `limit` characters, which may leave it unparseable.
        """
        import json
        slim: Dict[str, Any] = {
            "conventions": self.conventions,
            "components": {
                name: {
                    "file": comp.get("file", ""),
                    "language": comp.get("language", "py"),
                    "functions": [f["name"] for f in comp.get("functions", [])][:20],
                    "imports": [i.split(".")[0] for i in comp.get("imports", [])][:15],
                    "uses_try_catch": comp.get("conventions", {}).get("uses_try_catch", False),
                    "uses_type_hints": comp.get("conventions", {}).get("uses_type_hints", False),
                }
                for name, comp in self.components.items()
            },
        }
        result = json.dumps(slim, indent=2)
        if len(result) <= limit:
            return result

        # Stage 1: imports are the least essential field, drop them everywhere.
        for entry in slim["components"].values():
            entry.pop("imports", None)
        result = json.dumps(slim, indent=2)

        # Stage 2: remove whole components from the end so the earliest files
        # (the ones a plain prefix cut would also have kept) survive intact.
        remaining = list(slim["components"])
        omitted = 0
        while len(result) > limit and remaining:
            del slim["components"][remaining.pop()]
            omitted += 1
            slim["omitted_components"] = omitted
            result = json.dumps(slim, indent=2)

        # Last resort: a no-op whenever the staged shrinking was sufficient.
        return result[:limit]


# ── helpers ──────────────────────────────────────────────────────────────────

def _file_to_key(filename: str) -> str:
    """Convert 'src/auth/UserAuth.py' → 'UserAuth'"""
    base = filename.split("/")[-1]
    for ext in (".py", ".js", ".ts", ".tsx", ".jsx"):
        base = base.replace(ext, "")
    return base


def _naming_style(name: str) -> str:
    if "_" in name:
        return "snake_case"
    if name and name[0].isupper():
        return "PascalCase"
    if any(c.isupper() for c in name[1:]):
        return "camelCase"
    return "snake_case"   # all-lowercase defaults to snake