Spaces:

vishaldhakad
/

Openenv

Sleeping

App Files Files Community

Openenv / codegraph /graph.py

vishaldhakad

intial push

eda351c 3 months ago

Raw

History Blame Contribute Delete

4.58 kB

	"""
	codegraph/graph.py — CodeGraph V2

	The innovation that makes SecureCodeEnv unique.
	Structured in-memory database of everything the agent has written this episode.
	Persisted in Redis between steps via pickle.

	V2 changes:
	- tree-sitter replaces ast module → supports Python, JS, TS, TSX
	- 60% threshold for style detection (was 50%) → prevents false penalties
	- "mixed" state added → no penalty when codebase has no clear dominant style
	- compress_graph() added (see CodeGraph.to_slim_dict) → semantic compression for inference context
	"""
	from dataclasses import dataclass, field
	from collections import Counter
	from typing import Dict, Any


	@dataclass
	class CodeGraph:
	    """Structured in-memory record of the files written during one episode.

	    Each call to ``update`` stores one file's metadata under its
	    extension-less base name and recomputes the codebase-wide conventions.
	    """

	    # Seed of the episode; stored here but never read by this class.
	    episode_seed: int = 0
	    # Per-file metadata dicts, keyed by ``_file_to_key(filename)``.
	    components: Dict[str, Dict[str, Any]] = field(default_factory=dict)
	    # Derived style summary: "naming", "error_handling", "uses_type_hints".
	    conventions: Dict[str, Any] = field(default_factory=dict)

	    def update(self, filename: str, metadata: Dict[str, Any]) -> None:
	        """Add or replace a file's metadata in the graph, then re-derive conventions.

	        Metadata whose "status" is "syntax_error" is ignored entirely.
	        Otherwise ``metadata`` is stored by reference and gains a "file" key
	        holding ``filename`` (the caller's dict is modified in place).
	        """
	        if metadata.get("status") == "syntax_error":
	            return  # Don't pollute graph with broken code
	        name = _file_to_key(filename)
	        metadata["file"] = filename
	        self.components[name] = metadata
	        self._infer_conventions()

	    def _infer_conventions(self) -> None:
	        """
	        Derive dominant codebase style from all components.

	        Naming uses a 60% threshold: a bare majority (51%) wrongly penalises
	        mixed codebases. When no style reaches it the result is 'mixed'
	        (per the original note, the consistency grader then awards full
	        marks); with no functions at all the result is 'unknown'.
	        Error handling and type hints are decided per component with a 50%
	        threshold.
	        """
	        all_fns = [
	            f["name"]
	            for comp in self.components.values()
	            for f in comp.get("functions", [])
	        ]
	        if all_fns:
	            styles = [_naming_style(n) for n in all_fns]
	            top, count = Counter(styles).most_common(1)[0]
	            self.conventions["naming"] = top if count / len(styles) >= 0.60 else "mixed"
	        else:
	            self.conventions["naming"] = "unknown"

	        # max(..., 1) keeps the ratios defined for an empty graph.
	        total = max(len(self.components), 1)

	        uses_try = sum(
	            1 for c in self.components.values()
	            if c.get("conventions", {}).get("uses_try_catch", False)
	        )
	        self.conventions["error_handling"] = "try_catch" if uses_try / total >= 0.5 else "none"

	        uses_hints = sum(
	            1 for c in self.components.values()
	            if c.get("conventions", {}).get("uses_type_hints", False)
	        )
	        self.conventions["uses_type_hints"] = uses_hints / total >= 0.5

	    def to_slim_dict(self, limit: int = 6000) -> str:
	        """
	        compress_graph() — semantic compression for inference.py context.

	        Returns indented JSON text holding the conventions plus, per
	        component: file, language, up to 20 function names, up to 15
	        top-level import names and the two per-file convention flags.
	        Function bodies are never included.

	        The result is at most ``limit`` characters long. To get there the
	        view is degraded in steps, stopping as soon as it fits:

	        1. drop every component's "imports" list;
	        2. drop whole components, most recently added first, so the text
	           stays valid JSON instead of being cut mid-token;
	        3. only if the conventions alone still exceed ``limit``, cut the
	           text at ``limit`` characters (no longer valid JSON).
	        """
	        import json

	        slim = {
	            "conventions": self.conventions,
	            "components": {
	                name: {
	                    "file": comp.get("file", ""),
	                    "language": comp.get("language", "py"),
	                    "functions": [f["name"] for f in comp.get("functions", [])][:20],
	                    "imports": [i.split(".")[0] for i in comp.get("imports", [])][:15],
	                    "uses_try_catch": comp.get("conventions", {}).get("uses_try_catch", False),
	                    "uses_type_hints": comp.get("conventions", {}).get("uses_type_hints", False),
	                }
	                for name, comp in self.components.items()
	            },
	        }
	        result = json.dumps(slim, indent=2)
	        if len(result) <= limit:
	            return result

	        # Step 1: imports are the least essential part of the view.
	        for entry in slim["components"].values():
	            entry.pop("imports", None)
	        result = json.dumps(slim, indent=2)

	        # Step 2: shed trailing components rather than slicing the text,
	        # which would hand the consumer syntactically broken JSON.
	        remaining = list(slim["components"])
	        while remaining and len(result) > limit:
	            del slim["components"][remaining.pop()]
	            result = json.dumps(slim, indent=2)

	        # Step 3: last resort, keeps the length guarantee for tiny limits.
	        return result[:limit]


	# ── helpers ──────────────────────────────────────────────────────────────────

	def _file_to_key(filename: str) -> str:
	"""Convert 'src/auth/UserAuth.py' → 'UserAuth'"""
	base = filename.split("/")[-1]
	for ext in (".py", ".js", ".ts", ".tsx", ".jsx"):
	base = base.replace(ext, "")
	return base


	def _naming_style(name: str) -> str:
	if "_" in name:
	return "snake_case"
	if name and name[0].isupper():
	return "PascalCase"
	if any(c.isupper() for c in name[1:]):
	return "camelCase"
	return "snake_case" # all-lowercase defaults to snake