NagaNithin-V committed on
Commit
7952f32
·
1 Parent(s): dead589

Deploy GraphForge OpenEnv — AST-parsed KG code-editing environment

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +5 -0
  2. Dockerfile +27 -0
  3. README.md +31 -5
  4. env/__init__.py +34 -0
  5. env/actions.py +90 -0
  6. env/ast_parser.py +249 -0
  7. env/client.py +36 -0
  8. env/environment.py +467 -0
  9. env/models.py +46 -0
  10. env/server.py +44 -0
  11. env/tasks.py +363 -0
  12. graphforge/__init__.py +24 -0
  13. graphforge/actions/__init__.py +15 -0
  14. graphforge/actions/dispatcher.py +442 -0
  15. graphforge/actions/errors.py +44 -0
  16. graphforge/actions/schema.py +180 -0
  17. graphforge/actions/signature.py +116 -0
  18. graphforge/behavioral/__init__.py +25 -0
  19. graphforge/constraints/__init__.py +49 -0
  20. graphforge/constraints/checker.py +141 -0
  21. graphforge/constraints/schema.py +129 -0
  22. graphforge/graph/__init__.py +23 -0
  23. graphforge/graph/schema.py +308 -0
  24. graphforge/knowledge_graph.py +233 -0
  25. graphforge/materializer/__init__.py +20 -0
  26. graphforge/materializer/codegen.py +169 -0
  27. graphforge/materializer/materialize.py +134 -0
  28. graphforge/materializer/patterns.py +34 -0
  29. graphforge/parser/__init__.py +27 -0
  30. graphforge/repo_parser.py +271 -0
  31. graphforge/repo_registry.py +145 -0
  32. graphforge/reward/__init__.py +45 -0
  33. graphforge/reward/engine.py +211 -0
  34. graphforge/sample_repos/humanize/__init__.py +18 -0
  35. graphforge/sample_repos/humanize/filesize.py +49 -0
  36. graphforge/sample_repos/humanize/number.py +198 -0
  37. graphforge/sample_repos/humanize/time.py +225 -0
  38. graphforge/sample_repos/task_manager/__init__.py +1 -0
  39. graphforge/sample_repos/task_manager/api.py +48 -0
  40. graphforge/sample_repos/task_manager/models.py +47 -0
  41. graphforge/sample_repos/task_manager/storage.py +37 -0
  42. graphforge/sample_repos/task_manager/validators.py +25 -0
  43. graphforge/server/__init__.py +19 -0
  44. graphforge/server/app.py +124 -0
  45. graphforge/server/episode.py +171 -0
  46. graphforge/server/runner.py +144 -0
  47. graphforge/task_generator.py +227 -0
  48. graphforge/tasks/__init__.py +10 -0
  49. graphforge/tasks/bank.py +71 -0
  50. graphforge/tasks/schema.py +45 -0
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .env
5
+ *.egg-info/
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Space Dockerfile.
2
+ # Mirrors the root Dockerfile, exists separately because HF Spaces looks for
3
+ # the Dockerfile inside the Space root by default.
4
+
5
+ FROM python:3.11-slim
6
+
7
+ WORKDIR /app
8
+
9
+ COPY pyproject.toml ./
10
+ COPY graphforge ./graphforge
11
+ COPY env ./env
12
+ COPY openenv.yaml ./
13
+
14
+ RUN pip install --no-cache-dir \
15
+ "pydantic>=2.6" \
16
+ "fastapi>=0.110" \
17
+ "uvicorn[standard]>=0.27" \
18
+ "httpx>=0.27" \
19
+ "openenv-core>=0.1.0" \
20
+ "pyyaml>=6.0"
21
+
22
+ ENV PYTHONUNBUFFERED=1
23
+ ENV PYTHONPATH=/app
24
+
25
+ EXPOSE 7860
26
+
27
+ CMD ["uvicorn", "env.server:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,12 +1,38 @@
1
  ---
2
- title: Graphforge Openenv
3
- emoji: 💻
4
- colorFrom: green
5
  colorTo: purple
6
  sdk: docker
 
7
  pinned: false
8
  license: mit
9
- short_description: A graph-first code-editing RL environment for Python repos.
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: GraphForge OpenEnv
3
+ emoji: 🧱
4
+ colorFrom: indigo
5
  colorTo: purple
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  license: mit
 
10
  ---
11
 
12
+ # GraphForge OpenEnv server
13
+
14
+ Live deployment of the GraphForge environment for the Meta PyTorch OpenEnv
15
+ Hackathon. The server hosts the OpenEnv-compliant `/reset`, `/step`, `/state`
16
+ endpoints over HTTP. Anything that speaks the OpenEnv client protocol (or
17
+ plain JSON) can drive episodes.
18
+
19
+ See the main project repo for the architecture overview, training notebook,
20
+ plots, and writeup.
21
+
22
+ ## Endpoints
23
+
24
+ ```
25
+ POST /reset → GraphForgeObservation
26
+ POST /step { ... } → { observation, reward, done }
27
+ GET /state → GraphForgeState
28
+ GET /healthz
29
+ ```
30
+
31
+ ## Quick smoke test
32
+
33
+ ```bash
34
+ EID=$(curl -s -X POST $SPACE_URL/reset | python3 -c "import sys,json; print(json.load(sys.stdin)['episode_id'])")
35
+ curl -s -X POST $SPACE_URL/step -H 'content-type: application/json' \
36
+ -d '{"kind": "query", "keywords": "validators", "node_type": "all"}' \
37
+ | python3 -m json.tool
38
+ ```
env/__init__.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Multi-turn repo-editing OpenEnv environment.
2
+
3
+ Public surface:
4
+ RepoEditAction, RepoEditObservation, RepoEditState — wire models
5
+ RepoEditEnvironment — OpenEnv environment
6
+ RepoEditEnv — HTTP client
7
+ """
8
+
9
+ from env.actions import (
10
+ AddNodeAction,
11
+ InspectAction,
12
+ QueryAction,
13
+ RemoveNodeAction,
14
+ RepoEditAction,
15
+ SubmitAction,
16
+ UpdateNodeAction,
17
+ )
18
+ from env.client import RepoEditEnv
19
+ from env.environment import RepoEditEnvironment
20
+ from env.models import RepoEditObservation, RepoEditState
21
+
22
+ __all__ = [
23
+ "AddNodeAction",
24
+ "InspectAction",
25
+ "QueryAction",
26
+ "RemoveNodeAction",
27
+ "RepoEditAction",
28
+ "RepoEditEnv",
29
+ "RepoEditEnvironment",
30
+ "RepoEditObservation",
31
+ "RepoEditState",
32
+ "SubmitAction",
33
+ "UpdateNodeAction",
34
+ ]
env/actions.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Action schema for the multi-turn repo-editing environment.
2
+
3
+ All actions are expressed as JSON dicts with a "kind" discriminator.
4
+ The agent emits one action per turn inside <action>...</action> XML tags.
5
+
6
+ Actions
7
+ -------
8
+ query Search the knowledge graph for relevant nodes.
9
+ inspect View the full source of a specific node.
10
+ add_node Insert a new function or class into a module/class.
11
+ update_node Replace the source of an existing node.
12
+ remove_node Delete a node from the graph.
13
+ submit Apply all pending changes, run tests, end the episode.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from typing import Literal
19
+
20
+ from pydantic import BaseModel, ConfigDict
21
+
22
+
23
+ _cfg = ConfigDict(extra="forbid")
24
+
25
+
26
class QueryAction(BaseModel):
    """Action: search the knowledge graph for nodes matching keywords."""

    model_config = _cfg

    kind: Literal["query"] = "query"
    # Free-text search terms.
    keywords: str
    # Node-type filter: "all" | "function" | "class" | "module" | "method".
    node_type: str = "all"
31
+
32
+
33
class InspectAction(BaseModel):
    """Action: view the full source of one node, addressed by its id."""

    model_config = _cfg

    kind: Literal["inspect"] = "inspect"
    # Identifier of the node to display.
    node_id: str
37
+
38
+
39
class AddNodeAction(BaseModel):
    """Action: insert a new function or class under a parent node."""

    model_config = _cfg

    kind: Literal["add_node"] = "add_node"
    # node_id of the parent (module or class).
    parent_id: str
    # Name of the new function/class.
    name: str
    # "function" | "class".
    node_type: str
    # Full source of the new node (including the def/class line).
    code: str
46
+
47
+
48
class UpdateNodeAction(BaseModel):
    """Action: replace the source of an existing node."""

    model_config = _cfg

    kind: Literal["update_node"] = "update_node"
    # Which node to replace.
    node_id: str
    # Full replacement source (including the def/class line).
    new_code: str
53
+
54
+
55
class RemoveNodeAction(BaseModel):
    """Action: delete a node from the graph, addressed by its id."""

    model_config = _cfg

    kind: Literal["remove_node"] = "remove_node"
    # Identifier of the node to delete.
    node_id: str
59
+
60
+
61
class SubmitAction(BaseModel):
    """Action: apply pending changes, run tests and end the episode."""

    model_config = _cfg

    kind: Literal["submit"] = "submit"
64
+
65
+
66
+ RepoEditAction = (
67
+ QueryAction
68
+ | InspectAction
69
+ | AddNodeAction
70
+ | UpdateNodeAction
71
+ | RemoveNodeAction
72
+ | SubmitAction
73
+ )
74
+
75
+
76
def parse_action(raw: dict) -> RepoEditAction:
    """Dispatch a raw action dict to the model selected by its "kind".

    Args:
        raw: Decoded JSON object emitted by the agent. Must expose ``.get``.

    Returns:
        The validated action model instance.

    Raises:
        ValueError: If ``raw`` is not a mapping, or its "kind" is missing,
            not a string, or not one of the known kinds. Errors raised by
            ``model_validate`` for a known kind propagate unchanged.
    """
    mapping = {
        "query": QueryAction,
        "inspect": InspectAction,
        "add_node": AddNodeAction,
        "update_node": UpdateNodeAction,
        "remove_node": RemoveNodeAction,
        "submit": SubmitAction,
    }

    getter = getattr(raw, "get", None)
    if not callable(getter):
        raise ValueError(
            f"Action must be a JSON object, got {type(raw).__name__}. "
            f"Valid kinds: {list(mapping)}"
        )

    kind = getter("kind", "")
    # A non-string kind (e.g. a list) is unhashable and would raise TypeError
    # inside the dict lookup; report it as an unknown kind instead.
    cls = mapping.get(kind) if isinstance(kind, str) else None
    if cls is None:
        raise ValueError(f"Unknown action kind: {kind!r}. Valid: {list(mapping)}")
    return cls.model_validate(raw)
env/ast_parser.py ADDED
@@ -0,0 +1,249 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """AST-based DAG parser and code injection utilities.
2
+
3
+ parse_source(source, module_name) -> CodeDAG
4
+ Parses a Python source string and returns a structured DAG with nodes
5
+ (module, function, class, imported_module) and typed edges (contains, calls, imports).
6
+
7
+ inject_function_body(source, func_name, new_body) -> str
8
+ Replaces the body of func_name in source with new_body, preserving the
9
+ def line and any docstring. Used by the environment's step() method.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ import ast
15
+ from dataclasses import dataclass, field
16
+
17
+
18
+ # ── DAG data model ────────────────────────────────────────────────────────────
19
+
20
@dataclass
class DAGNode:
    """One vertex of the code DAG."""

    name: str
    # One of "module" | "function" | "class" | "imported_module".
    node_type: str
    # Rendered parameter list / return annotation; empty when not applicable.
    signature: str = ""
    # True when the node was detected as an unimplemented stub.
    is_stub: bool = False
    # Short human-readable note about the body.
    body_summary: str = ""
27
+
28
+
29
@dataclass
class DAGEdge:
    """One directed, typed edge of the code DAG."""

    # One of "contains" | "calls" | "imports".
    edge_type: str
    # Name of the node the edge starts from.
    source: str
    # Name of the node the edge points to.
    target: str
34
+
35
+
36
@dataclass
class FunctionInfo:
    """Location and shape metadata for one parsed function."""

    name: str
    signature: str
    is_stub: bool
    # First line of the function, 1-indexed.
    start_line: int
    # Last line of the function, 1-indexed and inclusive.
    end_line: int
    has_docstring: bool
    # 1-indexed; equals start_line when there is no docstring.
    docstring_end_line: int
45
+
46
+
47
@dataclass
class CodeDAG:
    """Nodes, edges and per-function metadata for one parsed module."""

    module_name: str
    nodes: list[DAGNode] = field(default_factory=list)
    edges: list[DAGEdge] = field(default_factory=list)
    function_infos: dict[str, FunctionInfo] = field(default_factory=dict)

    def callers_of(self, func_name: str) -> list[str]:
        """Return the sources of every "calls" edge that targets func_name."""
        found: list[str] = []
        for edge in self.edges:
            if edge.edge_type != "calls":
                continue
            if edge.target == func_name:
                found.append(edge.source)
        return found

    def callees_of(self, func_name: str) -> list[str]:
        """Return the targets of every "calls" edge that starts at func_name."""
        found: list[str] = []
        for edge in self.edges:
            if edge.edge_type != "calls":
                continue
            if edge.source == func_name:
                found.append(edge.target)
        return found

    def stub_functions(self) -> list[str]:
        """Return the names of function nodes flagged as stubs, in node order."""
        names: list[str] = []
        for item in self.nodes:
            if item.node_type == "function" and item.is_stub:
                names.append(item.name)
        return names
62
+
63
+
64
+ # ── helpers ───────────────────────────────────────────────────────────────────
65
+
66
+ def _signature(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
67
+ parts = []
68
+ for arg in node.args.args:
69
+ ann = f": {ast.unparse(arg.annotation)}" if arg.annotation else ""
70
+ parts.append(f"{arg.arg}{ann}")
71
+ ret = f" -> {ast.unparse(node.returns)}" if node.returns else ""
72
+ return f"({', '.join(parts)}){ret}"
73
+
74
+
75
+ def _is_stub(node: ast.FunctionDef | ast.AsyncFunctionDef, source: str) -> bool:
76
+ func_src = "\n".join(source.splitlines()[node.lineno - 1:node.end_lineno])
77
+ if "# STUB" in func_src:
78
+ return True
79
+ # body that is just "raise NotImplementedError"
80
+ stmts = [s for s in node.body
81
+ if not (isinstance(s, ast.Expr) and isinstance(s.value, ast.Constant))]
82
+ if len(stmts) == 1 and isinstance(stmts[0], ast.Raise):
83
+ exc = stmts[0].exc
84
+ if isinstance(exc, ast.Name) and exc.id == "NotImplementedError":
85
+ return True
86
+ if isinstance(exc, ast.Call) and isinstance(exc.func, ast.Name) and exc.func.id == "NotImplementedError":
87
+ return True
88
+ return False
89
+
90
+
91
+ def _extract_calls(node: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
92
+ calls: set[str] = set()
93
+ for child in ast.walk(node):
94
+ if isinstance(child, ast.Call):
95
+ if isinstance(child.func, ast.Name):
96
+ calls.add(child.func.id)
97
+ return calls
98
+
99
+
100
+ # ── main parser ───────────────────────────────────────────────────────────────
101
+
102
def parse_source(source: str, module_name: str = "module") -> CodeDAG:
    """Parse Python source into a CodeDAG.

    The DAG holds one "module" node, one "imported_module" node per distinct
    imported name, a "function" node per top-level function and per method of
    a top-level class, and a "class" node per top-level class. Edges are
    "imports", "contains" and same-module "calls" between top-level functions.

    Args:
        source: Python source text.
        module_name: Name used for the module node.

    Returns:
        The populated CodeDAG.

    Raises:
        SyntaxError: If ``source`` cannot be parsed by ``ast.parse``.
    """
    tree = ast.parse(source)
    dag = CodeDAG(module_name=module_name)
    dag.nodes.append(DAGNode(name=module_name, node_type="module"))

    func_names: set[str] = set()
    seen_imports: set[str] = set()

    def _add_import(name: str) -> None:
        # ast.walk also visits nested imports and the same module may be
        # imported several times; emit each imported name only once.
        if name in seen_imports:
            return
        seen_imports.add(name)
        dag.nodes.append(DAGNode(name=name, node_type="imported_module"))
        dag.edges.append(DAGEdge("imports", module_name, name))

    # imports
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                _add_import(alias.asname or alias.name)
        elif isinstance(node, ast.ImportFrom) and node.module:
            _add_import(node.module)

    # top-level functions and classes
    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            sig = _signature(node)
            stub = _is_stub(node, source)
            first = node.body[0] if node.body else None
            # Only a leading *string* constant is a docstring; `...` is not.
            has_doc = (
                isinstance(first, ast.Expr)
                and isinstance(first.value, ast.Constant)
                and isinstance(first.value.value, str)
            )
            doc_end = first.end_lineno if has_doc else node.lineno

            dag.nodes.append(DAGNode(
                name=node.name,
                node_type="function",
                signature=sig,
                is_stub=stub,
                body_summary="STUB — needs implementation" if stub else "(implemented)",
            ))
            dag.edges.append(DAGEdge("contains", module_name, node.name))
            dag.function_infos[node.name] = FunctionInfo(
                name=node.name,
                signature=sig,
                is_stub=stub,
                start_line=node.lineno,
                end_line=node.end_lineno,
                has_docstring=has_doc,
                docstring_end_line=doc_end,
            )
            func_names.add(node.name)

        elif isinstance(node, ast.ClassDef):
            dag.nodes.append(DAGNode(name=node.name, node_type="class"))
            dag.edges.append(DAGEdge("contains", module_name, node.name))
            for item in node.body:
                if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    # Methods are registered under "Class.method".
                    qname = f"{node.name}.{item.name}"
                    dag.nodes.append(DAGNode(
                        name=qname,
                        node_type="function",
                        signature=_signature(item),
                        is_stub=_is_stub(item, source),
                    ))
                    dag.edges.append(DAGEdge("contains", node.name, qname))
                    func_names.add(qname)

    # call edges (same-module only, self-calls excluded)
    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            for callee in _extract_calls(node):
                if callee in func_names and callee != node.name:
                    dag.edges.append(DAGEdge("calls", node.name, callee))

    return dag
175
+
176
+
177
+ # ── code injection ────────────────────────────────────────────────────────────
178
+
179
def _header_colon(lines: list[str], def_line: int) -> tuple[int, int]:
    """Locate the colon that ends the ``def`` header starting at def_line.

    Returns (line, col): line is 1-indexed within ``lines``, col is the
    0-indexed character column of the colon.
    """
    import io
    import tokenize

    depth = 0
    reader = io.StringIO("".join(lines[def_line - 1:])).readline
    for tok in tokenize.generate_tokens(reader):
        if tok.type != tokenize.OP:
            continue
        if tok.string in ("(", "[", "{"):
            depth += 1
        elif tok.string in (")", "]", "}"):
            depth -= 1
        elif tok.string == ":" and depth == 0:
            # First colon outside any bracket terminates the header.
            return def_line + tok.start[0] - 1, tok.start[1]
    raise ValueError("could not locate the end of the function header")


def inject_function_body(source: str, func_name: str, new_body: str) -> str:
    """Replace the body of top-level function func_name with new_body.

    The complete ``def`` header (including headers spanning several lines)
    and a leading string docstring are preserved. ``new_body`` may carry any
    common indentation; it is normalised to four spaces. A blank or
    whitespace-only ``new_body`` yields a ``pass`` body.

    Args:
        source: Full module source.
        func_name: Name of a top-level function in ``source``.
        new_body: Raw text of the replacement body.

    Returns:
        The module source with the function body replaced.

    Raises:
        SyntaxError: If ``source`` cannot be parsed.
        ValueError: If no top-level function named func_name exists.
    """
    tree = ast.parse(source)
    lines = source.splitlines(keepends=True)

    for node in tree.body:
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            continue
        if node.name != func_name:
            continue

        first = node.body[0]
        # Only a leading *string* constant is a docstring; `...` is a body.
        has_doc = (
            isinstance(first, ast.Expr)
            and isinstance(first.value, ast.Constant)
            and isinstance(first.value.value, str)
        )
        colon_line, colon_col = _header_colon(lines, node.lineno)

        if first.lineno == colon_line:
            # One-line form (`def f(): pass`): cut the header at its colon so
            # the new body can follow on its own indented lines.
            before = lines[:colon_line - 1]
            before.append(lines[colon_line - 1][:colon_col + 1] + "\n")
            if has_doc:
                doc_src = ast.get_source_segment(source, first)
                if doc_src:
                    before.append("    " + doc_src + "\n")
        elif has_doc:
            before = lines[:first.end_lineno]
        else:
            before = lines[:colon_line]

        # The kept part may be the last line of a file without a newline.
        if before and not before[-1].endswith("\n"):
            before[-1] += "\n"

        after = lines[node.end_lineno:]  # everything after the function

        # Strip the common leading whitespace, then re-add four spaces.
        raw_lines = new_body.splitlines()
        min_indent = min(
            (len(line) - len(line.lstrip()) for line in raw_lines if line.strip()),
            default=0,
        )
        if any(line.strip() for line in raw_lines):
            body_lines = [
                ("    " + line[min_indent:] + "\n") if line.strip() else "\n"
                for line in raw_lines
            ]
        else:
            # A function needs at least one statement.
            body_lines = ["    pass\n"]

        return "".join(before + body_lines + after)

    raise ValueError(f"Function {func_name!r} not found in source")
226
+
227
+
228
+ # ── DAG → text description (for prompts) ─────────────────────────────────────
229
+
230
def dag_to_text(dag: CodeDAG) -> str:
    """Render the DAG as a compact text block for the agent prompt.

    The output has a module header, a "### Nodes" list (one line per node of
    a known type) and an "### Edges" list (one line per edge), joined with
    newlines.
    """
    # Labels for node types that render as "- <label> <name>".
    plain_labels = {
        "module": "[MODULE]",
        "class": "[CLASS]",
        "imported_module": "[IMPORT]",
    }

    out: list[str] = [f"## Module: {dag.module_name}", "", "### Nodes"]

    for item in dag.nodes:
        kind = item.node_type
        if kind == "function":
            status = "[ STUB ]" if item.is_stub else "[ready ]"
            out.append(f"- [FUNC] {status} {item.name}{item.signature}")
        elif kind in plain_labels:
            out.append(f"- {plain_labels[kind]} {item.name}")

    out.extend(["", "### Edges"])
    out.extend(
        f"- {edge.source} --{edge.edge_type}--> {edge.target}" for edge in dag.edges
    )

    return "\n".join(out)
env/client.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """HTTP client for the repo-editing environment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ import httpx
8
+
9
+ from env.models import RepoEditObservation, RepoEditState
10
+
11
+
12
class RepoEditEnv:
    """Synchronous HTTP client for the repo-editing environment server.

    Usable as a context manager; leaving the ``with`` block closes the
    underlying ``httpx.Client``.
    """

    def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 60.0) -> None:
        # Trailing slashes are removed so request paths join cleanly.
        root = base_url.rstrip("/")
        self._client = httpx.Client(base_url=root, timeout=timeout)

    def reset(self, task_id: str | None = None) -> RepoEditObservation:
        """POST /reset (optionally selecting a task) and parse the observation."""
        query: dict[str, str] = {}
        if task_id:
            query["task_id"] = task_id
        response = self._client.post("/reset", params=query)
        response.raise_for_status()
        return RepoEditObservation.model_validate(response.json())

    def step(self, action_dict: dict[str, Any]) -> dict[str, Any]:
        """POST the action as the JSON body of /step and return the decoded reply."""
        response = self._client.post("/step", json=action_dict)
        response.raise_for_status()
        return response.json()

    def state(self) -> RepoEditState:
        """GET /state and parse it into a RepoEditState."""
        response = self._client.get("/state")
        response.raise_for_status()
        return RepoEditState.model_validate(response.json())

    def __enter__(self) -> "RepoEditEnv":
        return self

    def __exit__(self, *_: object) -> None:
        self._client.close()
env/environment.py ADDED
@@ -0,0 +1,467 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Multi-turn repo-editing OpenEnv environment.
2
+
3
+ Episode flow
4
+ ------------
5
+ reset() Parse the target repo into a KnowledgeGraph. Return an observation
6
+ containing the full graph overview and the task description.
7
+
8
+ step() The agent emits one RepoEditAction per turn:
9
+ - query → search results (information, no graph mutation)
10
+ - inspect → full node source (information)
11
+ - add_node → insert new function/class into the live graph
12
+ - update_node → replace a node's source in the live graph
13
+ - remove_node → delete a node
14
+ - submit → materialise all changes back to disk (temp), run tests,
15
+ compute reward, end episode
16
+
17
+ Reward structure (sparse — designed for long-horizon RL)
18
+ ---------------------------------------------------------
19
+ Per-turn cost : -0.05 (forces efficiency)
20
+ Malformed action : -0.2
21
+ On submit
22
+ all tests pass : +1.0
23
+ partial pass : +0.5 * (n_pass / n_total)
24
+ compile error : 0.0
25
+ Episode cap hit : 0.0
26
+
27
+ This sparse reward deliberately requires the agent to plan, navigate, and
28
+ execute across many turns — it cannot succeed by guessing on the first turn.
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import ast
34
+ import json
35
+ import os
36
+ import re
37
+ import sys
38
+ import tempfile
39
+ import textwrap
40
+ import traceback
41
+ import uuid
42
+ from pathlib import Path
43
+ from typing import Any
44
+
45
+ from env.actions import (
46
+ AddNodeAction,
47
+ InspectAction,
48
+ QueryAction,
49
+ RemoveNodeAction,
50
+ RepoEditAction,
51
+ SubmitAction,
52
+ UpdateNodeAction,
53
+ parse_action,
54
+ )
55
+ from env.models import RepoEditObservation, RepoEditState
56
+ from env.tasks import SAMPLE_REPOS_DIR, TASK_BANK, RepoTask, all_task_ids, get_task
57
+ from graphforge.knowledge_graph import KGEdge, KGNode, KnowledgeGraph
58
+ from graphforge.repo_parser import parse_repo, _node_id
59
+
60
+ try:
61
+ from openenv.core import Environment # type: ignore
62
+ _HAS_OPENENV = True
63
+ except Exception:
64
+ _HAS_OPENENV = False
65
+ from typing import Generic, TypeVar
66
+ A = TypeVar("A")
67
+ O = TypeVar("O")
68
+ S = TypeVar("S")
69
+
70
+ class Environment(Generic[A, O, S]): # type: ignore[no-redef]
71
+ def reset(self) -> O: ...
72
+ def step(self, action: A) -> tuple[O, float, bool]: ...
73
+ def get_state(self) -> S: ...
74
+
75
+
76
+ # ── constants ─────────────────────────────────────────────────────────────────
77
+
78
+ PER_TURN_COST = -0.05
79
+ MALFORMED_PENALTY = -0.2
80
+
81
+
82
+ # ── materialiser (graph → disk) ───────────────────────────────────────────────
83
+
84
+ def _materialise_changes(
85
+ kg: KnowledgeGraph,
86
+ repo_src_path: Path,
87
+ tmp_dir: str,
88
+ ) -> dict[str, str]:
89
+ """Write mutated module sources to tmp_dir. Returns {rel_path: source}."""
90
+ files: dict[str, str] = {}
91
+ for node in kg.all_nodes("module"):
92
+ if not node.file_path:
93
+ continue
94
+ # Re-assemble module source from its children's current sources
95
+ # For simplicity: use the node.source field (which we keep in sync)
96
+ files[node.file_path] = node.source
97
+ dest = Path(tmp_dir) / node.file_path
98
+ dest.parent.mkdir(parents=True, exist_ok=True)
99
+ dest.write_text(node.source, encoding="utf-8")
100
+ # Copy non-py files (like __init__.py markers) from original
101
+ for root, _, fnames in os.walk(str(repo_src_path)):
102
+ for fname in fnames:
103
+ if fname.endswith(".py"):
104
+ continue
105
+ src = Path(root) / fname
106
+ rel = src.relative_to(repo_src_path)
107
+ dst = Path(tmp_dir) / rel
108
+ dst.parent.mkdir(parents=True, exist_ok=True)
109
+ dst.write_bytes(src.read_bytes())
110
+ return files
111
+
112
+
113
+ # ── code injection into module source ─────────────────────────────────────────
114
+
115
+ def _apply_add_node(
116
+ module_source: str,
117
+ code: str,
118
+ class_name: str | None = None,
119
+ ) -> str:
120
+ """Insert code into module_source.
121
+
122
+ If class_name is given, the code is indented and appended inside the class
123
+ body. Otherwise it is appended at module level.
124
+ """
125
+ new_code = textwrap.dedent(code).strip()
126
+ if class_name is None:
127
+ return module_source.rstrip() + "\n\n\n" + new_code + "\n"
128
+
129
+ # Insert indented method just before the end of the class block
130
+ indented = "\n".join(" " + line for line in new_code.splitlines())
131
+ # Find the class definition via AST and splice
132
+ try:
133
+ tree = ast.parse(module_source)
134
+ lines = module_source.splitlines(keepends=True)
135
+ for node in tree.body:
136
+ if isinstance(node, ast.ClassDef) and node.name == class_name:
137
+ insert_at = node.end_lineno # 1-indexed, inclusive last line of class
138
+ before = "".join(lines[:insert_at])
139
+ after = "".join(lines[insert_at:])
140
+ return before.rstrip() + "\n\n" + indented + "\n" + after
141
+ except Exception:
142
+ pass
143
+ # Fallback: append at module level
144
+ return module_source.rstrip() + "\n\n\n" + indented + "\n"
145
+
146
+
147
+ def _apply_update_node(
148
+ module_source: str,
149
+ old_source: str,
150
+ new_code: str,
151
+ ) -> str:
152
+ """Replace old_source verbatim in module_source with new_code."""
153
+ new_code_clean = textwrap.dedent(new_code).strip()
154
+ if old_source in module_source:
155
+ return module_source.replace(old_source, new_code_clean, 1)
156
+ # Fallback: try stripping indentation differences
157
+ return module_source + "\n\n# PATCHED\n" + new_code_clean + "\n"
158
+
159
+
160
+ def _apply_remove_node(module_source: str, old_source: str) -> str:
161
+ if old_source in module_source:
162
+ return module_source.replace(old_source, "", 1)
163
+ return module_source
164
+
165
+
166
+ def _validate_python(source: str) -> tuple[bool, str]:
167
+ try:
168
+ ast.parse(source)
169
+ return True, ""
170
+ except SyntaxError as exc:
171
+ return False, str(exc)
172
+
173
+
174
+ # ── environment ───────────────────────────────────────────────────────────────
175
+
176
+ class RepoEditEnvironment(
177
+ Environment[RepoEditAction, RepoEditObservation, RepoEditState]
178
+ ):
179
+ """Multi-turn OpenEnv environment for repository-level code editing.
180
+
181
+ The agent receives a Knowledge Graph of a real Python repo and must
182
+ navigate it to find the right location, then apply the correct edit.
183
+ Reward is sparse: only granted on a passing submit().
184
+ """
185
+
186
+ def __init__(self, task_id: str | None = None) -> None:
187
+ self._configured_task_id = task_id
188
+ self._task: RepoTask | None = None
189
+ self._kg: KnowledgeGraph | None = None
190
+ self._episode_id: str | None = None
191
+ self._turn: int = 0
192
+ self._done: bool = False
193
+ self._total_reward: float = 0.0
194
+ self._history: list[dict[str, Any]] = []
195
+
196
+ # ----- OpenEnv contract ---------------------------------------------------
197
+
198
+ def reset(self, task_id: str | None = None, task: Any = None) -> RepoEditObservation:
199
+ """Reset the environment.
200
+
201
+ Pass either task_id (looks up TASK_BANK) or a task object directly
202
+ (supports AutoTask from graphforge.task_generator).
203
+ """
204
+ if task is not None:
205
+ tid = task.task_id
206
+ else:
207
+ tid = task_id or self._configured_task_id or _pick_random_task()
208
+ task = TASK_BANK.get(tid)
209
+ if task is None:
210
+ raise ValueError(f"Unknown task_id: {tid!r}. Available: {all_task_ids()}")
211
+
212
+ # Resolve the repo path: use task.repo_path if set, else fall back to sample_repos/
213
+ if getattr(task, "repo_path", None):
214
+ repo_path = task.repo_path
215
+ else:
216
+ repo_path = str(SAMPLE_REPOS_DIR / task.repo_name)
217
+
218
+ self._task = task
219
+ self._kg = parse_repo(repo_path)
220
+ self._episode_id = str(uuid.uuid4())[:8]
221
+ self._turn = 0
222
+ self._done = False
223
+ self._total_reward = 0.0
224
+ self._history = []
225
+
226
+ return RepoEditObservation(
227
+ episode_id=self._episode_id,
228
+ task_id=tid,
229
+ turn=0,
230
+ max_turns=task.max_turns,
231
+ graph_overview=self._kg.overview(),
232
+ task_description=task.description,
233
+ action_result="Episode started. Use query/inspect to navigate, then add_node/update_node to edit, then submit.",
234
+ done=False,
235
+ )
236
+
237
+ def step(self, action: RepoEditAction) -> tuple[RepoEditObservation, float, bool]:
238
+ if self._task is None or self._kg is None:
239
+ raise RuntimeError("step() called before reset()")
240
+ if self._done:
241
+ return self._terminal_obs("Episode already done."), 0.0, True
242
+
243
+ self._turn += 1
244
+ turn_reward = PER_TURN_COST
245
+
246
+ # Dispatch
247
+ try:
248
+ result_text, extra_reward, done = self._dispatch(action)
249
+ turn_reward += extra_reward
250
+ except Exception as exc:
251
+ result_text = f"[ERROR] {exc}"
252
+ turn_reward += MALFORMED_PENALTY
253
+ done = False
254
+
255
+ self._total_reward += turn_reward
256
+
257
+ # Episode cap
258
+ if not done and self._turn >= self._task.max_turns:
259
+ done = True
260
+ result_text += f"\n[Episode cap reached: {self._task.max_turns} turns]"
261
+
262
+ self._done = done
263
+ self._history.append({
264
+ "turn": self._turn,
265
+ "action_kind": getattr(action, "kind", "unknown"),
266
+ "reward": turn_reward,
267
+ })
268
+
269
+ obs = RepoEditObservation(
270
+ episode_id=self._episode_id,
271
+ task_id=self._task.task_id,
272
+ turn=self._turn,
273
+ max_turns=self._task.max_turns,
274
+ graph_overview=self._kg.overview(),
275
+ task_description=self._task.description,
276
+ action_result=result_text,
277
+ turn_reward=turn_reward,
278
+ total_reward=self._total_reward,
279
+ done=done,
280
+ )
281
+ return obs, turn_reward, done
282
+
283
+ def get_state(self) -> RepoEditState:
284
+ return RepoEditState(
285
+ episode_id=self._episode_id,
286
+ task_id=self._task.task_id if self._task else None,
287
+ turn=self._turn,
288
+ done=self._done,
289
+ total_reward=self._total_reward,
290
+ )
291
+
292
+ @property
293
+ def state(self) -> RepoEditState:
294
+ return self.get_state()
295
+
296
+ # ----- action dispatch ----------------------------------------------------
297
+
298
def _dispatch(
    self, action: RepoEditAction
) -> tuple[str, float, bool]:
    """Apply one agent action and report what happened.

    Returns ``(result_text, extra_reward, done)``.

    * Query / inspect actions only read the knowledge graph.
    * Add / update / remove actions edit the owning module's source text and
      the graph together.
    * Submit delegates to :meth:`_run_submit`.

    An unknown node id, or code rejected by ``_validate_python``, returns an
    error message with ``MALFORMED_PENALTY`` before anything is mutated.
    """
    kg = self._kg
    assert kg is not None

    if isinstance(action, QueryAction):
        # "all" is the wire-level wildcard; the graph search takes None instead.
        nt = None if action.node_type == "all" else action.node_type
        results = kg.search(action.keywords, node_type=nt)
        if not results:
            return f"No nodes found for query: {action.keywords!r}", 0.0, False
        lines = [f"Found {len(results)} node(s) matching {action.keywords!r}:"]
        # The header reports the full hit count; only the first ten are listed.
        for n in results[:10]:
            lines.append(f" {n.node_id} ({n.file_path}:{n.line_start})")
        return "\n".join(lines), 0.0, False

    if isinstance(action, InspectAction):
        detail = kg.node_detail(action.node_id)
        return detail, 0.0, False

    if isinstance(action, AddNodeAction):
        parent = kg.get_node(action.parent_id)
        if parent is None:
            return f"[ERROR] parent_id {action.parent_id!r} not found.", MALFORMED_PENALTY, False
        ok, err = _validate_python(action.code)
        if not ok:
            return f"[SYNTAX ERROR in your code] {err}", MALFORMED_PENALTY, False

        # Append to parent module's source
        module_node = _find_module_for(kg, action.parent_id)
        if module_node is None:
            return f"[ERROR] could not find module for parent {action.parent_id!r}", MALFORMED_PENALTY, False

        # `parent` was resolved (and checked for None) above, so reuse it
        # instead of looking the same id up a second time.
        class_name = parent.name if parent.node_type == "class" else None
        module_node.source = _apply_add_node(module_node.source, action.code, class_name=class_name)

        # Register the new node in the KG; unknown node types fall back to "function".
        ntype = action.node_type if action.node_type in ("function", "class", "method") else "function"
        new_id = _node_id(ntype, module_node.file_path, action.name)
        new_node = KGNode(
            node_id=new_id,
            node_type=ntype,
            name=action.name,
            file_path=module_node.file_path,
            line_start=module_node.line_end,
            line_end=module_node.line_end + action.code.count("\n") + 1,
            source=textwrap.dedent(action.code).strip(),
        )
        kg.insert_node(action.parent_id, new_node)
        return f"Added {ntype} `{action.name}` to `{module_node.file_path}`.\nNew node_id: {new_id}", 0.0, False

    if isinstance(action, UpdateNodeAction):
        target = kg.get_node(action.node_id)
        if target is None:
            return f"[ERROR] node_id {action.node_id!r} not found.", MALFORMED_PENALTY, False
        ok, err = _validate_python(action.new_code)
        if not ok:
            return f"[SYNTAX ERROR in your code] {err}", MALFORMED_PENALTY, False

        module_node = _find_module_for(kg, action.node_id)
        if module_node is None:
            return f"[ERROR] could not find module for {action.node_id!r}", MALFORMED_PENALTY, False

        # Patch the module text first (it needs the node's old source), then
        # refresh the node's own stored source.
        old_source = target.source
        module_node.source = _apply_update_node(module_node.source, old_source, action.new_code)
        target.source = textwrap.dedent(action.new_code).strip()
        return f"Updated `{action.node_id}`.", 0.0, False

    if isinstance(action, RemoveNodeAction):
        target = kg.get_node(action.node_id)
        if target is None:
            return f"[ERROR] node_id {action.node_id!r} not found.", MALFORMED_PENALTY, False
        module_node = _find_module_for(kg, action.node_id)
        # Best effort: a node without a resolvable module is still dropped from the graph.
        if module_node:
            module_node.source = _apply_remove_node(module_node.source, target.source)
        kg.remove_node(action.node_id)
        return f"Removed `{action.node_id}`.", 0.0, False

    if isinstance(action, SubmitAction):
        return self._run_submit()

    return f"[ERROR] unrecognised action type: {type(action)}", MALFORMED_PENALTY, False
382
+
383
def _run_submit(self) -> tuple[str, float, bool]:
    """Run the task's tests against the edited sources and end the episode.

    Returns ``(result_text, reward, True)``; the reward and message come from
    ``_run_tests_in_tempdir``.
    """
    graph, current_task = self._kg, self._task
    assert graph is not None and current_task is not None

    score, report = _run_tests_in_tempdir(
        graph, current_task.test_code, current_task.repo_name
    )
    return f"[SUBMIT RESULT]\n{report}", score, True
391
+
392
def _terminal_obs(self, msg: str) -> RepoEditObservation:
    """Build a ``done=True`` observation whose only content is ``msg``.

    The graph overview and task description are left empty; ``task_id`` and
    ``max_turns`` fall back to ``None`` / ``0`` when no task is loaded.
    """
    current_task = self._task
    if current_task:
        task_id, turn_cap = current_task.task_id, current_task.max_turns
    else:
        task_id, turn_cap = None, 0
    return RepoEditObservation(
        episode_id=self._episode_id,
        task_id=task_id,
        turn=self._turn,
        max_turns=turn_cap,
        graph_overview="",
        task_description="",
        action_result=msg,
        done=True,
        total_reward=self._total_reward,
    )
404
+
405
+
406
+ # ── helpers ───────────────────────────────────────────────────────────────────
407
+
408
+ def _find_module_for(kg: KnowledgeGraph, node_id: str) -> KGNode | None:
409
+ """Walk up the parent chain until we hit a module node."""
410
+ current_id = node_id
411
+ seen: set[str] = set()
412
+ while current_id and current_id not in seen:
413
+ seen.add(current_id)
414
+ node = kg.get_node(current_id)
415
+ if node and node.node_type == "module":
416
+ return node
417
+ parent = kg.parent_of(current_id)
418
+ if parent is None:
419
+ break
420
+ current_id = parent.node_id
421
+ return None
422
+
423
+
424
+ def _run_tests_in_tempdir(
425
+ kg: KnowledgeGraph, test_code: str, pkg_name: str
426
+ ) -> tuple[float, str]:
427
+ """Write mutated module sources to a temp dir, import from there, run tests.
428
+
429
+ This works for ANY Python repo — no hardcoded package paths needed.
430
+ The test_code must use short imports: `from <pkg_name>.<module> import ...`
431
+ """
432
+ with tempfile.TemporaryDirectory() as tmpdir:
433
+ pkg_dir = Path(tmpdir) / pkg_name
434
+ pkg_dir.mkdir(parents=True)
435
+ (pkg_dir / "__init__.py").write_text("")
436
+
437
+ # Write each module's current (potentially mutated) source
438
+ for node in kg.all_nodes("module"):
439
+ if not node.file_path or node.file_path == "__init__.py":
440
+ continue
441
+ dest = pkg_dir / node.file_path
442
+ dest.parent.mkdir(parents=True, exist_ok=True)
443
+ dest.write_text(node.source, encoding="utf-8")
444
+
445
+ # Remove any stale cached copies of this package
446
+ stale = [k for k in sys.modules if k == pkg_name or k.startswith(pkg_name + ".")]
447
+ for k in stale:
448
+ del sys.modules[k]
449
+
450
+ sys.path.insert(0, tmpdir)
451
+ try:
452
+ exec(compile(test_code, "<tests>", "exec"), {}) # noqa: S102
453
+ return 1.0, "✓ All tests passed!"
454
+ except AssertionError as exc:
455
+ return 0.0, f"✗ Test failed: {exc}"
456
+ except Exception:
457
+ return 0.0, f"✗ Exception during tests:\n{traceback.format_exc(limit=5)}"
458
+ finally:
459
+ sys.path.remove(tmpdir)
460
+ stale = [k for k in sys.modules if k == pkg_name or k.startswith(pkg_name + ".")]
461
+ for k in stale:
462
+ del sys.modules[k]
463
+
464
+
465
def _pick_random_task() -> str:
    """Return one task id drawn at random from ``all_task_ids()``."""
    import random

    candidates = all_task_ids()
    return random.choice(candidates)
env/models.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Pydantic wire models for the multi-turn repo-editing environment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any, Optional
6
+
7
+ from pydantic import BaseModel, ConfigDict, Field
8
+
9
+ _cfg = ConfigDict(extra="ignore")
10
+
11
+
12
class RepoEditObservation(BaseModel):
    """Observation handed back by ``reset()`` and ``step()``.

    Bundles the current graph overview with the outcome of the most recent
    action. Agents should read ``action_result`` before choosing a next step.
    """

    model_config = _cfg

    # Episode bookkeeping.
    episode_id: Optional[str] = None
    task_id: Optional[str] = None
    turn: int = 0
    max_turns: int = 15

    # Compact text view of the entire repo KG.
    graph_overview: str = ""
    # What the agent needs to accomplish.
    task_description: str = ""
    # Feedback from the last action.
    action_result: str = ""

    # Reward accounting and termination flag.
    turn_reward: float = 0.0
    total_reward: float = 0.0
    done: bool = False

    # Free-form extras; each instance gets its own dict.
    info: dict[str, Any] = Field(default_factory=dict)
35
+
36
+
37
class RepoEditState(BaseModel):
    """Snapshot of episode-level state: ids, turn counter, done flag, reward."""

    model_config = _cfg

    # Identifiers; both are None by default.
    episode_id: Optional[str] = None
    task_id: Optional[str] = None

    # Progress and accumulated reward.
    turn: int = 0
    done: bool = False
    total_reward: float = 0.0
env/server.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI server for the multi-turn repo-editing environment."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ from fastapi import FastAPI, HTTPException
8
+
9
+ from env.actions import RepoEditAction, parse_action
10
+ from env.environment import RepoEditEnvironment
11
+ from env.models import RepoEditObservation, RepoEditState
12
+
13
+ _env = RepoEditEnvironment()
14
+
15
+
16
def _make_app() -> FastAPI:
    """Create the FastAPI app exposing the module-level ``_env`` over HTTP.

    Routes: ``POST /reset``, ``POST /step``, ``GET /state``, ``GET /healthz``.
    """
    api = FastAPI(title="Repo-Edit OpenEnv", version="0.3.0")

    # Start a new episode, optionally for a specific task.
    @api.post("/reset", response_model=RepoEditObservation)
    def reset(task_id: str | None = None) -> RepoEditObservation:
        return _env.reset(task_id=task_id)

    # Parse and apply one action; parse/step errors become HTTP 400.
    @api.post("/step")
    def step(action_dict: dict[str, Any]) -> dict[str, Any]:
        try:
            parsed = parse_action(action_dict)
            obs, reward, done = _env.step(parsed)
        except (ValueError, RuntimeError) as exc:
            raise HTTPException(status_code=400, detail=str(exc)) from exc
        payload = {"observation": obs.model_dump(), "reward": reward, "done": done}
        return payload

    # Read-only view of the current episode state.
    @api.get("/state", response_model=RepoEditState)
    def state() -> RepoEditState:
        return _env.get_state()

    # Liveness probe.
    @api.get("/healthz")
    def healthz() -> dict[str, Any]:
        return {"status": "ok"}

    return api
41
+
42
+
43
+ app = _make_app()
44
+ __all__ = ["app"]
env/tasks.py ADDED
@@ -0,0 +1,363 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Multi-turn repo-editing tasks.
2
+
3
+ Each Task specifies:
4
+ - A target repo to work on (points to a sample_repos/ subdir)
5
+ - A natural-language description of the change to make
6
+ - A set of test functions (Python code strings) that verify the change
7
+ - The maximum number of turns allowed
8
+
9
+ Training tasks are deliberately structured to require multi-step navigation:
10
+ 1. The agent must QUERY the graph to find relevant nodes
11
+ 2. INSPECT nodes to understand the existing code
12
+ 3. ADD or UPDATE nodes to implement the change
13
+ 4. SUBMIT to trigger compilation + test execution
14
+
15
+ This sparse reward structure forces the agent to develop structured planning
16
+ and state tracking across long trajectories — the core theme of this project.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import importlib.util
22
+ import sys
23
+ import textwrap
24
+ import traceback
25
+ from dataclasses import dataclass, field
26
+ from pathlib import Path
27
+
28
+
29
+ SAMPLE_REPOS_DIR = Path(__file__).resolve().parent.parent / "graphforge" / "sample_repos"
30
+
31
+
32
@dataclass
class RepoTask:
    """One repo-editing task: what to change and the test code that checks it."""

    task_id: str
    # Package name (used as tempdir subdir).
    repo_name: str
    # Natural-language task for the agent.
    description: str
    # Python assertions using short imports.
    test_code: str
    max_turns: int = 15
    # 0=easy, 1=medium, 2=hard
    difficulty: int = 0
    hints: list[str] = field(default_factory=list)
    # If set, full path to repo source dir.
    repo_path: str | None = None
42
+
43
+
44
+ TASK_BANK: dict[str, RepoTask] = {}
45
+
46
+
47
+ def _reg(task: RepoTask) -> RepoTask:
48
+ TASK_BANK[task.task_id] = task
49
+ return task
50
+
51
+
52
+ # ── Task 0: add validate_due_date ────────────────────────────────────────────
53
+
54
+ _reg(RepoTask(
55
+ task_id="t0.validate_due_date",
56
+ repo_name="task_manager",
57
+ description=textwrap.dedent("""\
58
+ Add a function `validate_due_date(due_date) -> bool` to `validators.py`.
59
+
60
+ The function should return True if:
61
+ - due_date is None (no deadline), OR
62
+ - due_date is a datetime.date instance
63
+
64
+ It should return False for any other type (strings, integers, etc.).
65
+ """).strip(),
66
+ test_code=textwrap.dedent("""\
67
+ from datetime import date
68
+ from task_manager.validators import validate_due_date
69
+ assert validate_due_date(None) is True, "None is valid (no deadline)"
70
+ assert validate_due_date(date(2025, 1, 1)) is True, "date object is valid"
71
+ assert validate_due_date("2025-01-01") is False, "string is not valid"
72
+ assert validate_due_date(20250101) is False, "int is not valid"
73
+ assert validate_due_date([]) is False, "list is not valid"
74
+ """).strip(),
75
+ max_turns=12,
76
+ hints=[
77
+ "Look in validators.py to see the style of existing validators.",
78
+ "The function signature should be: def validate_due_date(due_date) -> bool",
79
+ "Import datetime.date inside the function or at the top of validators.py.",
80
+ ],
81
+ ))
82
+
83
+ # ── Task 1: add Task.is_overdue ───────────────────────────────────────────────
84
+
85
+ _reg(RepoTask(
86
+ task_id="t1.is_overdue",
87
+ repo_name="task_manager",
88
+ description=textwrap.dedent("""\
89
+ Add a method `is_overdue(self, today: date) -> bool` to the `Task`
90
+ class in `models.py`.
91
+
92
+ The method should return True if:
93
+ - the task has a due_date AND
94
+ - today is strictly after the due_date AND
95
+ - the task is not yet done
96
+
97
+ It should return False if there is no due_date, or if the task is done,
98
+ or if today <= due_date.
99
+ """).strip(),
100
+ test_code=textwrap.dedent("""\
101
+ from datetime import date
102
+ from task_manager.models import Task
103
+
104
+ t_past = Task("x", "low", [], due_date=date(2020, 1, 1))
105
+ t_future = Task("y", "low", [], due_date=date(2099, 1, 1))
106
+ t_none = Task("z", "low", [], due_date=None)
107
+ t_done = Task("d", "low", [], due_date=date(2020, 1, 1))
108
+ t_done.complete()
109
+
110
+ today = date.today()
111
+ assert t_past.is_overdue(today) is True, "past due date → overdue"
112
+ assert t_future.is_overdue(today) is False, "future due date → not overdue"
113
+ assert t_none.is_overdue(today) is False, "no due date → not overdue"
114
+ assert t_done.is_overdue(today) is False, "done task → not overdue"
115
+ """).strip(),
116
+ max_turns=15,
117
+ difficulty=1,
118
+ hints=[
119
+ "The Task class is in models.py.",
120
+ "The method should check self.due_date, today, and self.done.",
121
+ ],
122
+ ))
123
+
124
+ # ── Task 2: add TaskStore.find_by_tag ─────────────────────────────────────────
125
+
126
+ _reg(RepoTask(
127
+ task_id="t2.find_by_tag",
128
+ repo_name="task_manager",
129
+ description=textwrap.dedent("""\
130
+ Add a method `find_by_tag(self, tag: str) -> list[Task]` to the
131
+ `TaskStore` class in `storage.py`.
132
+
133
+ The method should return a list of all tasks that have `tag` in
134
+ their `tags` list. Return an empty list if no tasks match.
135
+ """).strip(),
136
+ test_code=textwrap.dedent("""\
137
+ from task_manager.models import Task
138
+ from task_manager.storage import TaskStore
139
+
140
+ store = TaskStore()
141
+ store.add(Task("t1", "high", ["python", "backend"], None))
142
+ store.add(Task("t2", "low", ["frontend"], None))
143
+ store.add(Task("t3", "medium", ["python"], None))
144
+
145
+ result = store.find_by_tag("python")
146
+ assert len(result) == 2, f"Expected 2, got {len(result)}"
147
+ titles = {t.title for t in result}
148
+ assert titles == {"t1", "t3"}, f"Wrong titles: {titles}"
149
+
150
+ empty = store.find_by_tag("devops")
151
+ assert empty == [], f"Expected [], got {empty}"
152
+ """).strip(),
153
+ max_turns=15,
154
+ difficulty=1,
155
+ ))
156
+
157
+ # ── Task 3 (hard): enforce priority validation in api.create_task ─────────────
158
+
159
+ _reg(RepoTask(
160
+ task_id="t3.enforce_priority",
161
+ repo_name="task_manager",
162
+ description=textwrap.dedent("""\
163
+ Update the `create_task` function in `api.py` so that it validates
164
+ the `priority` argument using `validate_priority` from `validators.py`.
165
+
166
+ If the priority is invalid, raise `ValueError` with a clear message.
167
+ The existing validations for title and tags must still work.
168
+
169
+ Note: `validate_priority` already exists in validators.py.
170
+ You must import and call it inside `create_task`.
171
+ """).strip(),
172
+ test_code=textwrap.dedent("""\
173
+ from task_manager import api as _api
174
+ _api.reset_store() # clean state between runs
175
+
176
+ # valid priority passes through
177
+ t = _api.create_task("Buy milk", priority="high")
178
+ assert t.priority == "high"
179
+
180
+ # invalid priority raises ValueError
181
+ raised = False
182
+ try:
183
+ _api.create_task("Bad task", priority="urgent")
184
+ except ValueError:
185
+ raised = True
186
+ assert raised, "create_task should raise ValueError for invalid priority"
187
+
188
+ # title validation still works
189
+ raised2 = False
190
+ try:
191
+ _api.create_task("", priority="low")
192
+ except ValueError:
193
+ raised2 = True
194
+ assert raised2, "create_task should still reject empty title"
195
+ """).strip(),
196
+ max_turns=18,
197
+ difficulty=2,
198
+ hints=[
199
+ "api.py already imports validate_title and validate_tags from validators.",
200
+ "You need to also import validate_priority and call it in create_task.",
201
+ ],
202
+ ))
203
+
204
+
205
+ # ── Humanize tasks (real-world library) ──────────────────────────────────────
206
+
207
+ _reg(RepoTask(
208
+ task_id="t4.intpercent",
209
+ repo_name="humanize",
210
+ description=textwrap.dedent("""\
211
+ Add a function `intpercent(value: float, decimal_places: int = 1) -> str`
212
+ to `number.py`.
213
+
214
+ The function should convert a fraction to a percentage string:
215
+ 0.0 → "0.0%"
216
+ 0.5 → "50.0%"
217
+ 0.753 → "75.3%"
218
+ 1.0 → "100.0%"
219
+
220
+ Use `decimal_places` to control how many digits appear after the decimal.
221
+ If decimal_places=0, return an integer percentage with no decimal point.
222
+ """).strip(),
223
+ test_code=textwrap.dedent("""\
224
+ from humanize.number import intpercent
225
+ assert intpercent(0.0) == "0.0%", f"got {intpercent(0.0)!r}"
226
+ assert intpercent(0.5) == "50.0%", f"got {intpercent(0.5)!r}"
227
+ assert intpercent(0.753) == "75.3%", f"got {intpercent(0.753)!r}"
228
+ assert intpercent(1.0) == "100.0%", f"got {intpercent(1.0)!r}"
229
+ assert intpercent(0.5, decimal_places=0) == "50%", f"got {intpercent(0.5, decimal_places=0)!r}"
230
+ """).strip(),
231
+ max_turns=12,
232
+ difficulty=0,
233
+ hints=[
234
+ "Look at number.py — the existing functions show the style to follow.",
235
+ "Use f-string formatting: f'{value * 100:.{decimal_places}f}%'",
236
+ ],
237
+ ))
238
+
239
+ _reg(RepoTask(
240
+ task_id="t5.naturalfilecount",
241
+ repo_name="humanize",
242
+ description=textwrap.dedent("""\
243
+ Add a function `naturalfilecount(n: int) -> str` to `filesize.py`.
244
+
245
+ The function should return a human-readable file count:
246
+ 0 → "no files"
247
+ 1 → "1 file"
248
+ 2 → "2 files"
249
+ 99 → "99 files"
250
+ """).strip(),
251
+ test_code=textwrap.dedent("""\
252
+ from humanize.filesize import naturalfilecount
253
+ assert naturalfilecount(0) == "no files", f"got {naturalfilecount(0)!r}"
254
+ assert naturalfilecount(1) == "1 file", f"got {naturalfilecount(1)!r}"
255
+ assert naturalfilecount(2) == "2 files", f"got {naturalfilecount(2)!r}"
256
+ assert naturalfilecount(99) == "99 files", f"got {naturalfilecount(99)!r}"
257
+ """).strip(),
258
+ max_turns=12,
259
+ difficulty=0,
260
+ hints=[
261
+ "Look at filesize.py — naturalsize is the only function there.",
262
+ "This is a short function: handle n==0, n==1, and n>1 as three cases.",
263
+ ],
264
+ ))
265
+
266
+ _reg(RepoTask(
267
+ task_id="t6.metric",
268
+ repo_name="humanize",
269
+ description=textwrap.dedent("""\
270
+ Add a function `metric(value: float, unit: str = "") -> str` to `number.py`.
271
+
272
+ The function should format a number using SI metric prefixes:
273
+ 1_500_000 → "1.5 M"
274
+ 2_000 → "2.0 k"
275
+ 500 → "500" (no prefix below 1000)
276
+
277
+ Supported prefixes (largest to smallest): T (10¹²), G (10⁹), M (10⁶), k (10³).
278
+ If a unit is provided, append it after the prefix: metric(1500, "Hz") → "1.5 kHz".
279
+ Always format the scaled number to 1 decimal place.
280
+ """).strip(),
281
+ test_code=textwrap.dedent("""\
282
+ from humanize.number import metric
283
+ assert metric(1_500_000) == "1.5 M", f"got {metric(1_500_000)!r}"
284
+ assert metric(2_000) == "2.0 k", f"got {metric(2_000)!r}"
285
+ assert metric(500) == "500", f"got {metric(500)!r}"
286
+ assert metric(1_500, "Hz") == "1.5 kHz", f"got {metric(1_500, 'Hz')!r}"
287
+ assert metric(2e9, "W") == "2.0 GW", f"got {metric(2e9, 'W')!r}"
288
+ """).strip(),
289
+ max_turns=15,
290
+ difficulty=1,
291
+ hints=[
292
+ "Loop through prefixes from largest to smallest: (1e12,'T'), (1e9,'G'), (1e6,'M'), (1e3,'k').",
293
+ "If abs(value) >= threshold, scale and format; otherwise return str(int(value)).",
294
+ ],
295
+ ))
296
+
297
+ _reg(RepoTask(
298
+ task_id="t7.age",
299
+ repo_name="humanize",
300
+ description=textwrap.dedent("""\
301
+ Add a function `age(birth_date) -> str` to `time.py`.
302
+
303
+ The function receives a `datetime.date` and returns a human-readable age:
304
+ - If the person is under 1 year old, return "X months old" (use 30-day months).
305
+ - If exactly 1 year, return "1 year old".
306
+ - Otherwise return "X years old".
307
+
308
+ Use `datetime.date.today()` as the reference point.
309
+ Assume birth_date is always a valid date in the past.
310
+ """).strip(),
311
+ test_code=textwrap.dedent("""\
312
+ import datetime as dt
313
+ from humanize.time import age
314
+
315
+ today = dt.date.today()
316
+ dob_25y = today.replace(year=today.year - 25)
317
+ dob_1y = today.replace(year=today.year - 1)
318
+ dob_6m = today - dt.timedelta(days=182)
319
+ dob_2m = today - dt.timedelta(days=61)
320
+
321
+ assert age(dob_25y) == "25 years old", f"got {age(dob_25y)!r}"
322
+ assert age(dob_1y) == "1 year old", f"got {age(dob_1y)!r}"
323
+ assert age(dob_6m) == "6 months old", f"got {age(dob_6m)!r}"
324
+ assert age(dob_2m) == "2 months old", f"got {age(dob_2m)!r}"
325
+ """).strip(),
326
+ max_turns=15,
327
+ difficulty=1,
328
+ hints=[
329
+ "import datetime as dt is already at the top of time.py.",
330
+ "days = (dt.date.today() - birth_date).days; years = days // 365; months = days // 30",
331
+ ],
332
+ ))
333
+
334
+
335
+ # ── test runner ───────────────────────────────────────────────────────────────
336
+
337
+ def run_tests(task: RepoTask) -> tuple[bool, str]:
338
+ """Execute task.test_code and return (passed, message)."""
339
+ # Reload all task_manager modules to pick up any source-level changes
340
+ _reload_task_manager()
341
+ try:
342
+ exec(compile(task.test_code, "<test>", "exec"), {}) # noqa: S102
343
+ return True, "All assertions passed."
344
+ except AssertionError as exc:
345
+ return False, f"AssertionError: {exc}"
346
+ except Exception:
347
+ return False, traceback.format_exc(limit=5)
348
+
349
+
350
+ def _reload_task_manager() -> None:
351
+ """Force-reload all task_manager submodules so edits take effect."""
352
+ prefix = "graphforge.sample_repos.task_manager"
353
+ to_reload = [k for k in sys.modules if k.startswith(prefix)]
354
+ for mod_name in to_reload:
355
+ del sys.modules[mod_name]
356
+
357
+
358
def all_task_ids() -> list[str]:
    """Return the ids of all registered tasks, in registration order."""
    return list(TASK_BANK)
360
+
361
+
362
def get_task(task_id: str) -> RepoTask | None:
    """Look up a registered task by id; ``None`` when the id is unknown."""
    return TASK_BANK[task_id] if task_id in TASK_BANK else None
graphforge/__init__.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """GraphForge — graph-first code generation environment for long-horizon RL.
2
+
3
+ The agent constructs Python programs by mutating a typed function-call graph;
4
+ source files are a deterministic projection of the canonical graph.
5
+
6
+ Top-level subsystems:
7
+ graph canonical graph schema (Modules, Nodes, Edges)
8
+ actions eleven-action surface, atomic dispatcher with rollback
9
+ types signature parser + edge type-flow validator
10
+ templates ~25-template body library, parameterized
11
+ materializer graph -> Python source
12
+ parser Python source -> graph (round-trip)
13
+ validator parse / import / mypy --strict gate
14
+ behavioral hypothesis-based property test runner
15
+ constraints per-kind constraint checker dispatch
16
+ reward reward engine (per-turn + terminal)
17
+ tasks task bank + variant generator
18
+ server FastAPI OpenEnv server
19
+ training GRPO multi-turn rollout
20
+
21
+ See README.md for design rationale and PROPOSAL.md for the full spec.
22
+ """
23
+
24
+ __version__ = "0.0.1"
graphforge/actions/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Action surface for GraphForge.
2
+
3
+ Public API:
4
+
5
+ from graphforge.actions import dispatch, ActionResult
6
+ from graphforge.actions.schema import Action, AddNode, ...
7
+ from graphforge.actions.errors import ActionError
8
+
9
+ See PROPOSAL.md §4 for the full action vocabulary.
10
+ """
11
+
12
+ from graphforge.actions.dispatcher import ActionResult, dispatch
13
+ from graphforge.actions.errors import ActionError
14
+
15
+ __all__ = ["ActionError", "ActionResult", "dispatch"]
graphforge/actions/dispatcher.py ADDED
@@ -0,0 +1,442 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Atomic action dispatcher.
2
+
3
+ Applies an :class:`Action` to a :class:`Graph`. Every mutation is atomic:
4
+ the dispatcher snapshots the graph before the handler runs and restores it on
5
+ any failure. Failures surface as :class:`ActionError` with a stable code, never
6
+ as silent partial state.
7
+
8
+ Information actions (query_*, materialize_*, run_*) are routed but their
9
+ implementations live in their respective subsystems and are stubbed for now.
10
+ ``submit`` returns a sentinel so the episode runner can recognize termination.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass
16
+ from typing import Any
17
+
18
+ from graphforge.actions import errors as E
19
+ from graphforge.actions.schema import (
20
+ Action,
21
+ AddEdge,
22
+ AddModule,
23
+ AddNode,
24
+ AttachBody,
25
+ MaterializeAndValidate,
26
+ QuerySpec,
27
+ QuerySubgraph,
28
+ QueryTypes,
29
+ RemoveEdge,
30
+ RemoveModule,
31
+ RemoveNode,
32
+ RunBehavioralTests,
33
+ SetNodeModule,
34
+ Submit,
35
+ )
36
+ from graphforge.actions.signature import parse_signature
37
+ from graphforge.graph.schema import (
38
+ ArgMapping,
39
+ Edge,
40
+ Graph,
41
+ Module,
42
+ Node,
43
+ )
44
+ from graphforge.templates import get_template, validate_args
45
+
46
+
47
+ # ---- result envelope -------------------------------------------------
48
+
49
+
50
@dataclass
class ActionResult:
    """Outcome envelope produced by :func:`dispatch`.

    ``ok`` says whether the action succeeded, ``payload`` carries the result
    (or the error's dict form), and ``terminal`` marks an episode-ending action.
    """

    ok: bool
    payload: dict[str, Any]
    terminal: bool = False

    @classmethod
    def success(cls, **payload: Any) -> "ActionResult":
        """Successful, non-terminal result whose payload is the given keywords."""
        return cls(True, payload, False)

    @classmethod
    def failure(cls, err: E.ActionError) -> "ActionResult":
        """Failed, non-terminal result whose payload is ``err.to_dict()``."""
        details = err.to_dict()
        return cls(False, details, False)

    @classmethod
    def terminate(cls, **payload: Any) -> "ActionResult":
        """Successful result that also marks the episode as finished."""
        return cls(True, payload, True)
69
+
70
+
71
+ # ---- dispatcher ------------------------------------------------------
72
+
73
+
74
def dispatch(graph: Graph, action: Action) -> ActionResult:
    """Apply ``action`` to ``graph`` in place, rolling back on any failure.

    A snapshot is taken before routing. If the handler raises, the graph is
    restored from it and a failure result is returned. Exceptions other than
    :class:`ActionError` are reported with the ``SCHEMA_REJECTION`` code and
    an ``unhandled: ...`` message.
    """
    checkpoint = graph.snapshot()
    try:
        return _route(graph, action)
    except E.ActionError as err:
        failure = err
    except Exception as exc:  # pragma: no cover — unexpected handler bug
        failure = E.ActionError(E.SCHEMA_REJECTION, f"unhandled: {exc}")
    # Only reached when the handler raised: undo partial edits, then report.
    _restore(graph, checkpoint)
    return ActionResult.failure(failure)
91
+
92
+
93
+ def _restore(graph: Graph, snap: Graph) -> None:
94
+ graph.modules = snap.modules
95
+ graph.nodes = snap.nodes
96
+ graph.edges = snap.edges
97
+
98
+
99
def _route(graph: Graph, action: Action) -> ActionResult:
    """Hand ``action`` to the handler registered for its type.

    Entries are tried top to bottom and the first ``isinstance`` match wins.
    Raises :class:`ActionError` (``SCHEMA_REJECTION``) for an unknown type.
    """
    routes = (
        # Mutations
        (AddModule, _h_add_module),
        (RemoveModule, _h_remove_module),
        (AddNode, _h_add_node),
        (RemoveNode, _h_remove_node),
        (SetNodeModule, _h_set_node_module),
        (AttachBody, _h_attach_body),
        (AddEdge, _h_add_edge),
        (RemoveEdge, _h_remove_edge),
        # Information (delegated; stubs for now)
        (QuerySpec, _h_query_spec),
        (QuerySubgraph, _h_query_subgraph),
        (QueryTypes, _h_query_types),
        (MaterializeAndValidate, _h_materialize),
        (RunBehavioralTests, _h_run_tests),
        (Submit, _h_submit),
    )
    for action_cls, handler in routes:
        if isinstance(action, action_cls):
            return handler(graph, action)
    raise E.ActionError(E.SCHEMA_REJECTION, f"unknown action: {type(action).__name__}")
131
+
132
+
133
+ # ---- mutation handlers ----------------------------------------------
134
+
135
+
136
def _h_add_module(graph: Graph, a: AddModule) -> ActionResult:
    """Append a new module to the graph.

    Raises :class:`ActionError` (``NAME_COLLISION``) if the name is taken.
    """
    already_there = graph.find_module(a.name) is not None
    if already_there:
        raise E.ActionError(
            E.NAME_COLLISION, f"module {a.name!r} already exists", name=a.name
        )
    new_module = Module(name=a.name, responsibility=a.responsibility)
    graph.modules.append(new_module)
    return ActionResult.success(added_module=a.name)
143
+
144
+
145
def _h_remove_module(graph: Graph, a: RemoveModule) -> ActionResult:
    """Remove an empty module from the graph.

    Raises :class:`ActionError` with ``UNKNOWN_MODULE`` if it does not exist,
    or ``MODULE_NOT_EMPTY`` (including the node count) if nodes still live in it.
    """
    if graph.find_module(a.name) is None:
        raise E.ActionError(E.UNKNOWN_MODULE, f"module {a.name!r} does not exist", name=a.name)

    # Count once; a non-zero count is both the emptiness check and the detail reported.
    occupants = sum(1 for n in graph.nodes if n.module == a.name)
    if occupants:
        raise E.ActionError(
            E.MODULE_NOT_EMPTY,
            f"module {a.name!r} still contains nodes",
            name=a.name,
            node_count=occupants,
        )

    survivors = [m for m in graph.modules if m.name != a.name]
    graph.modules = survivors
    return ActionResult.success(removed_module=a.name)
158
+
159
+
160
def _h_add_node(graph: Graph, a: AddNode) -> ActionResult:
    """Declare a new node inside an existing module.

    Raises :class:`ActionError` with ``UNKNOWN_MODULE`` if the module is
    missing, ``NAME_COLLISION`` if the module already has a node of that
    name, or ``SCHEMA_REJECTION`` if ``parse_signature`` rejects the signature.
    The new node's ``decl_order`` is one more than the highest in the graph
    (``0`` for the first node).
    """
    module_name, node_name = a.module, a.name

    if graph.find_module(module_name) is None:
        raise E.ActionError(E.UNKNOWN_MODULE, f"module {a.module!r} does not exist", name=module_name)

    if graph.find_node(node_name, module_name) is not None:
        raise E.ActionError(
            E.NAME_COLLISION,
            f"node {a.module}.{a.name} already exists",
            name=node_name,
            module=module_name,
        )

    # Surface signature parse — catches errors that the pydantic regex misses.
    try:
        parse_signature(a.signature)
    except ValueError as ve:
        raise E.ActionError(E.SCHEMA_REJECTION, str(ve), signature=a.signature) from ve

    highest = max((n.decl_order for n in graph.nodes), default=-1)
    decl_order = highest + 1
    node = Node(
        name=node_name,
        module=module_name,
        signature=a.signature,
        purity=a.purity,
        error_policy=a.error_policy,
        decl_order=decl_order,
    )
    graph.nodes.append(node)
    return ActionResult.success(added_node=f"{a.module}.{a.name}", decl_order=decl_order)
187
+
188
+
189
def _h_remove_node(graph: Graph, a: RemoveNode) -> ActionResult:
    """Remove a node, provided no edge still starts or ends at it.

    Raises:
        ActionError: ``UNKNOWN_NODE`` if the node is missing, or
            ``NODE_HAS_REFERENCES`` (listing the offending edges) if any
            edge names it as caller or callee.
    """
    target = graph.find_node(a.name, a.module)
    if target is None:
        raise E.ActionError(
            E.UNKNOWN_NODE, f"node {a.module}.{a.name} does not exist", name=a.name, module=a.module
        )
    qn = target.qualified_name
    touching = [edge for edge in graph.edges if qn in (edge.caller, edge.callee)]
    if touching:
        raise E.ActionError(
            E.NODE_HAS_REFERENCES,
            f"node {qn} is referenced by {len(touching)} edge(s)",
            name=a.name,
            module=a.module,
            referencing_edges=[(edge.caller, edge.callee) for edge in touching],
        )
    # Keep every node that differs from the target in name or module.
    graph.nodes = [
        other for other in graph.nodes if other.name != a.name or other.module != a.module
    ]
    return ActionResult.success(removed_node=qn)
207
+
208
+
209
def _h_set_node_module(graph: Graph, a: SetNodeModule) -> ActionResult:
    """Move a node to another module and retarget every edge that names it.

    The move is applied in place and then checked against the import-cycle
    post-condition. If the check fails, the node and every rewritten edge
    endpoint are restored before the error is raised, so a rejected move
    leaves ``graph`` exactly as it was found.

    Raises:
        ActionError: ``UNKNOWN_NODE`` if the node is not in
            ``a.current_module``, ``UNKNOWN_MODULE`` if ``a.new_module`` does
            not exist, ``NAME_COLLISION`` if the target module already has a
            node of that name, ``WOULD_CREATE_CYCLE`` if the move introduces
            a module cycle.
    """
    n = graph.find_node(a.name, a.current_module)
    if n is None:
        raise E.ActionError(
            E.UNKNOWN_NODE,
            f"node {a.current_module}.{a.name} does not exist",
            name=a.name,
            module=a.current_module,
        )
    if graph.find_module(a.new_module) is None:
        raise E.ActionError(
            E.UNKNOWN_MODULE,
            f"target module {a.new_module!r} does not exist",
            name=a.new_module,
        )
    if graph.find_node(a.name, a.new_module) is not None:
        raise E.ActionError(
            E.NAME_COLLISION,
            f"node named {a.name!r} already exists in {a.new_module!r}",
            name=a.name,
            module=a.new_module,
        )
    old_module = n.module
    old_qn = n.qualified_name
    new_qn = f"{a.new_module}.{a.name}"
    n.module = a.new_module
    # Rewrite edge endpoints that referred to the old qualified name, keeping
    # a record of exactly which endpoints were touched so they can be undone.
    rewritten = []
    for e in graph.edges:
        was_caller = e.caller == old_qn
        was_callee = e.callee == old_qn
        if was_caller:
            e.caller = new_qn
        if was_callee:
            e.callee = new_qn
        if was_caller or was_callee:
            rewritten.append((e, was_caller, was_callee))
    # Post-condition: rewriting must not have introduced an import cycle.
    if graph.has_module_cycle():
        # Undo the tentative move before reporting the failure.
        n.module = old_module
        for e, was_caller, was_callee in rewritten:
            if was_caller:
                e.caller = old_qn
            if was_callee:
                e.callee = old_qn
        raise E.ActionError(
            E.WOULD_CREATE_CYCLE,
            f"moving {old_qn} -> {new_qn} would create an import cycle",
            from_qn=old_qn,
            to_qn=new_qn,
        )
    return ActionResult.success(moved_node={"from": old_qn, "to": new_qn})
250
+
251
+
252
def _h_attach_body(graph: Graph, a: AttachBody) -> ActionResult:
    """Bind a body template (and its arguments) to an existing node.

    Checks, in order: the node exists, the template is known, the supplied
    args validate against the template, and the node's current fan-out /
    fan-in satisfy the template's ``edges_ok`` predicate.

    Raises:
        ActionError: ``UNKNOWN_NODE``, ``UNKNOWN_TEMPLATE`` or
            ``TEMPLATE_ARGS_INVALID`` for the respective failed check.
    """
    target = graph.find_node(a.name, a.module)
    if target is None:
        raise E.ActionError(
            E.UNKNOWN_NODE,
            f"node {a.module}.{a.name} does not exist",
            name=a.name,
            module=a.module,
        )

    template_spec = get_template(a.template)
    if template_spec is None:
        raise E.ActionError(
            E.UNKNOWN_TEMPLATE, f"unknown template {a.template!r}", template=a.template
        )

    problems = validate_args(a.template, a.args)
    if problems:
        joined = "; ".join(problems)
        raise E.ActionError(
            E.TEMPLATE_ARGS_INVALID,
            f"args invalid for template {a.template!r}: {joined}",
            template=a.template,
            problems=problems,
        )

    out_d = graph.fan_out(target.qualified_name)
    in_d = graph.fan_in(target.qualified_name)
    if not template_spec.edges_ok(out_d, in_d):
        raise E.ActionError(
            E.TEMPLATE_ARGS_INVALID,
            f"template {a.template!r} requires different edge structure "
            f"(out_d={out_d}, in_d={in_d})",
            template=a.template,
            out_degree=out_d,
            in_degree=in_d,
        )

    target.body_template = a.template
    # Copy so later changes to the action's dict do not leak into the node.
    target.body_template_args = dict(a.args)
    attached = {"node": target.qualified_name, "template": a.template}
    return ActionResult.success(attached=attached)
290
+
291
+
292
def _h_add_edge(graph: Graph, a: AddEdge) -> ActionResult:
    """Add a call edge ``a.caller -> a.callee`` with its argument mapping.

    The mapping must cover every required callee parameter and may only
    reference parameters that exist on the callee / caller signatures. The
    edge is appended tentatively and removed again if it would introduce a
    module import cycle, so a rejected action leaves ``graph`` unchanged.

    Raises:
        ActionError: ``UNKNOWN_NODE`` for a missing endpoint,
            ``DUPLICATE_EDGE`` if the edge exists, ``SCHEMA_REJECTION`` if an
            endpoint's stored signature cannot be parsed,
            ``ARG_MAPPING_INVALID`` for an incomplete or bogus mapping,
            ``WOULD_CREATE_CYCLE`` if the edge closes an import cycle.
    """
    caller = graph.find_node_qualified(a.caller)
    callee = graph.find_node_qualified(a.callee)
    if caller is None:
        raise E.ActionError(E.UNKNOWN_NODE, f"caller {a.caller!r} does not exist", node=a.caller)
    if callee is None:
        raise E.ActionError(E.UNKNOWN_NODE, f"callee {a.callee!r} does not exist", node=a.callee)
    if graph.find_edge(a.caller, a.callee) is not None:
        raise E.ActionError(
            E.DUPLICATE_EDGE,
            f"edge {a.caller} -> {a.callee} already exists",
            caller=a.caller,
            callee=a.callee,
        )
    # Validate arg_mapping covers all required parameters of callee.
    # parse_signature raises ValueError; surface it as a structured error the
    # same way _h_add_node does instead of letting it escape the handler.
    try:
        callee_sig = parse_signature(callee.signature)
        caller_sig = parse_signature(caller.signature)
    except ValueError as ve:
        raise E.ActionError(
            E.SCHEMA_REJECTION, str(ve), caller=a.caller, callee=a.callee
        ) from ve
    mapped_callee = {m.callee_param for m in a.arg_mapping}
    mapped_caller = {m.caller_arg for m in a.arg_mapping}
    missing = set(callee_sig.required_params) - mapped_callee
    if missing:
        raise E.ActionError(
            E.ARG_MAPPING_INVALID,
            f"arg_mapping is missing required callee params: {sorted(missing)}",
            missing=sorted(missing),
        )
    bogus_callee = mapped_callee - set(callee_sig.all_params)
    if bogus_callee:
        raise E.ActionError(
            E.ARG_MAPPING_INVALID,
            f"arg_mapping references unknown callee params: {sorted(bogus_callee)}",
            unknown=sorted(bogus_callee),
        )
    bogus_caller = mapped_caller - set(caller_sig.all_params)
    if bogus_caller:
        raise E.ActionError(
            E.ARG_MAPPING_INVALID,
            f"arg_mapping references unknown caller args: {sorted(bogus_caller)}",
            unknown=sorted(bogus_caller),
        )
    # Add tentatively; check post-condition.
    graph.edges.append(
        Edge(
            caller=a.caller,
            callee=a.callee,
            arg_mapping=[ArgMapping(**m.model_dump()) for m in a.arg_mapping],
        )
    )
    if graph.has_module_cycle():
        # Take the tentative edge (the last element) back out before failing.
        graph.edges.pop()
        raise E.ActionError(
            E.WOULD_CREATE_CYCLE,
            f"adding edge {a.caller} -> {a.callee} would create an import cycle",
            caller=a.caller,
            callee=a.callee,
        )
    return ActionResult.success(added_edge={"caller": a.caller, "callee": a.callee})
348
+
349
+
350
def _h_remove_edge(graph: Graph, a: RemoveEdge) -> ActionResult:
    """Delete the edge ``a.caller -> a.callee``.

    Raises:
        ActionError: ``UNKNOWN_EDGE`` when no such edge exists.
    """
    if graph.find_edge(a.caller, a.callee) is None:
        raise E.ActionError(
            E.UNKNOWN_EDGE,
            f"edge {a.caller} -> {a.callee} does not exist",
            caller=a.caller,
            callee=a.callee,
        )
    # Keep every edge that differs from the target in at least one endpoint.
    survivors = [
        edge for edge in graph.edges if edge.caller != a.caller or edge.callee != a.callee
    ]
    graph.edges = survivors
    return ActionResult.success(removed_edge={"caller": a.caller, "callee": a.callee})
363
+
364
+
365
+ # ---- info / terminal handlers (stubs) -------------------------------
366
+
367
+
368
def _h_query_spec(graph: Graph, a: QuerySpec) -> ActionResult:
    """Stub: report that spec queries are not implemented yet.

    Echoes back the requested ``constraint_kind``; ``graph`` is not consulted.
    """
    # TODO: route to graphforge.constraints once tasks/specs are wired in.
    notice = "query_spec routed via dispatcher; constraint engine TODO"
    return ActionResult.success(not_implemented=notice, constraint_kind=a.constraint_kind)
374
+
375
+
376
def _h_query_subgraph(graph: Graph, a: QuerySubgraph) -> ActionResult:
    """Answer a read-only subgraph query selected by the ``a.scope`` prefix.

    Supported scopes:

    * ``module:<name>`` — the module's nodes plus the edges whose caller and
      callee both belong to that module.
    * ``neighbors:<qualified>`` — callers and callees of one node.
    * ``path:...`` — accepted but not implemented yet.

    Raises:
        ActionError: ``SCHEMA_REJECTION`` for any other scope string.
    """
    scope = a.scope
    if scope.startswith("module:"):
        mod = scope[len("module:") :]
        members = graph.nodes_in_module(mod)
        # Match edge endpoints against the module's actual qualified node
        # names. Taking the text before the first "." would truncate a dotted
        # module name and silently drop its intra-module edges.
        member_qns = {n.qualified_name for n in members}
        nodes = [n.model_dump() for n in members]
        edges = [
            e.model_dump()
            for e in graph.edges
            if e.caller in member_qns and e.callee in member_qns
        ]
        return ActionResult.success(scope=scope, nodes=nodes, edges=edges)
    if scope.startswith("neighbors:"):
        qn = scope[len("neighbors:") :]
        return ActionResult.success(
            scope=scope,
            callers=graph.callers_of(qn),
            callees=graph.callees_of(qn),
        )
    if scope.startswith("path:"):
        # TODO: shortest-path search over call graph.
        return ActionResult.success(
            scope=scope, not_implemented="path search TODO"
        )
    raise E.ActionError(E.SCHEMA_REJECTION, f"unrecognized subgraph scope {scope!r}")
400
+
401
+
402
def _h_query_types(graph: Graph, a: QueryTypes) -> ActionResult:
    """Stub: echo the requested scope with a not-implemented marker.

    ``graph`` is not consulted.
    """
    # TODO: delegate to graphforge.types.
    notice = "type engine TODO"
    return ActionResult.success(scope=a.scope, not_implemented=notice)
407
+
408
+
409
def _h_materialize(graph: Graph, a: MaterializeAndValidate) -> ActionResult:
    """Project the graph to source and run the parse-only validator gate.

    Heavier validation gates (mypy --strict, import-resolution, behavioral
    tests) are added to this action's report as their subsystems land.

    Returns a success result carrying the generated file names, the total
    size of the generated sources in bytes, and the validator report.

    Raises:
        ActionError: ``SCHEMA_REJECTION`` when the materializer raises
            ``ValueError``.
    """
    # Imported here rather than at module top, as in the original handler.
    from graphforge.materializer import materialize as _materialize
    from graphforge.validator import full_check

    try:
        files = _materialize(graph)
    except ValueError as ve:
        # Codegen rejected the graph (e.g. unknown pattern, template/edge
        # structure mismatch missed by the dispatcher's preconditions).
        raise E.ActionError(
            E.SCHEMA_REJECTION, f"materialization failed: {ve}"
        ) from ve
    report = full_check(files)
    # len() of a str counts code points, not bytes; encode text so the figure
    # reported as ``bytes_total`` is a real byte count. Non-str payloads
    # (presumably bytes — verify against the materializer) are measured as-is.
    bytes_total = sum(
        len(src.encode("utf-8")) if isinstance(src, str) else len(src)
        for src in files.values()
    )
    return ActionResult.success(
        files=list(files.keys()),
        bytes_total=bytes_total,
        report=report.to_dict(),
    )
432
+
433
+
434
def _h_run_tests(graph: Graph, a: RunBehavioralTests) -> ActionResult:
    """Stub: always rejects, because the behavioral runner is not wired in.

    Raises:
        ActionError: ``SCHEMA_REJECTION`` on every call.
    """
    # TODO: delegate to graphforge.behavioral.
    reason = "run_behavioral_tests is not yet implemented"
    raise E.ActionError(E.SCHEMA_REJECTION, reason)
439
+
440
+
441
def _h_submit(graph: Graph, a: Submit) -> ActionResult:
    """Produce the terminating result for a ``submit`` action.

    Neither ``graph`` nor ``a`` is inspected.
    """
    outcome = ActionResult.terminate(submitted=True)
    return outcome
graphforge/actions/errors.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Structured action errors.
2
+
3
+ Every failure mode in the action dispatcher surfaces as an :class:`ActionError`
4
+ with a stable ``code`` so the agent can be trained against deterministic error
5
+ strings (see PROPOSAL.md §4.4 — "failures return structured errors describing
6
+ the cause"). Codes are kept short and stable across versions.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any
12
+
13
+
14
class ActionError(Exception):
    """Raised by action handlers; caught and reported by the dispatcher.

    Attributes are set from the constructor arguments:

    * ``code`` — short error code (one of the module-level constants).
    * ``message`` — human-readable description.
    * ``details`` — any extra keyword arguments, kept as a dict.

    ``str(err)`` renders as ``"[<code>] <message>"``.
    """

    def __init__(self, code: str, message: str, **details: Any) -> None:
        super().__init__(f"[{code}] {message}")
        self.code = code
        self.message = message
        self.details = details

    def to_dict(self) -> dict[str, Any]:
        """Return a flat dict: ``error``, ``message``, then the detail keys.

        The ``error`` and ``message`` entries always hold the code and
        message; a detail that happens to use one of those key names is
        ignored rather than allowed to overwrite them.
        """
        payload: dict[str, Any] = {"error": self.code, "message": self.message}
        for key, value in self.details.items():
            # setdefault leaves the reserved entries untouched on a clash.
            payload.setdefault(key, value)
        return payload
25
+
26
+
27
+ # ---- canonical codes -------------------------------------------------
28
+ # Schema layer
29
+ SCHEMA_REJECTION = "schema_rejection"
30
+ # Pre-condition layer
31
+ UNKNOWN_MODULE = "unknown_module"
32
+ UNKNOWN_NODE = "unknown_node"
33
+ UNKNOWN_EDGE = "unknown_edge"
34
+ NAME_COLLISION = "name_collision"
35
+ MODULE_NOT_EMPTY = "module_not_empty"
36
+ NODE_HAS_REFERENCES = "node_has_references"
37
+ DUPLICATE_EDGE = "duplicate_edge"
38
+ UNKNOWN_TEMPLATE = "unknown_template"
39
+ TEMPLATE_ARGS_INVALID = "template_args_invalid"
40
+ RESPONSIBILITY_MISMATCH = "responsibility_mismatch"
41
+ ARG_MAPPING_INVALID = "arg_mapping_invalid"
42
+ # Post-condition layer
43
+ WOULD_CREATE_CYCLE = "would_create_cycle"
44
+ TYPE_MISMATCH = "type_mismatch"
graphforge/actions/schema.py ADDED
@@ -0,0 +1,180 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Action message schemas.
2
+
3
+ These are the wire shapes accepted by the dispatcher. Every action is a
4
+ discriminated-union member keyed on ``kind``.
5
+
6
+ The action vocabulary mirrors PROPOSAL.md §4. Total surface:
7
+
8
+ Graph mutations
9
+ add_module, remove_module
10
+ add_node, remove_node, set_node_module, attach_body
11
+ add_edge, remove_edge
12
+ Information
13
+ query_spec, query_subgraph, query_types,
14
+ materialize_and_validate, run_behavioral_tests
15
+ Terminal
16
+ submit
17
+
18
+ Note: the proposal abstract states "eleven actions"; the section-4 listing
19
+ contains fourteen. We implement the section-4 set; the abstract count will
20
+ be corrected in the next revision of PROPOSAL.md.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ from typing import Annotated, Literal, Optional, Union
26
+
27
+ from pydantic import BaseModel, ConfigDict, Field
28
+
29
+ from graphforge.graph.schema import ArgMapping, ErrorPolicy, Purity, ResponsibilityTag
30
+
31
+
32
+ # Common config: forbid unknown fields, fail loudly on schema drift.
33
+ _cfg = ConfigDict(extra="forbid")
34
+
35
+
36
+ # ---- mutations -------------------------------------------------------
37
+
38
+
39
+ class AddModule(BaseModel):
40
+ model_config = _cfg
41
+ kind: Literal["add_module"] = "add_module"
42
+ name: str
43
+ responsibility: ResponsibilityTag
44
+
45
+
46
+ class RemoveModule(BaseModel):
47
+ model_config = _cfg
48
+ kind: Literal["remove_module"] = "remove_module"
49
+ name: str
50
+
51
+
52
+ class AddNode(BaseModel):
53
+ model_config = _cfg
54
+ kind: Literal["add_node"] = "add_node"
55
+ name: str
56
+ module: str
57
+ signature: str
58
+ purity: Purity = "impure"
59
+ error_policy: ErrorPolicy = "none"
60
+
61
+
62
+ class RemoveNode(BaseModel):
63
+ model_config = _cfg
64
+ kind: Literal["remove_node"] = "remove_node"
65
+ name: str
66
+ module: str
67
+
68
+
69
+ class SetNodeModule(BaseModel):
70
+ model_config = _cfg
71
+ kind: Literal["set_node_module"] = "set_node_module"
72
+ name: str
73
+ current_module: str
74
+ new_module: str
75
+
76
+
77
+ class AttachBody(BaseModel):
78
+ model_config = _cfg
79
+ kind: Literal["attach_body"] = "attach_body"
80
+ name: str
81
+ module: str
82
+ template: str
83
+ args: dict[str, object] = Field(default_factory=dict)
84
+
85
+
86
+ class AddEdge(BaseModel):
87
+ model_config = _cfg
88
+ kind: Literal["add_edge"] = "add_edge"
89
+ caller: str
90
+ callee: str
91
+ arg_mapping: list[ArgMapping] = Field(default_factory=list)
92
+
93
+
94
+ class RemoveEdge(BaseModel):
95
+ model_config = _cfg
96
+ kind: Literal["remove_edge"] = "remove_edge"
97
+ caller: str
98
+ callee: str
99
+
100
+
101
+ # ---- information actions --------------------------------------------
102
+
103
+
104
+ class QuerySpec(BaseModel):
105
+ model_config = _cfg
106
+ kind: Literal["query_spec"] = "query_spec"
107
+ constraint_kind: Optional[str] = None
108
+
109
+
110
+ class QuerySubgraph(BaseModel):
111
+ model_config = _cfg
112
+ kind: Literal["query_subgraph"] = "query_subgraph"
113
+ scope: str # "module:<name>" | "neighbors:<qualified>" | "path:<from>:<to>"
114
+
115
+
116
+ class QueryTypes(BaseModel):
117
+ model_config = _cfg
118
+ kind: Literal["query_types"] = "query_types"
119
+ scope: str # "all" | "module:<name>" | "node:<qualified>"
120
+
121
+
122
+ class MaterializeAndValidate(BaseModel):
123
+ model_config = _cfg
124
+ kind: Literal["materialize_and_validate"] = "materialize_and_validate"
125
+
126
+
127
+ class RunBehavioralTests(BaseModel):
128
+ model_config = _cfg
129
+ kind: Literal["run_behavioral_tests"] = "run_behavioral_tests"
130
+ materialized: bool = True
131
+
132
+
133
+ # ---- terminal --------------------------------------------------------
134
+
135
+
136
+ class Submit(BaseModel):
137
+ model_config = _cfg
138
+ kind: Literal["submit"] = "submit"
139
+
140
+
141
+ # ---- discriminated union --------------------------------------------
142
+
143
+ Action = Annotated[
144
+ Union[
145
+ AddModule,
146
+ RemoveModule,
147
+ AddNode,
148
+ RemoveNode,
149
+ SetNodeModule,
150
+ AttachBody,
151
+ AddEdge,
152
+ RemoveEdge,
153
+ QuerySpec,
154
+ QuerySubgraph,
155
+ QueryTypes,
156
+ MaterializeAndValidate,
157
+ RunBehavioralTests,
158
+ Submit,
159
+ ],
160
+ Field(discriminator="kind"),
161
+ ]
162
+
163
+
164
+ __all__ = [
165
+ "Action",
166
+ "AddModule",
167
+ "RemoveModule",
168
+ "AddNode",
169
+ "RemoveNode",
170
+ "SetNodeModule",
171
+ "AttachBody",
172
+ "AddEdge",
173
+ "RemoveEdge",
174
+ "QuerySpec",
175
+ "QuerySubgraph",
176
+ "QueryTypes",
177
+ "MaterializeAndValidate",
178
+ "RunBehavioralTests",
179
+ "Submit",
180
+ ]
graphforge/actions/signature.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cheap signature parser.
2
+
3
+ Used by the dispatcher to validate ``add_edge`` arg-mappings against the
4
+ callee's parameter list. Real type flow validation (caller_arg type vs
5
+ callee_param type) is the type engine; this module only extracts parameter
6
+ *names* from a signature string of the form::
7
+
8
+ (a: int, b: str = "x", *, c: bool) -> bool
9
+
10
+ Annotations are tolerated as opaque text. Defaults are tolerated and treated
11
+ as making the parameter optional.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ from dataclasses import dataclass
18
+
19
+
20
@dataclass(frozen=True)
class Parameter:
    # Bare parameter name, with any leading ``*`` / ``**`` removed.
    name: str
    # Annotation text after ``:``, or None when the parameter has none.
    annotation: str | None
    # True when the parameter text carried a top-level ``= default``.
    has_default: bool
    # True for ``*args`` / ``**kwargs`` style parameters.
    variadic: bool = False


@dataclass(frozen=True)
class ParsedSignature:
    parameters: list[Parameter]
    return_annotation: str

    @property
    def required_params(self) -> list[str]:
        """Names a caller must supply: no default and not variadic."""
        return [
            p.name for p in self.parameters if not p.has_default and not p.variadic
        ]

    @property
    def all_params(self) -> list[str]:
        """Every parameter name, in declaration order."""
        return [p.name for p in self.parameters]


_SIG_RE = re.compile(r"^\s*\((?P<params>.*)\)\s*->\s*(?P<ret>.+?)\s*$", re.DOTALL)


def parse_signature(sig: str) -> ParsedSignature:
    """Parse a function signature string. Lenient — caller validates more deeply.

    Expects the ``(params) -> return`` shape. Bare ``*`` and ``/`` markers are
    skipped; ``*args`` / ``**kwargs`` are recorded under their bare name and
    flagged variadic so they are not reported as required.

    Raises ``ValueError`` on signatures that fail surface checks. The schema
    layer (Node validator) already requires ``(`` and ``->``; this is the
    secondary parse used at dispatch time.
    """
    m = _SIG_RE.match(sig)
    if not m:
        raise ValueError(f"could not parse signature: {sig!r}")
    raw_params = m.group("params").strip()
    ret = m.group("ret").strip()

    params: list[Parameter] = []
    if raw_params:
        for piece in _split_top_level(raw_params, ","):
            piece = piece.strip()
            if not piece or piece in {"*", "/"}:
                continue
            variadic = piece.startswith("*")
            if variadic:
                # Drops one or two leading stars (``*args`` / ``**kwargs``).
                piece = piece.lstrip("*").lstrip()
            has_default = False
            if "=" in piece:
                # Split off the default at a top-level '='; ones nested in
                # brackets or string literals are ignored.
                head, default = _split_default(piece)
                piece = head.strip()
                has_default = default is not None
            name = piece
            annotation: str | None = None
            if ":" in piece:
                name, annotation = piece.split(":", 1)
                name = name.strip()
                annotation = annotation.strip()
            if not name.isidentifier():
                raise ValueError(f"unparseable parameter {piece!r} in {sig!r}")
            params.append(
                Parameter(
                    name=name,
                    annotation=annotation,
                    has_default=has_default,
                    variadic=variadic,
                )
            )

    return ParsedSignature(parameters=params, return_annotation=ret)


def _iter_top_level(s: str):
    """Yield ``(index, char)`` for characters at bracket-depth 0, outside strings.

    Brackets and quote characters themselves are never yielded. Inside a
    quoted literal a backslash escapes the next character, so an escaped
    quote does not end the literal.
    """
    depth = 0
    quote: str | None = None
    escaped = False
    for i, ch in enumerate(s):
        if quote is not None:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == quote:
                quote = None
            continue
        if ch in "\"'":
            quote = ch
        elif ch in "([{":
            depth += 1
        elif ch in ")]}":
            depth -= 1
        elif depth == 0:
            yield i, ch


def _split_top_level(s: str, sep: str) -> list[str]:
    """Split ``s`` on ``sep`` at bracket-depth 0, ignoring string literals.

    A trailing empty piece is dropped; empty pieces elsewhere are kept.
    """
    out: list[str] = []
    start = 0
    for i, ch in _iter_top_level(s):
        if ch == sep:
            out.append(s[start:i])
            start = i + 1
    tail = s[start:]
    if tail:
        out.append(tail)
    return out


def _split_default(piece: str) -> tuple[str, str | None]:
    """Split off ``= default`` at bracket-depth 0. Returns (head, default | None).

    Only the first ``=`` that sits outside brackets and string literals counts.
    """
    for i, ch in _iter_top_level(piece):
        if ch == "=":
            return piece[:i], piece[i + 1 :]
    return piece, None
graphforge/behavioral/__init__.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Behavioral test runner.
2
+
3
+ Responsibilities (PROPOSAL.md §2.1, §6.2):
4
+
5
+ * Run a property-based test suite (hypothesis) against materialized code,
6
+ in a sandboxed subprocess with timeout + memory limit.
7
+ * Tests are part of the task definition; their bodies are *hidden* from
8
+ the agent. The agent sees only test names and pass/fail at submission.
9
+ * Distinguish failures (assertion) from errors (timeout, crash) — both
10
+ count as test failures, but they're surfaced separately for diagnostics.
11
+
12
+ Public surface (TODO):
13
+
14
+ run_tests(files, tests, timeout=12.0) -> dict[str, TestResult]
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+
20
def run_tests(  # pragma: no cover — TODO
    files: dict[str, str],
    tests: list[object],
    timeout: float = 12.0,
) -> dict[str, object]:
    """Placeholder for the behavioral test runner; not implemented yet.

    Raises:
        NotImplementedError: on every call.
    """
    reason = "behavioral runner TODO — see PROPOSAL.md §6.2"
    raise NotImplementedError(reason)
graphforge/constraints/__init__.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Constraint vocabulary and dispatch.
2
+
3
+ Three families (PROPOSAL.md §2.2):
4
+
5
+ * Structural — node_exists, edge_exists, module_count, acyclic_imports,
6
+ fan_in_max, fan_out_max, dag_depth_max, internal_only, …
7
+ * Type / signature — signature_matches, return_type, arg_type,
8
+ type_consistency, no_any_types, pure_function (TODO)
9
+ * Behavioral / materialization — materializes, imports_resolve,
10
+ type_checks, behavioral_test_passes, error_handling_present|absent
11
+
12
+ Currently shipped: tier-0 subset of structural + ``materializes``. Additional
13
+ kinds land as new discriminated members in :mod:`schema` and matching
14
+ ``_check_*`` functions in :mod:`checker`.
15
+ """
16
+
17
+ from graphforge.constraints.checker import (
18
+ SatisfactionReport,
19
+ check,
20
+ evaluate_all,
21
+ )
22
+ from graphforge.constraints.schema import (
23
+ AcyclicImports,
24
+ Constraint,
25
+ EdgeExists,
26
+ Materializes,
27
+ ModuleCount,
28
+ ModuleResponsibility,
29
+ ModuleSizeMax,
30
+ NodeAbsent,
31
+ NodeExists,
32
+ STRUCTURAL_KINDS,
33
+ )
34
+
35
+ __all__ = [
36
+ "AcyclicImports",
37
+ "Constraint",
38
+ "EdgeExists",
39
+ "Materializes",
40
+ "ModuleCount",
41
+ "ModuleResponsibility",
42
+ "ModuleSizeMax",
43
+ "NodeAbsent",
44
+ "NodeExists",
45
+ "STRUCTURAL_KINDS",
46
+ "SatisfactionReport",
47
+ "check",
48
+ "evaluate_all",
49
+ ]
graphforge/constraints/checker.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Constraint checker dispatch.
2
+
3
+ Each constraint kind has a small ``_check_*`` function. ``check`` routes by
4
+ isinstance and ``evaluate_all`` reports which constraints from a list are
5
+ satisfied or not.
6
+
7
+ Behavioral / materialization constraints (currently just ``materializes``)
8
+ delegate to the materializer and validator subsystems.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from dataclasses import dataclass, field
14
+
15
+ from graphforge.constraints.schema import (
16
+ AcyclicImports,
17
+ Constraint,
18
+ EdgeExists,
19
+ Materializes,
20
+ ModuleCount,
21
+ ModuleResponsibility,
22
+ ModuleSizeMax,
23
+ NodeAbsent,
24
+ NodeExists,
25
+ STRUCTURAL_KINDS,
26
+ )
27
+ from graphforge.graph.schema import Graph
28
+
29
+
30
@dataclass
class SatisfactionReport:
    # Constraints that evaluated true / false, in evaluation order.
    satisfied: list[Constraint] = field(default_factory=list)
    unsatisfied: list[Constraint] = field(default_factory=list)

    @property
    def total(self) -> int:
        """Number of constraints recorded across both lists."""
        return len(self.satisfied) + len(self.unsatisfied)

    @property
    def all_satisfied(self) -> bool:
        """True only when at least one constraint was recorded and none failed."""
        if self.unsatisfied:
            return False
        return self.total > 0

    def split_by_family(self) -> tuple["SatisfactionReport", "SatisfactionReport"]:
        """Split into (structural, behavioral) sub-reports.

        A constraint is structural when its ``kind`` is in
        ``STRUCTURAL_KINDS``; everything else is behavioral. Useful for the
        reward engine, which scores the two families with different
        magnitudes per PROPOSAL.md §5.2.
        """
        structural = SatisfactionReport()
        behavioral = SatisfactionReport()
        for bucket in ("satisfied", "unsatisfied"):
            for constraint in getattr(self, bucket):
                family = structural if constraint.kind in STRUCTURAL_KINDS else behavioral
                getattr(family, bucket).append(constraint)
        return structural, behavioral

    def to_dict(self) -> dict[str, object]:
        """Serialize both lists via ``model_dump`` plus the summary fields."""
        dumped_ok = [c.model_dump() for c in self.satisfied]
        dumped_bad = [c.model_dump() for c in self.unsatisfied]
        return {
            "satisfied": dumped_ok,
            "unsatisfied": dumped_bad,
            "total": self.total,
            "all_satisfied": self.all_satisfied,
        }
64
+
65
+
66
+ # ---- per-kind checkers ----------------------------------------------
67
+
68
+
69
+ def _check_node_exists(g: Graph, c: NodeExists) -> bool:
70
+ return g.find_node(c.name, c.module) is not None
71
+
72
+
73
+ def _check_node_absent(g: Graph, c: NodeAbsent) -> bool:
74
+ return g.find_node(c.name, c.module) is None
75
+
76
+
77
+ def _check_edge_exists(g: Graph, c: EdgeExists) -> bool:
78
+ return g.find_edge(c.caller, c.callee) is not None
79
+
80
+
81
+ def _check_module_count(g: Graph, c: ModuleCount) -> bool:
82
+ return len(g.modules) == c.n
83
+
84
+
85
+ def _check_module_size_max(g: Graph, c: ModuleSizeMax) -> bool:
86
+ return len(g.nodes_in_module(c.module)) <= c.n
87
+
88
+
89
+ def _check_module_responsibility(g: Graph, c: ModuleResponsibility) -> bool:
90
+ m = g.find_module(c.module)
91
+ return m is not None and m.responsibility == c.responsibility
92
+
93
+
94
+ def _check_acyclic_imports(g: Graph, _c: AcyclicImports) -> bool:
95
+ return not g.has_module_cycle()
96
+
97
+
98
def _check_materializes(g: Graph, _c: Materializes) -> bool:
    """Return whether ``g`` materializes and the result passes ``full_check``.

    Any exception raised by the materializer is treated as "does not
    materialize" and yields ``False``.
    """
    # Imported lazily so that callers who don't use this checker don't pay
    # the cost of pulling the materializer/validator graph.
    from graphforge.materializer import materialize
    from graphforge.validator import full_check

    try:
        generated = materialize(g)
    except Exception:
        return False
    verdict = full_check(generated)
    return verdict.ok
109
+
110
+
111
+ # ---- dispatch --------------------------------------------------------
112
+
113
+
114
def check(graph: Graph, constraint: Constraint) -> bool:
    """Evaluate one constraint against ``graph`` and return the verdict.

    The constraint's class selects the matching ``_check_*`` function; the
    first class in the routing table that matches wins.

    Raises:
        ValueError: when the constraint is of no known class.
    """
    routes = (
        (NodeExists, _check_node_exists),
        (NodeAbsent, _check_node_absent),
        (EdgeExists, _check_edge_exists),
        (ModuleCount, _check_module_count),
        (ModuleSizeMax, _check_module_size_max),
        (ModuleResponsibility, _check_module_responsibility),
        (AcyclicImports, _check_acyclic_imports),
        (Materializes, _check_materializes),
    )
    for constraint_type, checker in routes:
        if isinstance(constraint, constraint_type):
            return checker(graph, constraint)
    raise ValueError(f"unknown constraint kind: {constraint!r}")
132
+
133
+
134
def evaluate_all(graph: Graph, constraints: list[Constraint]) -> SatisfactionReport:
    """Check every constraint in order and sort it into a report.

    Each constraint lands in ``satisfied`` or ``unsatisfied`` according to
    what ``check`` returns for it.
    """
    report = SatisfactionReport()
    for constraint in constraints:
        bucket = report.satisfied if check(graph, constraint) else report.unsatisfied
        bucket.append(constraint)
    return report
graphforge/constraints/schema.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Constraint schemas (tier-0 subset).
2
+
3
+ Constraints are pydantic discriminated-union members keyed on ``kind``.
4
+ Tier-0 carves out the smallest set sufficient to express a real task and
5
+ exercise the reward engine end-to-end. The remaining vocabulary in
6
+ PROPOSAL.md §2.2 (fan_in_max, dag_depth_max, type_consistency,
7
+ behavioral_test_passes, …) lands on top of this same shape as new
8
+ discriminated members + checker functions.
9
+
10
+ Each constraint member is a pure data record. Behavior lives in
11
+ :mod:`graphforge.constraints.checker`.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import Annotated, Literal, Union
17
+
18
+ from pydantic import BaseModel, ConfigDict, Field
19
+
20
+ from graphforge.graph.schema import ResponsibilityTag
21
+
22
+ _cfg = ConfigDict(extra="forbid")
23
+
24
+
25
+ # ---- structural ------------------------------------------------------
26
+
27
+
28
+ class NodeExists(BaseModel):
29
+ model_config = _cfg
30
+ kind: Literal["node_exists"] = "node_exists"
31
+ name: str
32
+ module: str
33
+
34
+
35
+ class NodeAbsent(BaseModel):
36
+ model_config = _cfg
37
+ kind: Literal["node_absent"] = "node_absent"
38
+ name: str
39
+ module: str
40
+
41
+
42
+ class EdgeExists(BaseModel):
43
+ model_config = _cfg
44
+ kind: Literal["edge_exists"] = "edge_exists"
45
+ caller: str # qualified
46
+ callee: str # qualified
47
+
48
+
49
+ class ModuleCount(BaseModel):
50
+ model_config = _cfg
51
+ kind: Literal["module_count"] = "module_count"
52
+ n: int = Field(..., ge=0)
53
+
54
+
55
+ class ModuleSizeMax(BaseModel):
56
+ model_config = _cfg
57
+ kind: Literal["module_size_max"] = "module_size_max"
58
+ module: str
59
+ n: int = Field(..., ge=0)
60
+
61
+
62
+ class ModuleResponsibility(BaseModel):
63
+ model_config = _cfg
64
+ kind: Literal["module_responsibility"] = "module_responsibility"
65
+ module: str
66
+ responsibility: ResponsibilityTag
67
+
68
+
69
+ class AcyclicImports(BaseModel):
70
+ model_config = _cfg
71
+ kind: Literal["acyclic_imports"] = "acyclic_imports"
72
+
73
+
74
+ # ---- behavioral / materialization -----------------------------------
75
+
76
+
77
+ class Materializes(BaseModel):
78
+ model_config = _cfg
79
+ kind: Literal["materializes"] = "materializes"
80
+
81
+
82
+ # ---- discriminated union --------------------------------------------
83
+
84
+ Constraint = Annotated[
85
+ Union[
86
+ NodeExists,
87
+ NodeAbsent,
88
+ EdgeExists,
89
+ ModuleCount,
90
+ ModuleSizeMax,
91
+ ModuleResponsibility,
92
+ AcyclicImports,
93
+ Materializes,
94
+ ],
95
+ Field(discriminator="kind"),
96
+ ]
97
+
98
+
99
+ # Set of kinds considered "structural" for the reward engine's per-constraint
100
+ # +1 magnitude. The "behavioral" family is reserved for property-test results
101
+ # (BehavioralTestPasses, TODO) which earn the higher +3 magnitude. The
102
+ # ``materializes`` constraint is structural for scoring purposes; the more
103
+ # severe "Materialization fails: -8" penalty in PROPOSAL.md §5.2 is an
104
+ # independent gate driven by the materializer raising or returning parse
105
+ # errors, not by this constraint kind.
106
+ STRUCTURAL_KINDS = {
107
+ "node_exists",
108
+ "node_absent",
109
+ "edge_exists",
110
+ "module_count",
111
+ "module_size_max",
112
+ "module_responsibility",
113
+ "acyclic_imports",
114
+ "materializes",
115
+ }
116
+
117
+
118
+ __all__ = [
119
+ "AcyclicImports",
120
+ "Constraint",
121
+ "EdgeExists",
122
+ "Materializes",
123
+ "ModuleCount",
124
+ "ModuleResponsibility",
125
+ "ModuleSizeMax",
126
+ "NodeAbsent",
127
+ "NodeExists",
128
+ "STRUCTURAL_KINDS",
129
+ ]
graphforge/graph/__init__.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Canonical graph schema. See :mod:`graphforge.graph.schema`."""
2
+
3
+ from graphforge.graph.schema import (
4
+ ArgMapping,
5
+ Edge,
6
+ ErrorPolicy,
7
+ Graph,
8
+ Module,
9
+ Node,
10
+ Purity,
11
+ ResponsibilityTag,
12
+ )
13
+
14
+ __all__ = [
15
+ "ArgMapping",
16
+ "Edge",
17
+ "ErrorPolicy",
18
+ "Graph",
19
+ "Module",
20
+ "Node",
21
+ "Purity",
22
+ "ResponsibilityTag",
23
+ ]
graphforge/graph/schema.py ADDED
@@ -0,0 +1,308 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Canonical graph schema.
2
+
3
+ The graph is the single source of truth for an in-progress program. Every
4
+ materialization is a deterministic function of (graph, template library).
5
+
6
+ Wire format mirrors the JSON shape documented in PROPOSAL.md §3.1, exactly:
7
+
8
+ {
9
+ "modules": [{"name": ..., "responsibility": ...}, ...],
10
+ "nodes": [{"name": ..., "module": ..., "signature": ...,
11
+ "body_template": ..., "body_template_args": {...},
12
+ "purity": ..., "error_policy": ..., "decl_order": ...}, ...],
13
+ "edges": [{"caller": "<module>.<name>",
14
+ "callee": "<module>.<name>",
15
+ "arg_mapping": [{"caller_arg": ..., "callee_param": ...}, ...]}, ...]
16
+ }
17
+
18
+ This module enforces shape and well-formedness only. Higher-order invariants
19
+ (unique names, edge endpoints exist, no cycles, type-flow compatibility) are
20
+ enforced by the action dispatcher and the type engine, not the schema, so
21
+ that callers can build partial / invalid graphs and inspect why they fail.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import hashlib
27
+ import json
28
+ from typing import Literal, Optional
29
+
30
+ from pydantic import BaseModel, ConfigDict, Field, field_validator
31
+
32
+ # ----------------------------------------------------------------------
33
+ # Enumerated tags
34
+ # ----------------------------------------------------------------------
35
+
36
+ # Responsibility tags constrain which kinds of nodes a module is allowed to
37
+ # host. This is the canonical set; add new tags deliberately, because tasks
38
+ # encode constraints against this vocabulary.
39
+ ResponsibilityTag = Literal[
40
+ "io",
41
+ "validation",
42
+ "transform",
43
+ "orchestration",
44
+ "storage",
45
+ "formatting",
46
+ "lookup",
47
+ "policy",
48
+ "logging",
49
+ "computation",
50
+ ]
51
+
52
+ Purity = Literal["pure", "impure"]
53
+
54
+ # How a function handles errors in its body. "guard" means it includes a
55
+ # guard / try-except. "propagate" means it deliberately lets errors flow up.
56
+ # "none" is the default — no claim either way.
57
+ ErrorPolicy = Literal["guard", "propagate", "none"]
58
+
59
+
60
+ # ----------------------------------------------------------------------
61
+ # Atomic records
62
+ # ----------------------------------------------------------------------
63
+
64
+
65
class Module(BaseModel):
    """A declared module; it becomes one Python file when materialized."""

    model_config = ConfigDict(extra="forbid", frozen=False)

    name: str = Field(..., min_length=1)
    responsibility: ResponsibilityTag

    @field_validator("name")
    @classmethod
    def _name_is_identifier(cls, v: str) -> str:
        """Accept ``v`` only if it is an identifier that does not start with ``_``.

        Raises:
            ValueError: if either rule is violated (identifier rule first).
        """
        complaint = None
        if not v.isidentifier():
            complaint = "is not a Python identifier"
        elif v.startswith("_"):
            complaint = "must not start with an underscore"
        if complaint is not None:
            raise ValueError(f"module name {v!r} {complaint}")
        return v
81
+
82
+
83
class Node(BaseModel):
    """A declared function.

    ``body_template`` stays ``None`` until a body is attached to the node.
    """

    model_config = ConfigDict(extra="forbid", frozen=False)

    name: str = Field(..., min_length=1)
    module: str = Field(..., min_length=1)
    signature: str = Field(..., min_length=2)  # e.g., "(x: int) -> bool"
    body_template: Optional[str] = None
    body_template_args: dict[str, object] = Field(default_factory=dict)
    purity: Purity = "impure"
    error_policy: ErrorPolicy = "none"
    decl_order: int = 0

    @field_validator("name")
    @classmethod
    def _name_is_identifier(cls, v: str) -> str:
        """Accept ``v`` only if it is a valid Python identifier."""
        if v.isidentifier():
            return v
        raise ValueError(f"node name {v!r} is not a Python identifier")

    @field_validator("signature")
    @classmethod
    def _signature_shape(cls, v: str) -> str:
        """Surface-level shape check; full parsing is left to the type engine.

        Raises:
            ValueError: if the text (ignoring leading whitespace) does not
                open with ``(`` or contains no ``->`` arrow.
        """
        opens_with_paren = v.lstrip()[:1] == "("
        if not opens_with_paren:
            raise ValueError(f"signature must start with '(': got {v!r}")
        has_arrow = "->" in v
        if not has_arrow:
            raise ValueError(f"signature must include '->' return arrow: got {v!r}")
        return v

    # Convenience -----------------------------------------------------

    @property
    def qualified_name(self) -> str:
        """``<module>.<name>``, the address form used for edge endpoints."""
        return ".".join((self.module, self.name))
120
+
121
+
122
class ArgMapping(BaseModel):
    """How an edge wires a caller's argument to a callee's parameter.

    Both fields are required, non-empty strings; unknown keys are rejected
    (``extra="forbid"``).
    """

    model_config = ConfigDict(extra="forbid", frozen=False)

    # Expression/name on the caller side that is passed in.
    caller_arg: str = Field(..., min_length=1)
    # Name of the callee parameter that receives it.
    callee_param: str = Field(..., min_length=1)
129
+
130
+
131
class Edge(BaseModel):
    """A CALLS edge whose endpoints are qualified names ``<module>.<name>``."""

    model_config = ConfigDict(extra="forbid", frozen=False)

    caller: str = Field(..., min_length=3)
    callee: str = Field(..., min_length=3)
    arg_mapping: list[ArgMapping] = Field(default_factory=list)

    @field_validator("caller", "callee")
    @classmethod
    def _qualified(cls, v: str) -> str:
        """Require exactly one dot separating two identifier parts.

        Raises:
            ValueError: if ``v`` does not contain exactly one ``.`` or
                either side of it is not a Python identifier.
        """
        pieces = v.split(".")
        # Exactly one dot <=> splitting yields exactly two pieces.
        if len(pieces) != 2:
            raise ValueError(
                f"edge endpoint {v!r} is not qualified (expected '<module>.<name>')"
            )
        if not all(piece.isidentifier() for piece in pieces):
            raise ValueError(f"edge endpoint {v!r} has non-identifier parts")
        return v
151
+
152
+
153
+ # ----------------------------------------------------------------------
154
+ # Graph
155
+ # ----------------------------------------------------------------------
156
+
157
+
158
class Graph(BaseModel):
    """Canonical graph state: declared modules, function nodes and CALLS edges.

    Instances are mutable; :meth:`snapshot` returns an independent deep copy.
    Only shape is validated here — uniqueness, dangling endpoints and cycles
    are deliberately representable so callers can inspect invalid graphs.
    """

    model_config = ConfigDict(extra="forbid", frozen=False)

    modules: list[Module] = Field(default_factory=list)
    nodes: list[Node] = Field(default_factory=list)
    edges: list[Edge] = Field(default_factory=list)

    # ----- lookup ----------------------------------------------------

    def find_module(self, name: str) -> Optional[Module]:
        """Return the first module named ``name``, or ``None``."""
        return next((m for m in self.modules if m.name == name), None)

    def find_node(self, name: str, module: str) -> Optional[Node]:
        """Return the first node ``name`` declared in ``module``, or ``None``."""
        return next(
            (n for n in self.nodes if n.name == name and n.module == module),
            None,
        )

    def find_node_qualified(self, qualified: str) -> Optional[Node]:
        """Look a node up by ``<module>.<name>``; malformed input yields ``None``."""
        if qualified.count(".") != 1:
            return None
        mod, nm = qualified.split(".")
        return self.find_node(nm, mod)

    def find_edge(self, caller: str, callee: str) -> Optional[Edge]:
        """Return the first edge ``caller -> callee`` (qualified names), or ``None``."""
        return next(
            (e for e in self.edges if e.caller == caller and e.callee == callee),
            None,
        )

    def nodes_in_module(self, module: str) -> list[Node]:
        """All nodes whose ``module`` field equals ``module``, in list order."""
        return [n for n in self.nodes if n.module == module]

    def callers_of(self, qualified: str) -> list[str]:
        """Qualified names of callers of ``qualified`` (one entry per edge)."""
        return [e.caller for e in self.edges if e.callee == qualified]

    def callees_of(self, qualified: str) -> list[str]:
        """Qualified names called by ``qualified`` (one entry per edge)."""
        return [e.callee for e in self.edges if e.caller == qualified]

    def fan_in(self, qualified: str) -> int:
        """Number of edges that end at ``qualified``."""
        return len(self.callers_of(qualified))

    def fan_out(self, qualified: str) -> int:
        """Number of edges that start at ``qualified``."""
        return len(self.callees_of(qualified))

    # ----- structural derivations ------------------------------------

    def import_edges(self) -> set[tuple[str, str]]:
        """Set of (caller_module, callee_module) pairs from cross-module edges."""
        out: set[tuple[str, str]] = set()
        for e in self.edges:
            cm = e.caller.split(".")[0]
            tm = e.callee.split(".")[0]
            if cm != tm:
                out.add((cm, tm))
        return out

    def has_module_cycle(self) -> bool:
        """True iff the cross-module import graph contains a directed cycle."""
        # Seed with declared modules, then add any module that only appears
        # as an edge endpoint so the colouring below covers every vertex.
        adj: dict[str, set[str]] = {m.name: set() for m in self.modules}
        for src, dst in self.import_edges():
            adj.setdefault(src, set()).add(dst)
            adj.setdefault(dst, set())
        WHITE, GRAY, BLACK = 0, 1, 2
        color: dict[str, int] = {k: WHITE for k in adj}

        def visit(u: str) -> bool:
            # GRAY marks vertices on the current DFS path; reaching one
            # again means a back edge, i.e. a cycle.
            color[u] = GRAY
            for v in adj.get(u, ()):
                if color[v] == GRAY:
                    return True
                if color[v] == WHITE and visit(v):
                    return True
            color[u] = BLACK
            return False

        return any(color[u] == WHITE and visit(u) for u in adj)

    def call_graph_depth(self) -> int:
        """Longest path length (in edges) in the function call DAG.

        If the call graph is cyclic, returns the special value -1 (callers
        should treat this as an invariant violation).
        """
        adj: dict[str, list[str]] = {n.qualified_name: [] for n in self.nodes}
        for e in self.edges:
            adj.setdefault(e.caller, []).append(e.callee)
            adj.setdefault(e.callee, [])
        memo: dict[str, int] = {}
        ON_STACK = -2  # sentinel: vertex is on the current DFS path

        def dfs(u: str) -> int:
            if u in memo:
                if memo[u] == ON_STACK:
                    return -1
                return memo[u]
            memo[u] = ON_STACK
            best = 0
            for v in adj.get(u, ()):
                d = dfs(v)
                if d == -1:
                    # Leave the ON_STACK marks in place: every later query
                    # touching this path must also report the cycle.
                    return -1
                best = max(best, d + 1)
            memo[u] = best
            return best

        results = [dfs(u) for u in adj]
        if any(r == -1 for r in results):
            return -1
        return max(results, default=0)

    # ----- copying / hashing -----------------------------------------

    def snapshot(self) -> "Graph":
        """Deep copy. Used by the dispatcher for atomic action rollback."""
        return self.model_copy(deep=True)

    def structural_hash(self) -> str:
        """Stable SHA-256 over a canonical JSON projection.

        Insensitive to list ordering where order carries no meaning: the
        ``modules`` and ``nodes`` lists, and the interleaving of edges that
        belong to different callers. Sensitive to ``decl_order`` and to the
        relative order of one caller's out-edges, because both affect the
        materialized output (out-edges are consumed in insertion order by
        templates such as ``sequential_calls``).
        """
        canon: dict[str, object] = {
            "modules": sorted(
                (m.model_dump() for m in self.modules),
                key=lambda d: d["name"],
            ),
            "nodes": sorted(
                (n.model_dump() for n in self.nodes),
                key=lambda d: (d["module"], d["name"]),
            ),
            # ``sorted`` is stable, so keying on the caller alone groups
            # edges by caller while preserving each caller's own edge order.
            "edges": sorted(
                (e.model_dump() for e in self.edges),
                key=lambda d: d["caller"],
            ),
        }
        # ``default=str`` stringifies non-JSON values in body_template_args.
        blob = json.dumps(canon, sort_keys=True, default=str).encode("utf-8")
        return hashlib.sha256(blob).hexdigest()

    # ----- factories -------------------------------------------------

    @classmethod
    def empty(cls) -> "Graph":
        """Return a graph with no modules, nodes or edges."""
        return cls()
graphforge/knowledge_graph.py ADDED
@@ -0,0 +1,233 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """In-memory Knowledge Graph for a Python repository.
2
+
3
+ Mirrors the structure of a Neo4j property graph but lives in RAM:
4
+
5
+ Nodes
6
+ -----
7
+ repo — the repository root
8
+ package — a directory containing __init__.py
9
+ module — a .py file
10
+ class — a class definition
11
+ function — a top-level or nested function / async function
12
+ method — a method inside a class
13
+
14
+ Edges (directed)
15
+ -----------------
16
+ contains — parent → child (repo→package, package→module, module→class, …)
17
+ calls — function/method → function/method (same-file same-package)
18
+ imports — module → module (from x import y / import x)
19
+ inherits — class → class
20
+
21
+ Each node stores the actual source lines so the agent can read/edit them.
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import textwrap
27
+ from dataclasses import dataclass, field
28
+ from typing import Iterable
29
+
30
+
31
+ # ── node & edge ───────────────────────────────────────────────────────────────
32
+
33
@dataclass
class KGNode:
    """A single vertex of the repository knowledge graph."""

    node_id: str  # unique key, e.g. "function:validators.py:validate_title"
    node_type: str  # module | class | function | method | package | repo
    name: str  # short identifier
    file_path: str  # relative path from repo root (empty for repo/package)
    line_start: int = 0
    line_end: int = 0
    source: str = ""  # full source text of this node (incl. def line)
    docstring: str = ""
    metadata: dict = field(default_factory=dict)

    def brief(self) -> str:
        """Return a one-line summary: padded type tag, node id, optional location."""
        location = ""
        if self.file_path:
            location = f" [{self.file_path}:{self.line_start}]"
        tag = self.node_type.upper()
        return f"[{tag:<8}] {self.node_id}{location}"
49
+
50
+
51
@dataclass
class KGEdge:
    """A directed, typed edge between two nodes identified by their ``node_id``."""

    edge_type: str  # contains | calls | imports | inherits
    source_id: str  # node_id the edge starts at
    target_id: str  # node_id the edge points to
56
+
57
+
58
+ # ── knowledge graph ───────────────────────────────────────────────────────────
59
+
60
class KnowledgeGraph:
    """Property graph for a repository.

    Nodes are stored by ``node_id``; edges are kept in insertion order.
    Supports rich queries used by the agent and reward checker.
    """

    def __init__(self, repo_path: str) -> None:
        self.repo_path = repo_path
        self._nodes: dict[str, KGNode] = {}
        self._edges: list[KGEdge] = []

    # ── mutation ──────────────────────────────────────────────────────────────

    def add_node(self, node: KGNode) -> None:
        """Store ``node``, replacing any node with the same ``node_id``."""
        self._nodes[node.node_id] = node

    def add_edge(self, edge: KGEdge) -> None:
        """Append ``edge``; endpoints are not checked and duplicates are kept."""
        self._edges.append(edge)

    def update_node_source(self, node_id: str, new_source: str) -> None:
        """Replace a node's source and recount lines.

        Raises:
            KeyError: if ``node_id`` is not in the graph.
        """
        node = self._nodes[node_id]
        node.source = new_source
        lines = new_source.splitlines()
        node.line_end = node.line_start + len(lines) - 1

    def insert_node(
        self,
        parent_id: str,
        new_node: KGNode,
    ) -> None:
        """Add new_node to the graph and wire a contains edge from parent."""
        self._nodes[new_node.node_id] = new_node
        self._edges.append(KGEdge("contains", parent_id, new_node.node_id))

    def remove_node(self, node_id: str) -> None:
        """Drop the node (if present) together with every edge touching it."""
        self._nodes.pop(node_id, None)
        self._edges = [
            e for e in self._edges
            if e.source_id != node_id and e.target_id != node_id
        ]

    # ── queries ───────────────────────────────────────────────────────────────

    def get_node(self, node_id: str) -> KGNode | None:
        """Return the node stored under ``node_id``, or ``None``."""
        return self._nodes.get(node_id)

    def all_nodes(self, node_type: str | None = None) -> list[KGNode]:
        """Return all nodes, optionally restricted to one ``node_type``."""
        nodes = list(self._nodes.values())
        if node_type:
            nodes = [n for n in nodes if n.node_type == node_type]
        return nodes

    def _linked_nodes(
        self, node_id: str, edge_type: str, *, outgoing: bool
    ) -> list[KGNode]:
        """Nodes joined to ``node_id`` by ``edge_type`` edges.

        ``outgoing=True`` follows edges that start at ``node_id`` and returns
        their targets; ``outgoing=False`` returns the sources of edges that
        end at it. Results are de-duplicated and keep edge insertion order so
        that repeated runs produce identical listings; ids that have no
        stored node are skipped.
        """
        if outgoing:
            ids = (
                e.target_id for e in self._edges
                if e.source_id == node_id and e.edge_type == edge_type
            )
        else:
            ids = (
                e.source_id for e in self._edges
                if e.target_id == node_id and e.edge_type == edge_type
            )
        # dict.fromkeys de-duplicates while preserving first-seen order.
        return [self._nodes[i] for i in dict.fromkeys(ids) if i in self._nodes]

    def children_of(self, node_id: str) -> list[KGNode]:
        """Nodes that ``node_id`` contains."""
        return self._linked_nodes(node_id, "contains", outgoing=True)

    def parent_of(self, node_id: str) -> KGNode | None:
        """Source of the first ``contains`` edge pointing at ``node_id``, if any."""
        for e in self._edges:
            if e.target_id == node_id and e.edge_type == "contains":
                return self._nodes.get(e.source_id)
        return None

    def callers_of(self, node_id: str) -> list[KGNode]:
        """Nodes with a ``calls`` edge into ``node_id``."""
        return self._linked_nodes(node_id, "calls", outgoing=False)

    def callees_of(self, node_id: str) -> list[KGNode]:
        """Nodes that ``node_id`` has a ``calls`` edge to."""
        return self._linked_nodes(node_id, "calls", outgoing=True)

    def imports_of(self, module_id: str) -> list[KGNode]:
        """Nodes that ``module_id`` has an ``imports`` edge to."""
        return self._linked_nodes(module_id, "imports", outgoing=True)

    def search(self, keywords: str, node_type: str | None = None) -> list[KGNode]:
        """Case-insensitive keyword search over node names, docstrings, and source.

        ``keywords`` is split on whitespace and every keyword must occur as
        a substring. An empty ``keywords`` string matches every node.
        """
        kws = keywords.lower().split()
        results: list[KGNode] = []
        for node in self._nodes.values():
            if node_type and node.node_type != node_type:
                continue
            haystack = f"{node.name} {node.docstring} {node.source}".lower()
            if all(kw in haystack for kw in kws):
                results.append(node)
        return results

    def subgraph(self, root_id: str, depth: int = 2) -> list[KGNode]:
        """BFS from root_id up to depth hops; returns all encountered nodes.

        Edges of every type are followed in their forward direction.
        """
        visited: set[str] = set()
        frontier = {root_id}
        for _ in range(depth):
            next_frontier: set[str] = set()
            for nid in frontier:
                if nid in visited:
                    continue
                visited.add(nid)
                for e in self._edges:
                    if e.source_id == nid and e.target_id not in visited:
                        next_frontier.add(e.target_id)
            frontier = next_frontier
        # The last frontier holds the nodes exactly ``depth`` hops away.
        visited.update(frontier)
        return [self._nodes[nid] for nid in visited if nid in self._nodes]

    # ── text representations ──────────────────────────────────────────────────

    def overview(self, max_chars: int = 3000) -> str:
        """Compact multi-line overview of the repo graph, capped to avoid LLM context overflow.

        Modules are listed in ``file_path`` order. Once the text exceeds
        ``max_chars`` the listing stops and a trailer reports how many
        modules were left out (the module that crossed the cap is kept).
        """
        lines: list[str] = [f"## Repository: {self.repo_path}", ""]
        modules = self.all_nodes("module")
        all_fns = self.all_nodes("function")
        all_cls = self.all_nodes("class")
        lines.append(f" {len(modules)} modules · {len(all_fns)} functions · {len(all_cls)} classes")
        lines.append("")

        ordered = sorted(modules, key=lambda n: n.file_path)
        for position, mod in enumerate(ordered, start=1):
            children = self.children_of(mod.node_id)
            funcs = [c for c in children if c.node_type in ("function", "method")]
            classes = [c for c in children if c.node_type == "class"]
            summary = []
            if classes:
                summary.append(f"{len(classes)} class{'es' if len(classes)>1 else ''}")
            if funcs:
                summary.append(f"{len(funcs)} fn{'s' if len(funcs)>1 else ''}")
            lines.append(f" [{mod.file_path}] ({', '.join(summary) or 'empty'})")
            for cls in sorted(classes, key=lambda n: n.name):
                methods = [c for c in self.children_of(cls.node_id) if c.node_type == "method"]
                mnames = ", ".join(m.name for m in sorted(methods, key=lambda n: n.line_start))
                lines.append(f" class {cls.name} → {mnames or '(no methods)'}")
                lines.append(f" node_id: {cls.node_id}")
            for fn in sorted(funcs, key=lambda n: n.line_start):
                lines.append(f" def {fn.name}{fn.metadata.get('signature', '')}")
                lines.append(f" node_id: {fn.node_id}")

            # Stop expanding if we are already near the character cap
            if len("\n".join(lines)) > max_chars:
                # Count against the sorted order we are iterating, not the
                # unsorted ``modules`` list, so the trailer is accurate.
                remaining = len(ordered) - position
                if remaining:
                    lines.append(f"\n ... [{remaining} more modules not shown — use query() to explore]")
                break

        return "\n".join(lines)

    def node_detail(self, node_id: str) -> str:
        """Full inspection view of a single node.

        Returns an ``[ERROR]`` line instead of raising when the id is unknown.
        """
        node = self._nodes.get(node_id)
        if node is None:
            return f"[ERROR] node_id {node_id!r} not found in graph."
        lines = [
            f"## Node: {node.node_id}",
            f"type : {node.node_type}",
            f"file : {node.file_path} (lines {node.line_start}–{node.line_end})",
        ]
        if node.docstring:
            lines.append(f"docstring: {node.docstring[:120]}")
        callers = self.callers_of(node_id)
        callees = self.callees_of(node_id)
        if callers:
            lines.append("called by: " + ", ".join(n.name for n in callers))
        if callees:
            lines.append("calls : " + ", ".join(n.name for n in callees))
        children = self.children_of(node_id)
        if children:
            lines.append("contains : " + ", ".join(c.name for c in children))
        lines += ["", "### Source", "```python", node.source or "(no source)", "```"]
        return "\n".join(lines)

    def snapshot(self) -> "KnowledgeGraph":
        """Deep copy — used to preserve state before mutations."""
        import copy
        return copy.deepcopy(self)
graphforge/materializer/__init__.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Graph -> Python source projection.
2
+
3
+ Responsibilities (PROPOSAL.md §3.3):
4
+
5
+ * Emit one ``<module>.py`` per declared module.
6
+ * Emit functions in :attr:`Node.decl_order` order.
7
+ * Compute ``from <module> import <name>`` lines from cross-module edges,
8
+ deduplicated and sorted.
9
+ * Expand body templates with the node's ``body_template_args`` to produce
10
+ a runnable function body.
11
+
12
+ The materializer is total over well-formed graphs: every dispatcher-accepted
13
+ graph must produce parseable source. Round-trip correctness (the produced
14
+ source re-parses to the same graph) is enforced by tests in
15
+ :mod:`graphforge.parser` (TODO).
16
+ """
17
+
18
+ from graphforge.materializer.materialize import materialize
19
+
20
+ __all__ = ["materialize"]
graphforge/materializer/codegen.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Per-template body codegen.
2
+
3
+ Each public ``render_<template>`` function takes the host node, its outgoing
4
+ edges in deterministic order, and returns a multi-line indented body suitable
5
+ for inserting after a ``def`` line. Bodies use only stdlib and never reference
6
+ unresolved names (the orchestrator ensures imports + pattern constants are
7
+ in scope).
8
+
9
+ Codegen is intentionally simple: the goal is *runnable, readable* Python that
10
+ respects template semantics, not optimal idiomatic code.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from graphforge.graph.schema import Edge, Graph, Node
16
+ from graphforge.materializer import patterns
17
+
18
+ INDENT = " "
19
+
20
+
21
+ # ---- helpers ---------------------------------------------------------
22
+
23
+
24
+ def _kwargs_for(edge: Edge) -> str:
25
+ """Render an edge's arg_mapping as ``param=arg, param2=arg2``."""
26
+ return ", ".join(f"{m.callee_param}={m.caller_arg}" for m in edge.arg_mapping)
27
+
28
+
29
+ def _callee_name(edge: Edge) -> str:
30
+ """The local symbol used at the call site (just the function name).
31
+
32
+ The orchestrator emits ``from <module> import <name>`` for cross-module
33
+ callees, so the call site can always use the bare name.
34
+ """
35
+ return edge.callee.split(".", 1)[1]
36
+
37
+
38
def _indent(lines: list[str]) -> str:
    """Prefix every line with ``INDENT`` and join the result with newlines."""
    indented: list[str] = []
    for text in lines:
        indented.append(INDENT + text)
    return "\n".join(indented)
40
+
41
+
42
+ # ---- per-template renderers -----------------------------------------
43
+
44
+
45
def render_passthrough_call(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Body that returns the result of the node's single callee.

    Raises:
        ValueError: if ``node`` does not have exactly one out-edge.
    """
    edge_count = len(out_edges)
    if edge_count != 1:
        raise ValueError(
            f"passthrough_call on {node.qualified_name} requires 1 out-edge, "
            f"got {edge_count}"
        )
    (only_edge,) = out_edges
    call = f"{_callee_name(only_edge)}({_kwargs_for(only_edge)})"
    return _indent([f"return {call}"])
53
+
54
+
55
def render_sequential_calls(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Body that calls every callee in order and returns the last result.

    Raises:
        ValueError: if ``node`` has no out-edges.
    """
    if len(out_edges) == 0:
        raise ValueError(
            f"sequential_calls on {node.qualified_name} requires >=1 out-edge"
        )
    statements = [f"{_callee_name(e)}({_kwargs_for(e)})" for e in out_edges]
    # Only the final call's value is returned; earlier calls are statements.
    statements[-1] = f"return {statements[-1]}"
    return _indent(statements)
66
+
67
+
68
def render_validate_with_regex(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Body that tests the node's first parameter against a named regex.

    The pattern name is read from ``body_template_args["pattern"]`` and must
    be registered in :mod:`graphforge.materializer.patterns`; the generated
    code refers to the corresponding module-level constant.

    Raises:
        ValueError: if the node has out-edges, names an unknown pattern, or
            its signature declares no parameters.
    """
    if len(out_edges) > 0:
        raise ValueError(
            f"validate_with_regex on {node.qualified_name} must have 0 out-edges"
        )
    pattern_name = str(node.body_template_args.get("pattern", ""))
    regex = patterns.get_pattern(pattern_name)
    if regex is None:
        raise ValueError(
            f"unknown regex pattern {pattern_name!r} on {node.qualified_name}; "
            f"known: {patterns.known_patterns()}"
        )
    constant = patterns.constant_name(pattern_name)
    # The host signature is expected to be (s: str) -> bool, but any first
    # parameter name is accepted for tolerance.
    from graphforge.actions.signature import parse_signature

    params = parse_signature(node.signature).parameters
    if not params:
        raise ValueError(
            f"validate_with_regex on {node.qualified_name} requires "
            f"at least one parameter"
        )
    subject = params[0].name
    # NOTE(review): re.match anchors only at the start of the string, so
    # trailing characters are accepted — confirm whether re.fullmatch is
    # the intended validation semantics.
    return _indent([f"return re.match({constant}, {subject}) is not None"])
91
+
92
+
93
def render_early_return_guard(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Body that returns ``None`` unless a condition holds, else calls the callee.

    The condition text comes from ``body_template_args["condition"]``
    (default ``"True"``) and is emitted verbatim inside ``if not (...)``.

    Raises:
        ValueError: if ``node`` does not have exactly one out-edge.
    """
    if len(out_edges) != 1:
        raise ValueError(
            f"early_return_guard on {node.qualified_name} requires 1 out-edge"
        )
    (target,) = out_edges
    guard = str(node.body_template_args.get("condition", "True"))
    body_lines = [f"if not ({guard}):"]
    body_lines.append(f"{INDENT}return None")
    body_lines.append(f"return {_callee_name(target)}({_kwargs_for(target)})")
    return _indent(body_lines)
107
+
108
+
109
def render_try_call_with_fallback(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Body that tries the first callee and, on any ``Exception``, the second.

    Raises:
        ValueError: if ``node`` does not have exactly two out-edges.
    """
    if len(out_edges) != 2:
        raise ValueError(
            f"try_call_with_fallback on {node.qualified_name} requires "
            f"exactly 2 out-edges (primary, fallback)"
        )
    # Edge order is significant: first is the primary, second the fallback.
    primary_call, fallback_call = (
        f"{_callee_name(e)}({_kwargs_for(e)})" for e in out_edges
    )
    body_lines = [
        "try:",
        f"{INDENT}return {primary_call}",
        "except Exception:",
        f"{INDENT}return {fallback_call}",
    ]
    return _indent(body_lines)
124
+
125
+
126
def render_leaf_constant(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Body that returns the ``repr`` of ``body_template_args["value"]``.

    Raises:
        ValueError: if the node has out-edges or no ``value`` argument.
    """
    if len(out_edges) > 0:
        raise ValueError(
            f"leaf_constant on {node.qualified_name} must have 0 out-edges"
        )
    template_args = node.body_template_args
    if "value" not in template_args:
        raise ValueError(
            f"leaf_constant on {node.qualified_name} requires args.value"
        )
    literal = repr(template_args["value"])
    return _indent([f"return {literal}"])
137
+
138
+
139
+ # ---- registry --------------------------------------------------------
140
+
141
+
142
+ _RENDERERS: dict[str, object] = {
143
+ "passthrough_call": render_passthrough_call,
144
+ "sequential_calls": render_sequential_calls,
145
+ "validate_with_regex": render_validate_with_regex,
146
+ "early_return_guard": render_early_return_guard,
147
+ "try_call_with_fallback": render_try_call_with_fallback,
148
+ "leaf_constant": render_leaf_constant,
149
+ }
150
+
151
+
152
def render_body(node: Node, out_edges: list[Edge], graph: Graph) -> str:
    """Dispatch to the renderer registered for ``node.body_template``.

    A node without a template gets a ``raise NotImplementedError`` stub so
    the generated file is still parseable.

    Raises:
        ValueError: if the template name has no registered renderer (or the
            selected renderer rejects the node).
    """
    template = node.body_template
    if template is None:
        return _indent(['raise NotImplementedError("body not attached")'])
    renderer = _RENDERERS.get(template)
    if renderer is not None:
        return renderer(node, out_edges, graph)  # type: ignore[operator]
    raise ValueError(
        f"no codegen for template {node.body_template!r} on {node.qualified_name}"
    )
163
+
164
+
165
def template_imports(template: str | None) -> set[str]:
    """Return the stdlib modules a body template needs in scope.

    Only ``validate_with_regex`` has a requirement (``re``); every other
    value, including ``None``, yields an empty set.
    """
    return {"re"} if template == "validate_with_regex" else set()
graphforge/materializer/materialize.py ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Materialize a :class:`Graph` into a dict of ``{filename: source}``.
2
+
3
+ Determinism guarantees:
4
+
5
+ * One file per module, named ``<module>.py``.
6
+ * Within a file, functions emitted in :attr:`Node.decl_order`.
7
+ * Imports sorted: stdlib first (alpha), then ``from <module> import <name>``
8
+ (alpha by module, alpha by name).
9
+ * Pattern constants emitted only if used, in alpha order.
10
+ * Out-edges of a node iterated in insertion order, which matters for
11
+ ``sequential_calls`` and ``try_call_with_fallback`` semantics.
12
+
13
+ The orchestrator is a pure function: same graph in, same source out.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ from collections import defaultdict
19
+ from typing import Iterable
20
+
21
+ from graphforge.graph.schema import Edge, Graph, Node
22
+ from graphforge.materializer import codegen, patterns
23
+
24
+ HEADER = '"""Auto-generated by graphforge.materializer. Do not edit by hand."""\n'
25
+ FUTURE = "from __future__ import annotations\n"
26
+
27
+
28
+ # ---- helpers ---------------------------------------------------------
29
+
30
+
31
+ def _out_edges_in_order(graph: Graph, qualified: str) -> list[Edge]:
32
+ """Out-edges of ``qualified`` in insertion order."""
33
+ return [e for e in graph.edges if e.caller == qualified]
34
+
35
+
36
+ def _nodes_by_module(graph: Graph) -> dict[str, list[Node]]:
37
+ """Map module-name -> nodes in decl_order."""
38
+ by_mod: dict[str, list[Node]] = defaultdict(list)
39
+ for n in graph.nodes:
40
+ by_mod[n.module].append(n)
41
+ for ns in by_mod.values():
42
+ ns.sort(key=lambda n: (n.decl_order, n.name))
43
+ return by_mod
44
+
45
+
46
+ def _cross_module_imports(graph: Graph, module: str) -> list[tuple[str, str]]:
47
+ """``[(callee_module, callee_name), ...]`` needed by ``module``."""
48
+ pairs: set[tuple[str, str]] = set()
49
+ for e in graph.edges:
50
+ caller_mod = e.caller.split(".", 1)[0]
51
+ if caller_mod != module:
52
+ continue
53
+ callee_mod, callee_name = e.callee.split(".", 1)
54
+ if callee_mod != module:
55
+ pairs.add((callee_mod, callee_name))
56
+ return sorted(pairs)
57
+
58
+
59
def _stdlib_imports_for(nodes: Iterable[Node]) -> list[str]:
    """Sorted union of the stdlib modules required by the nodes' body templates."""
    required: set[str] = set()
    for node in nodes:
        required.update(codegen.template_imports(node.body_template))
    return sorted(required)
65
+
66
+
67
def _patterns_used_by(nodes: Iterable[Node]) -> list[str]:
    """Sorted names of registered patterns used by ``validate_with_regex`` nodes.

    Names that are not present in the pattern registry are left out.
    """
    requested = (
        str(node.body_template_args.get("pattern", ""))
        for node in nodes
        if node.body_template == "validate_with_regex"
    )
    registered = {
        name for name in requested if patterns.get_pattern(name) is not None
    }
    return sorted(registered)
76
+
77
+
78
+ # ---- core ------------------------------------------------------------
79
+
80
+
81
def materialize(graph: Graph) -> dict[str, str]:
    """Render every declared module of ``graph`` into ``{filename: source}``.

    A module without nodes still produces a file (header and future import
    only) so downstream import resolution can find it.
    """
    grouped = _nodes_by_module(graph)
    return {
        f"{declared.name}.py": _render_module(
            graph, declared.name, grouped.get(declared.name, [])
        )
        for declared in graph.modules
    }
93
+
94
+
95
def _render_module(graph: Graph, module_name: str, nodes: list[Node]) -> str:
    """Render the source text of one module file.

    Layout, in order: header docstring and future import, stdlib imports,
    cross-module ``from ... import ...`` lines, pattern constants, then one
    ``def`` per node in the order given by ``nodes``.

    Args:
        graph: graph the module belongs to (used for edge lookups).
        module_name: name of the module being rendered.
        nodes: the module's nodes, already in emission order.

    Returns:
        The file contents, ending in exactly one newline.

    Raises:
        ValueError: propagated from body rendering when a node's template
            requirements are not met.
    """
    parts: list[str] = [HEADER, FUTURE, "\n"]

    # Compute both import lists once; they drive the emitted lines and the
    # decision whether a separating blank line is needed.
    stdlib_imports = _stdlib_imports_for(nodes)
    cross_imports = _cross_module_imports(graph, module_name)

    # Stdlib imports.
    parts.extend(f"import {imp}\n" for imp in stdlib_imports)

    # Cross-module function imports.
    parts.extend(
        f"from {callee_mod} import {callee_name}\n"
        for callee_mod, callee_name in cross_imports
    )

    if stdlib_imports or cross_imports:
        parts.append("\n")

    # Pattern constants used in this module. We emit a plain string literal
    # (not a raw-string-prefixed one) because ``repr()`` already produces a
    # valid Python string literal — wrapping it in ``r"..."`` would double
    # the backslashes and break regex metacharacters like ``\s`` and ``\d``.
    used_patterns = _patterns_used_by(nodes)
    for name in used_patterns:
        regex = patterns.get_pattern(name)
        constant = patterns.constant_name(name)
        parts.append(f"{constant} = {regex!r}\n")
    if used_patterns:
        parts.append("\n")

    # Functions, separated by a single blank line.
    last_index = len(nodes) - 1
    for i, node in enumerate(nodes):
        out_edges = _out_edges_in_order(graph, node.qualified_name)
        body = codegen.render_body(node, out_edges, graph)
        parts.append(f"def {node.name}{node.signature}:\n{body}\n")
        if i != last_index:
            parts.append("\n")

    source = "".join(parts)
    # Ensure exactly one trailing newline.
    return source.rstrip("\n") + "\n"
graphforge/materializer/patterns.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Named regex patterns for ``validate_with_regex`` template.
2
+
3
+ Patterns are referenced by name in the graph (e.g. ``args={"pattern": "EMAIL"}``)
4
+ and resolved here at materialization time. The registry keeps task definitions
5
+ domain-agnostic — a task constraint can name a pattern without leaking the
6
+ regex itself into the graph schema.
7
+
8
+ Add new patterns sparingly; every name here becomes part of the constraint
9
+ vocabulary that tasks can use.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
# Registry: pattern name -> regex source string.
_PATTERNS: dict[str, str] = {
    "EMAIL": r"[^@\s]+@[^@\s]+\.[^@\s]+",
    "HEXCOLOR": r"#[0-9a-fA-F]{6}",
    "PHONE": r"\+?\d{10,15}",
    "ALPHANUM": r"[A-Za-z0-9]+",
    "URL": r"https?://[^\s]+",
}


def known_patterns() -> list[str]:
    """Return every registered pattern name, sorted alphabetically."""
    return sorted(_PATTERNS)


def get_pattern(name: str) -> str | None:
    """Return the regex source registered under *name*, or ``None`` if unknown."""
    return _PATTERNS.get(name)


def constant_name(name: str) -> str:
    """Module-level constant name we emit for a given pattern name."""
    return f"_PATTERN_{name}"
graphforge/parser/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Round-trip parser: Python source -> Graph.
2
+
3
+ Responsibilities (PROPOSAL.md §3.4):
4
+
5
+ * Walk an AST per module file.
6
+ * Recover function declarations as :class:`Node` objects.
7
+ * Recover ``from x import y`` lines as cross-module edges (best-effort).
8
+ * Recognize body templates by structural pattern matching against the
9
+ template library, and recover ``body_template`` + ``body_template_args``.
10
+ * Produce a :class:`Graph` identical (per ``structural_hash``) to the one
11
+ that produced the source via :mod:`graphforge.materializer`.
12
+
13
+ The round-trip parser is unit-tested against every body template + every
14
+ constraint pattern. If it fails to round-trip, the materializer emits a
15
+ warning and the graph is treated as canonical.
16
+
17
+ Public surface (TODO):
18
+
19
+ parse_program(files: dict[str, str]) -> Graph
20
+ parse_directory(path: Path) -> Graph
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+
26
def parse_program(files: dict[str, str]) -> object:  # pragma: no cover — TODO
    """Placeholder for the round-trip parser; always raises.

    Raises:
        NotImplementedError: unconditionally, until the parser is written.
    """
    message = "round-trip parser TODO — see PROPOSAL.md §3.4"
    raise NotImplementedError(message)
graphforge/repo_parser.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Parse a Python repository (directory tree) into a KnowledgeGraph.
2
+
3
+ Usage
4
+ -----
5
+ from graphforge.repo_parser import parse_repo
6
+ kg = parse_repo("/path/to/my_package")
7
+
8
+ What it extracts
9
+ ----------------
10
+ Nodes : repo, package, module, class, function, method
11
+ Edges : contains, calls (same-file), imports, inherits
12
+
13
+ Cross-file call resolution is best-effort: if function A in file X calls
14
+ function B and B appears anywhere in the graph, an edge is added.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import ast
20
+ import os
21
+ from pathlib import Path
22
+ from typing import Any
23
+
24
+ from graphforge.knowledge_graph import KGEdge, KGNode, KnowledgeGraph
25
+
26
+
27
+ # ── helpers ───────────────────────────────────────────────────────────────────
28
+
29
+ def _node_id(node_type: str, file_path: str, *names: str) -> str:
30
+ parts = [node_type, file_path] + list(names)
31
+ return ":".join(p for p in parts if p)
32
+
33
+
34
+ def _sig(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
35
+ args = []
36
+ for arg in node.args.args:
37
+ ann = f": {ast.unparse(arg.annotation)}" if arg.annotation else ""
38
+ args.append(f"{arg.arg}{ann}")
39
+ ret = f" -> {ast.unparse(node.returns)}" if node.returns else ""
40
+ return f"({', '.join(args)}){ret}"
41
+
42
+
43
+ def _source_slice(source_lines: list[str], start: int, end: int) -> str:
44
+ """1-indexed, inclusive."""
45
+ return "\n".join(source_lines[start - 1 : end])
46
+
47
+
48
+ def _direct_calls(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
49
+ """Collect names of directly called functions (Name-style calls only)."""
50
+ calls: set[str] = set()
51
+ for node in ast.walk(func_node):
52
+ if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
53
+ calls.add(node.func.id)
54
+ return calls
55
+
56
+
57
+ # ── single-file parser ────────────────────────────────────────────────────────
58
+
59
def _parse_file(
    file_path: str,  # relative to repo root
    abs_path: str,
    kg: KnowledgeGraph,
    parent_id: str,
) -> None:
    """Parse one Python file and add its nodes and edges to *kg*.

    Adds a module node (contained by *parent_id*), ``imports`` edges for every
    import statement found anywhere in the file, class/function/method nodes
    for top-level definitions, and finally ``calls`` edges.

    Files that cannot be read or parsed are skipped silently (best-effort).

    Args:
        file_path: Path relative to the repo root; used in node ids.
        abs_path: Absolute path used to read the file.
        kg: Graph being populated (mutated in place).
        parent_id: Id of the containing repo or package node.
    """
    try:
        source = Path(abs_path).read_text(encoding="utf-8", errors="replace")
    except (OSError, ValueError):
        return

    try:
        tree = ast.parse(source, filename=abs_path)
    except (SyntaxError, ValueError, RecursionError):
        # ValueError: compile() documents it for source containing null bytes.
        # RecursionError: pathologically nested source.  Either way one bad
        # file must not abort the whole repo walk.
        return

    lines = source.splitlines()
    mod_id = _node_id("module", file_path)

    # Module node
    mod_doc = ast.get_docstring(tree) or ""
    kg.add_node(KGNode(
        node_id=mod_id,
        node_type="module",
        name=Path(file_path).stem,
        file_path=file_path,
        line_start=1,
        line_end=len(lines),
        source=source,
        docstring=mod_doc,
    ))
    kg.add_edge(KGEdge("contains", parent_id, mod_id))

    # Import edges (resolve module names to the id a module node would have)
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            for alias in node.names:
                imp_id = _node_id("module", alias.name.replace(".", "/") + ".py")
                kg.add_edge(KGEdge("imports", mod_id, imp_id))
        elif isinstance(node, ast.ImportFrom) and node.module:
            target = node.module.replace(".", "/") + ".py"
            if node.level:
                # Relative import: anchor the target at the importing file's
                # package, climbing one directory per extra leading dot.
                base = Path(file_path).parent
                for _ in range(node.level - 1):
                    base = base.parent
                target = str(base / target)
            imp_id = _node_id("module", target)
            kg.add_edge(KGEdge("imports", mod_id, imp_id))

    # Top-level classes and functions
    func_name_to_id: dict[str, str] = {}  # for call resolution within file

    for stmt in tree.body:
        if isinstance(stmt, ast.ClassDef):
            _parse_class(stmt, file_path, lines, kg, mod_id, func_name_to_id)
        elif isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
            _parse_function(stmt, file_path, lines, kg, mod_id, func_name_to_id)

    # Call edges, resolved against this file's name table.
    _resolve_calls(func_name_to_id, kg)
113
+
114
+
115
def _parse_class(
    cls_node: ast.ClassDef,
    file_path: str,
    lines: list[str],
    kg: KnowledgeGraph,
    parent_id: str,
    func_name_to_id: dict[str, str],
) -> None:
    """Add a class node, its ``inherits`` edges and its methods to *kg*.

    Only bases written as a bare name produce an ``inherits`` edge, and the
    base is assumed to live in the same file (its id is built from
    *file_path*).
    """
    class_name = cls_node.name
    first_line, last_line = cls_node.lineno, cls_node.end_lineno
    cls_id = _node_id("class", file_path, class_name)

    kg.add_node(KGNode(
        node_id=cls_id,
        node_type="class",
        name=class_name,
        file_path=file_path,
        line_start=first_line,
        line_end=last_line,
        source=_source_slice(lines, first_line, last_line),
        docstring=ast.get_docstring(cls_node) or "",
    ))
    kg.add_edge(KGEdge("contains", parent_id, cls_id))

    # Inheritance edges (bare-name bases only)
    simple_bases = (b.id for b in cls_node.bases if isinstance(b, ast.Name))
    for base_name in simple_bases:
        kg.add_edge(KGEdge("inherits", cls_id, _node_id("class", file_path, base_name)))

    # Methods: every def / async def directly in the class body
    method_kinds = (ast.FunctionDef, ast.AsyncFunctionDef)
    for member in cls_node.body:
        if not isinstance(member, method_kinds):
            continue
        _parse_method(member, file_path, lines, kg, cls_id, class_name, func_name_to_id)
147
+
148
+
149
def _parse_function(
    fn: ast.FunctionDef | ast.AsyncFunctionDef,
    file_path: str,
    lines: list[str],
    kg: KnowledgeGraph,
    parent_id: str,
    func_name_to_id: dict[str, str],
) -> None:
    """Add a top-level function node to *kg* and register it for call resolution.

    The node's metadata carries the rendered signature and the list of
    directly called names; *func_name_to_id* is updated with the new id.
    """
    fn_id = _node_id("function", file_path, fn.name)
    start, stop = fn.lineno, fn.end_lineno
    meta = {"signature": _sig(fn), "calls": list(_direct_calls(fn))}

    kg.add_node(KGNode(
        node_id=fn_id,
        node_type="function",
        name=fn.name,
        file_path=file_path,
        line_start=start,
        line_end=stop,
        source=_source_slice(lines, start, stop),
        docstring=ast.get_docstring(fn) or "",
        metadata=meta,
    ))
    kg.add_edge(KGEdge("contains", parent_id, fn_id))
    func_name_to_id[fn.name] = fn_id
172
+
173
+
174
def _parse_method(
    fn: ast.FunctionDef | ast.AsyncFunctionDef,
    file_path: str,
    lines: list[str],
    kg: KnowledgeGraph,
    parent_id: str,
    class_name: str,
    func_name_to_id: dict[str, str],
) -> None:
    """Add a method node to *kg* under its class and register it by bare name.

    The method id includes *class_name*, but *func_name_to_id* is keyed by the
    unqualified method name, so a later definition with the same name in the
    same file replaces the earlier entry.
    """
    method_id = _node_id("method", file_path, class_name, fn.name)
    start, stop = fn.lineno, fn.end_lineno
    meta = {"signature": _sig(fn), "calls": list(_direct_calls(fn))}

    kg.add_node(KGNode(
        node_id=method_id,
        node_type="method",
        name=fn.name,
        file_path=file_path,
        line_start=start,
        line_end=stop,
        source=_source_slice(lines, start, stop),
        docstring=ast.get_docstring(fn) or "",
        metadata=meta,
    ))
    kg.add_edge(KGEdge("contains", parent_id, method_id))
    # register under unqualified name too for call resolution
    func_name_to_id[fn.name] = method_id
199
+
200
+
201
def _resolve_calls(func_name_to_id: dict[str, str], kg: KnowledgeGraph) -> None:
    """Add ``calls`` edges from the call names recorded in node metadata.

    Every function/method node currently in *kg* is scanned; each recorded
    callee name found in *func_name_to_id* yields an edge to the mapped id,
    except when that id is the caller itself.
    """
    # Snapshot the callers first so the scan is independent of edge insertion.
    callers = [
        (node_id, node)
        for node_id, node in kg._nodes.items()
        if node.node_type in ("function", "method")
    ]
    for caller_id, caller in callers:
        for callee_name in caller.metadata.get("calls", []):
            if callee_name not in func_name_to_id:
                continue
            target_id = func_name_to_id[callee_name]
            if target_id == caller_id:
                continue  # skip self-edges
            kg.add_edge(KGEdge("calls", caller_id, target_id))
211
+
212
+
213
+ # ── repo walker ───────────────────────────────────────────────────────────────
214
+
215
def parse_repo(repo_path: str, exclude_dirs: set[str] | None = None) -> KnowledgeGraph:
    """Walk repo_path recursively and return a KnowledgeGraph.

    Directories and files are visited in sorted order so that the node
    insertion order — and therefore the call edges, which are resolved
    against whatever has been parsed so far — is the same on every
    filesystem.

    Parameters
    ----------
    repo_path : str
        Absolute or relative path to the root of the repo.
    exclude_dirs : set[str], optional
        Directory names to skip (e.g. {"__pycache__", ".git", "tests"}).
        When omitted, a default set of cache / VCS / build directories is used.

    Returns
    -------
    KnowledgeGraph
        Graph holding one repo node, one package node per visited
        sub-directory, and whatever each ``.py`` file contributes.
    """
    if exclude_dirs is None:
        exclude_dirs = {"__pycache__", ".git", ".venv", "venv", "env",
                        "node_modules", ".mypy_cache", ".pytest_cache", "dist", "build"}

    abs_root = str(Path(repo_path).resolve())
    kg = KnowledgeGraph(repo_path=repo_path)

    # Root repo node
    repo_name = Path(abs_root).name
    repo_id = _node_id("repo", "", repo_name)
    kg.add_node(KGNode(
        node_id=repo_id,
        node_type="repo",
        name=repo_name,
        file_path="",
    ))

    # Walk directory tree
    for dirpath, dirnames, filenames in os.walk(abs_root):
        # Prune excluded dirs in-place (modifies os.walk traversal) and sort
        # the survivors so traversal order is deterministic.
        dirnames[:] = sorted(d for d in dirnames if d not in exclude_dirs)

        rel_dir = os.path.relpath(dirpath, abs_root)
        if rel_dir == ".":
            rel_dir = ""

        parent_id = repo_id
        if rel_dir:
            pkg_id = _node_id("package", rel_dir)
            if pkg_id not in kg._nodes:
                kg.add_node(KGNode(
                    node_id=pkg_id,
                    node_type="package",
                    name=Path(rel_dir).name,
                    file_path=rel_dir,
                ))
                # NOTE(review): nested packages are attached directly to the
                # repo node, not to their parent package — confirm intended.
                kg.add_edge(KGEdge("contains", repo_id, pkg_id))
            parent_id = pkg_id

        for fname in sorted(filenames):
            if not fname.endswith(".py"):
                continue
            rel_file = os.path.join(rel_dir, fname) if rel_dir else fname
            abs_file = os.path.join(dirpath, fname)
            _parse_file(rel_file, abs_file, kg, parent_id)

    return kg
graphforge/repo_registry.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Registry of training repos with their clone URLs and source paths.
2
+
3
+ Add a new repo by appending to REGISTRY. The pipeline will clone it,
4
+ parse it, and auto-generate tasks from its doctests.
5
+
6
+ Each entry:
7
+ name short identifier used in task_ids
8
+ url git clone URL (depth-1 clone)
9
+ src_hint subdirectory containing the Python package
10
+ (tried as: <clone>/<hint>, <clone>/src/<hint>, <clone>)
11
+ n_tasks max tasks to pull from this repo
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from dataclasses import dataclass, field
17
+ from pathlib import Path
18
+
19
+
20
@dataclass
class RepoSpec:
    """One training repository: where to clone it from and where its code lives."""

    name: str  # short identifier; also the clone sub-directory name
    url: str  # git clone URL
    src_hint: str  # sub-directory expected to contain the Python package
    n_tasks: int = 6  # maximum number of tasks to generate from this repo
26
+
27
+
28
+ REGISTRY: list[RepoSpec] = [
29
+ # ── string / text ────────────────────────────────────────────────────────
30
+ RepoSpec(
31
+ name="humanize",
32
+ url="https://github.com/jmoiron/humanize.git",
33
+ src_hint="src/humanize",
34
+ n_tasks=6,
35
+ ),
36
+ RepoSpec(
37
+ name="wcwidth",
38
+ url="https://github.com/jquast/wcwidth.git",
39
+ src_hint="wcwidth",
40
+ n_tasks=6,
41
+ ),
42
+ RepoSpec(
43
+ name="inflect",
44
+ url="https://github.com/jaraco/inflect.git",
45
+ src_hint="inflect",
46
+ n_tasks=4,
47
+ ),
48
+
49
+ # ── iteration / functional ───────────────────────────────────────────────
50
+ RepoSpec(
51
+ name="boltons",
52
+ url="https://github.com/mahmoud/boltons.git",
53
+ src_hint="boltons",
54
+ n_tasks=10,
55
+ ),
56
+ RepoSpec(
57
+ name="more-itertools",
58
+ url="https://github.com/more-itertools/more-itertools.git",
59
+ src_hint="more_itertools",
60
+ n_tasks=8,
61
+ ),
62
+ RepoSpec(
63
+ name="toolz",
64
+ url="https://github.com/pytoolz/toolz.git",
65
+ src_hint="toolz",
66
+ n_tasks=6,
67
+ ),
68
+
69
+ # ── data transformation / ETL ────────────────────────────────────────────
70
+ RepoSpec(
71
+ name="petl",
72
+ url="https://github.com/petl-developers/petl.git",
73
+ src_hint="src/petl",
74
+ n_tasks=8,
75
+ ),
76
+ RepoSpec(
77
+ name="pydash",
78
+ url="https://github.com/dgilland/pydash.git",
79
+ src_hint="src/pydash",
80
+ n_tasks=8,
81
+ ),
82
+
83
+ ]
84
+
85
+ # Repos that were evaluated and produced 0 tasks (no literal-eval-able doctests):
86
+ # num2words, parse, dateutil — omitted from REGISTRY
87
+
88
+
89
+ def _find_src(clone_dir: str, hint: str) -> str:
90
+ for candidate in [
91
+ f"{clone_dir}/{hint}",
92
+ f"{clone_dir}/src/{hint}",
93
+ clone_dir,
94
+ ]:
95
+ if Path(candidate).is_dir():
96
+ return candidate
97
+ return clone_dir
98
+
99
+
100
def load_all_tasks(
    clone_root: str = "/tmp/train_repos",
    registry: list[RepoSpec] | None = None,
    verbose: bool = True,
) -> list:
    """Clone every repo in the registry and return all AutoTask objects.

    A repo is cloned (depth 1) only if its target directory does not exist
    yet.  A repo whose task generation raises is skipped; a failing
    ``git clone`` still propagates ``subprocess.CalledProcessError``.

    Args:
        clone_root: Directory under which repos are cloned.
        registry:   Use a custom registry; defaults to REGISTRY.  An explicit
                    empty list means "no repos" and is honoured as such.
        verbose:    Print progress.

    Returns:
        Flat list of AutoTask objects from all repos.
    """
    import subprocess
    from graphforge.task_generator import generate_tasks

    # ``registry or REGISTRY`` would treat [] as "not given" and clone the
    # full default registry; only None selects the default.
    specs = REGISTRY if registry is None else registry
    all_tasks = []
    Path(clone_root).mkdir(parents=True, exist_ok=True)

    for spec in specs:
        clone_dir = str(Path(clone_root) / spec.name)
        if not Path(clone_dir).exists():
            if verbose:
                print(f"Cloning {spec.name} ...")
            subprocess.check_call(
                ["git", "clone", "--depth", "1", "-q", spec.url, clone_dir]
            )

        src = _find_src(clone_dir, spec.src_hint)
        try:
            kg, tasks = generate_tasks(src, n_tasks=spec.n_tasks)
            all_tasks.extend(tasks)
            if verbose:
                print(f"  {spec.name}: {len(tasks)} tasks "
                      f"(DAG {len(kg._nodes)} nodes)")
        except Exception as exc:
            # Deliberate best-effort: one unparseable repo must not stop the
            # batch.  The reason is only reported when verbose is set.
            if verbose:
                print(f"  {spec.name}: SKIPPED — {exc}")

    if verbose:
        print(f"\nTotal auto-tasks: {len(all_tasks)}")
    return all_tasks
graphforge/reward/__init__.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reward engine — see :mod:`graphforge.reward.engine`.
2
+
3
+ Per-turn (dense, small) and terminal (sparse, large) reward computation
4
+ following PROPOSAL.md §5.
5
+ """
6
+
7
+ from graphforge.reward.engine import (
8
+ ActionOutcome,
9
+ ALL_BEHAVIORAL_BONUS,
10
+ ALL_STRUCTURAL_BONUS,
11
+ ALPHA_TOKEN_COST,
12
+ BEHAVIORAL_PER_PASS,
13
+ DUPLICATE_ACTION,
14
+ MATERIALIZE_FAIL_PENALTY,
15
+ MUTATION_FAIL,
16
+ PER_TURN_COST,
17
+ SCHEMA_REJECTION,
18
+ STRUCTURAL_PER_SAT,
19
+ TYPE_CHECK_BONUS,
20
+ TOKEN_EFFICIENCY_MAX,
21
+ TerminalReward,
22
+ TurnReward,
23
+ score_terminal,
24
+ score_turn,
25
+ )
26
+
27
+ __all__ = [
28
+ "ALPHA_TOKEN_COST",
29
+ "ALL_BEHAVIORAL_BONUS",
30
+ "ALL_STRUCTURAL_BONUS",
31
+ "ActionOutcome",
32
+ "BEHAVIORAL_PER_PASS",
33
+ "DUPLICATE_ACTION",
34
+ "MATERIALIZE_FAIL_PENALTY",
35
+ "MUTATION_FAIL",
36
+ "PER_TURN_COST",
37
+ "SCHEMA_REJECTION",
38
+ "STRUCTURAL_PER_SAT",
39
+ "TOKEN_EFFICIENCY_MAX",
40
+ "TYPE_CHECK_BONUS",
41
+ "TerminalReward",
42
+ "TurnReward",
43
+ "score_terminal",
44
+ "score_turn",
45
+ ]
graphforge/reward/engine.py ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Reward engine — per-turn (dense, small) and terminal (sparse, large).
2
+
3
+ Implementation follows PROPOSAL.md §5 verbatim. The two halves are pure
4
+ functions over lightweight envelopes so the server can call them without
5
+ threading state through the reward module.
6
+
7
+ Decisions worth flagging:
8
+
9
+ * ``All-behavioral-passing`` bonus is awarded only when there is at least
10
+ one behavioral test. The gate for the token-efficiency bonus, however,
11
+ treats zero behavioral tests as vacuously satisfied (so a tier-0 task
12
+ with no behavioral tests can still earn token-efficiency reward).
13
+ * ``type_checks_ok`` is tri-state: ``True`` / ``False`` / ``None``. ``None``
14
+ means the type-check gate didn't run (e.g. mypy isn't wired yet); the
15
+ +3 bonus is suppressed in that case.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from dataclasses import dataclass, field
21
+ from enum import Enum
22
+
23
+ # Coefficients (PROPOSAL.md §5.1). Override at call time if you want.
24
+ ALPHA_TOKEN_COST: float = 0.0008
25
+ PER_TURN_COST: float = -0.1
26
+ MUTATION_FAIL: float = -2.0
27
+ SCHEMA_REJECTION: float = -2.0
28
+ DUPLICATE_ACTION: float = -1.0
29
+
30
+ # Terminal magnitudes (§5.2)
31
+ STRUCTURAL_PER_SAT: float = 1.0
32
+ BEHAVIORAL_PER_PASS: float = 3.0
33
+ ALL_STRUCTURAL_BONUS: float = 5.0
34
+ ALL_BEHAVIORAL_BONUS: float = 5.0
35
+ TYPE_CHECK_BONUS: float = 3.0
36
+ MATERIALIZE_FAIL_PENALTY: float = -8.0
37
+ TOKEN_EFFICIENCY_MAX: float = 5.0
38
+
39
+
40
+ # ---- per-turn -------------------------------------------------------
41
+
42
+
43
class ActionOutcome(str, Enum):
    """Coarse classification used by ``score_turn``.

    ``SUCCESS`` — mutation or info action returned ``ok=True``.
    ``FAILURE`` — handler raised :class:`ActionError` (rollback path).
    ``MALFORMED`` — pydantic schema rejected the action at parse time.

    Members are also ``str`` instances, so each member compares equal to its
    lowercase string value.
    """

    SUCCESS = "success"
    FAILURE = "failure"
    MALFORMED = "malformed"
54
+
55
+
56
@dataclass(frozen=True)
class TurnReward:
    """Immutable breakdown of the reward for a single turn."""

    base: float  # outcome-dependent component
    duplicate: float  # 0 or DUPLICATE_ACTION
    per_turn: float  # PER_TURN_COST
    token_cost: float  # alpha * tokens_returned, negated

    @property
    def total(self) -> float:
        """Sum of the four components."""
        subtotal = self.base + self.duplicate
        subtotal = subtotal + self.per_turn
        return subtotal + self.token_cost

    def to_dict(self) -> dict[str, float]:
        """Return the components plus ``total`` as a plain dict."""
        return dict(
            base=self.base,
            duplicate=self.duplicate,
            per_turn=self.per_turn,
            token_cost=self.token_cost,
            total=self.total,
        )
75
+
76
+
77
def score_turn(
    *,
    outcome: ActionOutcome,
    is_duplicate: bool,
    tokens_returned: int,
    alpha: float = ALPHA_TOKEN_COST,
    per_turn_cost: float = PER_TURN_COST,
) -> TurnReward:
    """Compute the per-turn reward breakdown for one action.

    Args:
        outcome: Classification of the action's result.
        is_duplicate: Whether the action repeats an earlier one.
        tokens_returned: Tokens returned to the agent; negatives count as 0.
        alpha: Cost per returned token.
        per_turn_cost: Flat cost charged every turn.

    Returns:
        A :class:`TurnReward` holding the individual components.
    """
    if outcome is ActionOutcome.SUCCESS:
        outcome_component = 0.0
    else:
        # Anything that is neither SUCCESS nor FAILURE is scored as MALFORMED.
        outcome_component = (
            MUTATION_FAIL if outcome is ActionOutcome.FAILURE else SCHEMA_REJECTION
        )

    duplicate_component = 0.0 if not is_duplicate else DUPLICATE_ACTION
    billable_tokens = max(0, tokens_returned)

    return TurnReward(
        base=outcome_component,
        duplicate=duplicate_component,
        per_turn=per_turn_cost,
        token_cost=-alpha * billable_tokens,
    )
97
+
98
+
99
+ # ---- terminal -------------------------------------------------------
100
+
101
+
102
@dataclass(frozen=True)
class TerminalReward:
    """Immutable breakdown of the end-of-episode reward."""

    structural: float  # +1 per structural constraint satisfied
    behavioral: float  # +3 per behavioral test passing
    bonus_all_structural: float
    bonus_all_behavioral: float
    bonus_type_checks: float
    penalty_materialize: float  # 0 or MATERIALIZE_FAIL_PENALTY
    efficiency: float  # gated by all-structural AND all-behavioral

    # Raw inputs the reward was computed from, kept for reporting.
    components: dict[str, object] = field(default_factory=dict)

    @property
    def total(self) -> float:
        """Sum of every numeric component (``components`` is not included)."""
        running = self.structural + self.behavioral
        running = running + self.bonus_all_structural
        running = running + self.bonus_all_behavioral
        running = running + self.bonus_type_checks
        running = running + self.penalty_materialize
        return running + self.efficiency

    def to_dict(self) -> dict[str, object]:
        """Return all fields plus ``total`` as a plain dict."""
        return dict(
            structural=self.structural,
            behavioral=self.behavioral,
            bonus_all_structural=self.bonus_all_structural,
            bonus_all_behavioral=self.bonus_all_behavioral,
            bonus_type_checks=self.bonus_type_checks,
            penalty_materialize=self.penalty_materialize,
            efficiency=self.efficiency,
            total=self.total,
            components=self.components,
        )
138
+
139
+
140
def score_terminal(
    *,
    n_structural_satisfied: int,
    n_structural_total: int,
    n_behavioral_passing: int,
    n_behavioral_total: int,
    materialization_ok: bool,
    type_checks_ok: bool | None,
    tokens_used: int,
    budget: int,
) -> TerminalReward:
    """Compute the terminal reward breakdown for a finished episode.

    Args:
        n_structural_satisfied: Structural constraints satisfied.
        n_structural_total: Structural constraints in the task.
        n_behavioral_passing: Behavioral tests passing.
        n_behavioral_total: Behavioral tests in the task.
        materialization_ok: Whether materialization succeeded.
        type_checks_ok: ``True`` / ``False``, or ``None`` when the type-check
            gate did not run; only ``True`` earns the bonus.
        tokens_used: Tokens consumed during the episode.
        budget: Token budget; must be positive.

    Returns:
        A :class:`TerminalReward` with each component and the raw inputs.

    Raises:
        ValueError: if any count is negative or ``budget`` is not positive.
    """
    if n_structural_satisfied < 0 or n_structural_total < 0:
        raise ValueError("structural counts must be non-negative")
    if n_behavioral_passing < 0 or n_behavioral_total < 0:
        raise ValueError("behavioral counts must be non-negative")
    if budget <= 0:
        raise ValueError("budget must be positive")

    structural = STRUCTURAL_PER_SAT * n_structural_satisfied
    behavioral = BEHAVIORAL_PER_PASS * n_behavioral_passing

    all_structural = (
        n_structural_total > 0 and n_structural_satisfied == n_structural_total
    )
    all_behavioral_present_and_passing = (
        n_behavioral_total > 0 and n_behavioral_passing == n_behavioral_total
    )
    bonus_all_structural = ALL_STRUCTURAL_BONUS if all_structural else 0.0
    bonus_all_behavioral = (
        ALL_BEHAVIORAL_BONUS if all_behavioral_present_and_passing else 0.0
    )

    # Tri-state: None (gate did not run) and False both earn nothing.
    bonus_type_checks = TYPE_CHECK_BONUS if type_checks_ok is True else 0.0

    penalty_materialize = (
        0.0 if materialization_ok else MATERIALIZE_FAIL_PENALTY
    )

    # Efficiency bonus is gated on all-structural AND all-behavioral satisfied.
    # When n_behavioral_total == 0 the behavioral half is vacuously satisfied
    # for the gate's purposes (otherwise tier-0 tasks could never earn it).
    behavioral_gate_ok = (
        n_behavioral_total == 0
        or n_behavioral_passing == n_behavioral_total
    )
    efficiency = 0.0
    if all_structural and behavioral_gate_ok:
        # Clamp to [0, 1]: going over budget earns nothing, and a negative
        # tokens_used must not push the bonus above TOKEN_EFFICIENCY_MAX.
        ratio = min(1.0, max(0.0, (budget - tokens_used) / budget))
        efficiency = TOKEN_EFFICIENCY_MAX * ratio

    return TerminalReward(
        structural=structural,
        behavioral=behavioral,
        bonus_all_structural=bonus_all_structural,
        bonus_all_behavioral=bonus_all_behavioral,
        bonus_type_checks=bonus_type_checks,
        penalty_materialize=penalty_materialize,
        efficiency=efficiency,
        components={
            "n_structural_satisfied": n_structural_satisfied,
            "n_structural_total": n_structural_total,
            "n_behavioral_passing": n_behavioral_passing,
            "n_behavioral_total": n_behavioral_total,
            "materialization_ok": materialization_ok,
            "type_checks_ok": type_checks_ok,
            "tokens_used": tokens_used,
            "budget": budget,
        },
    )
graphforge/sample_repos/humanize/__init__.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Humanize — convert numbers, file sizes, and times to human-readable strings."""
2
+ from graphforge.sample_repos.humanize.filesize import naturalsize
3
+ from graphforge.sample_repos.humanize.number import (
4
+ apnumber,
5
+ clamp,
6
+ fractional,
7
+ intcomma,
8
+ intword,
9
+ ordinal,
10
+ scientific,
11
+ )
12
+ from graphforge.sample_repos.humanize.time import (
13
+ naturaldate,
14
+ naturalday,
15
+ naturaldelta,
16
+ naturaltime,
17
+ precisedelta,
18
+ )
graphforge/sample_repos/humanize/filesize.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Bits and bytes related humanization."""
2
+
3
suffixes = {
    "decimal": ("kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"),
    "binary": ("KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"),
    "gnu": "KMGTPEZY",
}


def naturalsize(value, binary=False, gnu=False, format="%.1f"):
    """Render a byte count as a human-readable size string.

    Decimal suffixes (kB, MB, ...) with base 1000 are the default; ``binary``
    selects KiB-style suffixes and ``gnu`` single-letter suffixes, both with
    base 1024.  ``gnu`` wins when both flags are set.

    Examples:
        >>> naturalsize(3000000)
        '3.0 MB'
        >>> naturalsize(300, False, True)
        '300B'
        >>> naturalsize(3000, True)
        '2.9 KiB'
    """
    if gnu:
        style = "gnu"
    elif binary:
        style = "binary"
    else:
        style = "decimal"
    labels = suffixes[style]

    base = 1024 if (gnu or binary) else 1000
    size = float(value)
    magnitude = abs(size)

    # Below one unit: plain byte counts, no scaling.
    if magnitude < base:
        if gnu:
            return "%dB" % size
        return ("%d Byte" if magnitude == 1 else "%d Bytes") % size

    # GNU style glues the suffix to the number; the others use a space.
    template = format + ("%s" if gnu else " %s")
    for exponent, label in enumerate(labels, start=2):
        unit = base ** exponent
        if magnitude < unit:
            break
    # If no unit was large enough, the largest suffix is used.
    return template % ((base * size / unit), label)
graphforge/sample_repos/humanize/number.py ADDED
@@ -0,0 +1,198 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Humanizing functions for numbers."""
2
+
3
+ import math
4
+ import re
5
+ from fractions import Fraction
6
+
7
+ powers = [10**x for x in (3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 100)]
8
+ human_powers = (
9
+ "thousand", "million", "billion", "trillion", "quadrillion",
10
+ "quintillion", "sextillion", "septillion", "octillion",
11
+ "nonillion", "decillion", "googol",
12
+ )
13
+
14
+
15
def ordinal(value):
    """Return *value* as an ordinal string such as '1st', '2nd' or '3rd'.

    Anything that ``int()`` cannot convert is returned unchanged.

    Examples:
        >>> ordinal(1)
        '1st'
        >>> ordinal(12)
        '12th'
        >>> ordinal(103)
        '103rd'
    """
    try:
        number = int(value)
    except (TypeError, ValueError):
        return value

    # 11, 12 and 13 take "th" despite ending in 1, 2 and 3.
    if number % 100 in (11, 12, 13):
        suffix = "th"
    else:
        by_last_digit = ("th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th")
        suffix = by_last_digit[number % 10]
    return f"{number}{suffix}"
34
+
35
+
36
def intcomma(value, ndigits=None):
    """Convert a number to a string with commas every three digits.

    Args:
        value: An int, float, or numeric string (commas already present are
            tolerated).  Non-numeric input is returned unchanged.
        ndigits: Number of decimal places to round to.  ``None`` (default)
            keeps ``str(value)`` as-is; ``0`` rounds to a whole number.

    Examples:
        >>> intcomma(1000000)
        '1,000,000'
        >>> intcomma(1234567.25)
        '1,234,567.25'
        >>> intcomma(1234.5678, 2)
        '1,234.57'
    """
    try:
        if isinstance(value, str):
            number = float(value.replace(",", ""))
        else:
            float(value)  # validation only; the original object is formatted
            number = value
    except (TypeError, ValueError):
        return value

    # ``is not None`` so that ndigits=0 is honoured; ``number`` (not the raw
    # string) is formatted because the "f" format code rejects str.
    if ndigits is not None:
        text = "{0:.{1}f}".format(number, ndigits)
    else:
        text = str(value)

    # Insert one comma per pass, right to left, until nothing changes.
    while True:
        grouped = re.sub(r"^(-?\d+)(\d{3})", r"\g<1>,\g<2>", text)
        if grouped == text:
            return grouped
        text = grouped
62
+
63
+
64
def intword(value, format="%.1f"):
    """Convert a large integer to a friendly text representation.

    Values below one thousand, or at/above the largest known power, are
    returned as ``str(value)``.  Input that ``int()`` cannot convert is
    returned unchanged.  The magnitude word is never pluralised
    (``'1.2 billion'``, not ``'1.2 billions'``).

    Examples:
        >>> intword(1000000)
        '1.0 million'
        >>> intword(1200000000)
        '1.2 billion'
    """
    try:
        value = int(value)
    except (TypeError, ValueError):
        return value

    if value < powers[0]:
        return str(value)

    for idx, power in enumerate(powers[1:], 1):
        if value < power:
            chopped = value / float(powers[idx - 1])
            label = human_powers[idx - 1]
            # If rounding pushes the figure to 1000 of this unit, express it
            # as a fraction of the next unit up instead.
            if float(format % chopped) == float(10**3):
                chopped = value / float(powers[idx])
                label = human_powers[idx]
            return (format + " %s") % (chopped, label)
    return str(value)
93
+
94
+
95
def apnumber(value):
    """Spell out the integers 0–9 in AP style; other integers become digits.

    Input that ``int()`` cannot convert is returned unchanged.

    Examples:
        >>> apnumber(5)
        'five'
        >>> apnumber(10)
        '10'
    """
    try:
        number = int(value)
    except (TypeError, ValueError):
        return value

    if 0 <= number < 10:
        spelled = ("zero", "one", "two", "three", "four",
                   "five", "six", "seven", "eight", "nine")
        return spelled[number]
    return str(number)
113
+
114
+
115
def fractional(value):
    """Render a number as a whole part plus a simple fraction.

    The fractional part is approximated with a denominator of at most 1000.
    Input that ``float()`` cannot convert is returned unchanged.

    Examples:
        >>> fractional(0.3)
        '3/10'
        >>> fractional(1.3)
        '1 3/10'
        >>> fractional(1)
        '1'
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return value

    whole = int(number)
    remainder = Fraction(number - whole).limit_denominator(1000)
    numerator, denominator = remainder.numerator, remainder.denominator

    if not whole:
        # No integer part: the bare fraction.
        return f"{numerator:.0f}/{denominator:.0f}"
    if not numerator and denominator == 1:
        # Exact integer: no fraction to show.
        return f"{whole:.0f}"
    return f"{whole:.0f} {numerator:.0f}/{denominator:.0f}"
138
+
139
+
140
def scientific(value, precision=2):
    """Return a number in scientific notation (e.g. 5.00 x 10²).

    The sign of the value stays on the mantissa and the sign of the exponent
    on the exponent.  Input that cannot be formatted as a number is returned
    unchanged; non-finite values come back as their plain formatted text
    (e.g. ``'inf'``).

    Args:
        value: A number or numeric string.
        precision: Digits after the decimal point in the mantissa.

    Examples:
        >>> scientific(500)
        '5.00 x 10²'
        >>> scientific(0.3)
        '3.00 x 10⁻¹'
        >>> scientific(-500)
        '-5.00 x 10²'
    """
    exponents = {
        "0": "⁰", "1": "¹", "2": "²", "3": "³", "4": "⁴",
        "5": "⁵", "6": "⁶", "7": "⁷", "8": "⁸", "9": "⁹",
        "+": "⁺", "-": "⁻",
    }
    try:
        number = float(value) if isinstance(value, str) else value
        fmt = "{:.%se}" % str(int(precision))
        formatted = fmt.format(number)
    except (ValueError, TypeError):
        return value

    mantissa, marker, exponent = formatted.partition("e")
    if not marker:
        # "inf" / "nan" carry no exponent part.
        return formatted

    # Drop the single leading zero ("e-01" -> "-1") and a redundant plus
    # sign before it ("e+02" -> "2").
    exponent = exponent.replace("-0", "-").replace("+0", "")
    return mantissa + " x 10" + "".join(exponents[char] for char in exponent)
173
+
174
+
175
def clamp(value, format="{:}", floor=None, ceil=None, floor_token="<", ceil_token=">"):
    """Format *value*, replacing it with a bound when it falls outside it.

    A value below ``floor`` is shown as ``floor_token`` plus the formatted
    floor; a value above ``ceil`` as ``ceil_token`` plus the formatted ceil.
    ``format`` is either a ``str.format`` template or a callable.
    ``None`` is returned unchanged.

    Examples:
        >>> clamp(123.456)
        '123.456'
        >>> clamp(0.001, floor=0.01)
        '<0.01'
        >>> clamp(999, ceil=100)
        '>100'

    Raises:
        ValueError: if ``format`` is neither a string nor callable.
    """
    if value is None:
        return None

    prefix = ""
    if floor is not None and value < floor:
        value, prefix = floor, floor_token
    elif ceil is not None and value > ceil:
        value, prefix = ceil, ceil_token

    if isinstance(format, str):
        rendered = format.format(value)
    elif callable(format):
        rendered = format(value)
    else:
        raise ValueError("format must be a string or callable")
    return prefix + rendered
graphforge/sample_repos/humanize/time.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Time humanizing functions."""
2
+
3
+ import datetime as dt
4
+ import math
5
+ from enum import Enum
6
+ from functools import total_ordering
7
+
8
+
9
@total_ordering
class Unit(Enum):
    """Time units, ordered from smallest (microseconds) to largest (years)."""

    MICROSECONDS = 0
    MILLISECONDS = 1
    SECONDS = 2
    MINUTES = 3
    HOURS = 4
    DAYS = 5
    MONTHS = 6
    YEARS = 7

    def __lt__(self, other):
        # Only members of the same enum are comparable; total_ordering
        # derives the remaining rich comparisons from this method.
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value < other.value
24
+
25
+
26
def _now():
    """Return the current local time as a naive ``datetime`` (no tzinfo)."""
    return dt.datetime.now()
28
+
29
+
30
+ def _abs_timedelta(delta):
31
+ if delta.days < 0:
32
+ now = _now()
33
+ return now - (now + delta)
34
+ return delta
35
+
36
+
37
def _date_and_delta(value, *, now=None):
    """Turn ``value`` into a ``(date, absolute_delta)`` pair relative to ``now``.

    * ``datetime``: the date is ``value`` and the delta is ``now - value``.
    * ``timedelta``: the date is ``now - value`` and the delta is ``value``.
    * anything else: ``int(value)`` is taken as a number of seconds.

    When the value cannot be converted, ``(None, value)`` is returned.
    ``now`` falls back to ``_now()`` when it is falsy.
    """
    now = now or _now()

    if isinstance(value, dt.datetime):
        return value, _abs_timedelta(now - value)
    if isinstance(value, dt.timedelta):
        return now - value, _abs_timedelta(value)

    try:
        value = int(value)
        delta = dt.timedelta(seconds=value)
        date = now - delta
    except (ValueError, TypeError):
        return None, value
    return date, _abs_timedelta(delta)
54
+
55
+
56
def naturaldelta(value, months=True, minimum_unit="seconds") -> str:
    """Return a natural representation of a timedelta or number of seconds.

    Does not include tense (use naturaltime for past/future). Negative
    durations are described by their absolute value.

    Args:
        value: a ``timedelta``, or anything ``int()`` accepts as a number of
            seconds. Values that cannot be converted are returned unchanged.
        months: when false, month counts are never used and the remainder
            is expressed in days instead.
        minimum_unit: ``"seconds"``, ``"milliseconds"`` or ``"microseconds"``.
            The name is validated, but durations under one second are
            always rendered as ``"a moment"``.

    Raises:
        KeyError: ``minimum_unit`` is not a ``Unit`` name.
        ValueError: ``minimum_unit`` names a unit larger than seconds.

    Examples:
        >>> import datetime as dt
        >>> naturaldelta(dt.timedelta(seconds=90))
        'a minute'
        >>> naturaldelta(dt.timedelta(hours=2))
        '2 hours'
        >>> naturaldelta(dt.timedelta(days=400))
        '1 year, 1 month'
    """
    unit = Unit[minimum_unit.upper()]
    if unit not in (Unit.SECONDS, Unit.MILLISECONDS, Unit.MICROSECONDS):
        raise ValueError(f"Minimum unit '{minimum_unit}' not supported")

    if isinstance(value, dt.timedelta):
        delta = value
    else:
        try:
            delta = dt.timedelta(seconds=int(value))
        except (ValueError, TypeError):
            return value

    # A negative timedelta is stored as negative days plus *positive*
    # seconds (-30s == -1 day + 86370s), so take the absolute value of the
    # whole duration before reading its fields.
    delta = abs(delta)
    seconds = delta.seconds
    years, days = divmod(delta.days, 365)
    months_count = int(days // 30.5)

    if not years and days < 1:
        if seconds == 0:
            return "a moment"
        if seconds == 1:
            return "a second"
        if seconds < 60:
            return f"{seconds} seconds"
        if seconds < 120:
            return "a minute"
        if seconds < 3600:
            return f"{seconds // 60} minutes"
        if seconds < 7200:
            return "an hour"
        return f"{seconds // 3600} hours"

    if years == 0:
        if days == 1:
            return "a day"
        if not months or not months_count:
            return f"{days} days"
        if months_count == 1:
            return "a month"
        return f"{months_count} months"

    if years == 1:
        if not months_count and not days:
            return "a year"
        # Honour months=False here too, as the branch above does.
        if not months or not months_count:
            return f"1 year, {days} days" if days > 1 else "1 year, a day"
        if months_count == 1:
            return "1 year, 1 month"
        return f"1 year, {months_count} months"

    return f"{years} years"
124
+
125
+
126
def naturaltime(value, future=False, months=True, minimum_unit="seconds", when=None) -> str:
    """Return a natural representation of a time relative to now.

    Args:
        value: a ``datetime``, a ``timedelta`` (taken as "that long before
            now"), or a number of seconds.
        future: tense to use for plain numbers of seconds. For ``datetime``
            and ``timedelta`` input this flag is ignored: the tense is
            derived from whether the resulting date lies after ``when``.
        months: forwarded to ``naturaldelta``.
        minimum_unit: forwarded to ``naturaldelta``.
        when: reference point to use instead of the current time.

    Values that cannot be interpreted are returned unchanged.

    Examples:
        >>> import datetime as dt
        >>> naturaltime(dt.timedelta(seconds=30))
        '30 seconds ago'
        >>> naturaltime(3600, future=True)
        'an hour from now'
    """
    now = when or _now()
    date, delta = _date_and_delta(value, now=now)
    if date is None:
        return value

    # The tense of datetime/timedelta input comes from the value itself.
    if isinstance(value, (dt.datetime, dt.timedelta)):
        future = date > now

    delta_str = naturaldelta(delta, months, minimum_unit)
    if delta_str == "a moment":
        return "now"

    template = "%s from now" if future else "%s ago"
    return template % delta_str
147
+
148
+
149
def naturalday(value, format="%b %d") -> str:
    """Describe a date as 'today', 'tomorrow', 'yesterday', or a formatted date.

    ``value`` needs ``year``, ``month`` and ``day`` attributes; anything that
    cannot be turned into a ``date`` is returned unchanged. Dates other than
    the three named days are rendered with ``strftime(format)``.

    Examples:
        >>> import datetime as dt
        >>> naturalday(dt.date.today())
        'today'
    """
    try:
        day = dt.date(value.year, value.month, value.day)
    except (AttributeError, OverflowError, ValueError):
        return value

    offset = (day - dt.date.today()).days
    if offset == 0:
        return "today"
    if offset == 1:
        return "tomorrow"
    if offset == -1:
        return "yesterday"
    return day.strftime(format)
169
+
170
+
171
def naturaldate(value) -> str:
    """Like naturalday, but include the year for dates about 5+ months away.

    The year is added when the distance from today, in either direction, is
    at least ``5 * 365 / 12`` days. Values that cannot be turned into a
    ``date`` are returned unchanged.
    """
    try:
        day = dt.date(value.year, value.month, value.day)
    except (AttributeError, OverflowError, ValueError):
        return value

    distance = _abs_timedelta(day - dt.date.today())
    if distance.days < 5 * 365 / 12:
        return naturalday(day)
    return naturalday(day, "%b %d %Y")
181
+
182
+
183
def precisedelta(value, minimum_unit="seconds", suppress=(), format="%0.2f") -> str:
    """Return a precise, human-readable representation of a timedelta.

    The duration is split into years (365 days), months (30.5 days,
    rounded down to whole days), days, hours, minutes and seconds. Every
    non-zero component is listed, joined by ``" and "``. A zero duration
    yields ``"0 seconds"``. Values that cannot be interpreted are returned
    unchanged.

    NOTE(review): ``minimum_unit`` and ``suppress`` are only validated
    against ``Unit`` (unknown names raise ``KeyError``) and ``format`` is
    unused; none of them affects the output yet.

    Examples:
        >>> import datetime as dt
        >>> precisedelta(dt.timedelta(seconds=3633, days=2))
        '2 days and 1 hour and 33 seconds'
        >>> precisedelta(dt.timedelta(days=30))
        '30 days'
    """
    date, delta = _date_and_delta(value)
    if date is None:
        return value

    # Validation only: the lookups raise KeyError for unknown unit names.
    for name in suppress:
        Unit[name.upper()]
    Unit[minimum_unit.upper()]

    years, days = divmod(delta.days, 365)
    months_count = int(days // 30.5)
    # Subtract exactly the days the counted months account for. The earlier
    # ``days % 30`` disagreed with the 30.5-day month above and silently
    # dropped days (30 days rendered as "0 seconds").
    days -= int(months_count * 30.5)

    hours, secs = divmod(delta.seconds, 3600)
    minutes, secs = divmod(secs, 60)

    components = [
        (years, "year", "years"),
        (months_count, "month", "months"),
        (days, "day", "days"),
        (hours, "hour", "hours"),
        (minutes, "minute", "minutes"),
        (secs, "second", "seconds"),
    ]
    parts = [
        f"{count} {singular if count == 1 else plural}"
        for count, singular, plural in components
        if count > 0
    ]

    if not parts:
        return "0 seconds"
    return " and ".join(parts)
graphforge/sample_repos/task_manager/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Task Manager — a small synthetic package used as the training repo."""
graphforge/sample_repos/task_manager/api.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """High-level API layer that wires models, storage, and validators together."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from graphforge.sample_repos.task_manager.models import Task
6
+ from graphforge.sample_repos.task_manager.storage import TaskStore
7
+ from graphforge.sample_repos.task_manager.validators import validate_priority, validate_tags, validate_title
8
+
9
+ _store = TaskStore()
10
+
11
+
12
def create_task(
    title: str,
    priority: str = "medium",
    tags: list[str] | None = None,
) -> Task:
    """Build a task, add it to the module-level store, and return it.

    ``tags`` defaults to an empty list when omitted or empty.

    Raises ValueError if title or tags are invalid.

    NOTE(review): ``priority`` is passed to ``Task`` unchecked even though
    ``validate_priority`` is imported — confirm whether that is intended.
    """
    if not validate_title(title):
        raise ValueError(f"Invalid title: {title!r}")

    resolved_tags = tags if tags else []
    if not validate_tags(resolved_tags):
        raise ValueError(f"Invalid tags: {resolved_tags!r}")

    new_task = Task(title=title, priority=priority, tags=resolved_tags)
    _store.add(new_task)
    return new_task
29
+
30
+
31
def get_all_tasks() -> list[Task]:
    """Return every task in the module-level store (via ``TaskStore.all``)."""
    return _store.all()
34
+
35
+
36
def complete_task(title: str) -> bool:
    """Complete the task found by ``title``.

    Returns True when a matching task was found and marked done, False when
    the store has no task with that title.
    """
    match = _store.find_by_title(title)
    if not match:
        return False
    match.complete()
    return True
43
+
44
+
45
def reset_store() -> None:
    """Clear the store — used by tests between runs.

    Rebinds the module-level ``_store`` to a brand-new ``TaskStore``.
    """
    global _store
    _store = TaskStore()
graphforge/sample_repos/task_manager/models.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Domain models for the task manager."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from datetime import date
6
+ from typing import Optional
7
+
8
+
9
class Task:
    """One to-do item: title, priority, tags, optional due date, done flag."""

    def __init__(
        self,
        title: str,
        priority: str,
        tags: list[str],
        due_date: Optional[date] = None,
    ) -> None:
        # A new task always starts out not done.
        self.done = False
        self.title = title
        self.priority = priority  # expected: "low" | "medium" | "high"
        self.tags = tags
        self.due_date = due_date

    def complete(self) -> None:
        """Flag the task as finished."""
        self.done = True

    def to_dict(self) -> dict:
        """Return the task as a plain dict; ``due_date`` is a string or None."""
        due = None if not self.due_date else str(self.due_date)
        return {
            "title": self.title,
            "priority": self.priority,
            "tags": self.tags,
            "done": self.done,
            "due_date": due,
        }
37
+
38
+
39
class User:
    """A task owner identified by a username and an email address."""

    def __init__(self, username: str, email: str) -> None:
        self.username, self.email = username, email

    def display(self) -> str:
        """Return the user formatted as ``username <email>``."""
        return "{} <{}>".format(self.username, self.email)
graphforge/sample_repos/task_manager/storage.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """In-memory task storage."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
+ from graphforge.sample_repos.task_manager.models import Task
8
+
9
+
10
class TaskStore:
    """Keeps Task objects in an in-memory list, in insertion order."""

    def __init__(self) -> None:
        self._tasks: list[Task] = []

    def add(self, task: Task) -> None:
        """Add ``task`` at the end of the store."""
        self._tasks.append(task)

    def all(self) -> list[Task]:
        """Return a new list holding every stored task."""
        return self._tasks[:]

    def find_by_title(self, title: str) -> Optional[Task]:
        """Return the first task with exactly this title, or None."""
        return next((task for task in self._tasks if task.title == title), None)

    def find_done(self) -> list[Task]:
        """Return the tasks whose ``done`` flag is truthy."""
        return [task for task in self._tasks if task.done]

    def find_pending(self) -> list[Task]:
        """Return the tasks whose ``done`` flag is falsy."""
        return [task for task in self._tasks if not task.done]
graphforge/sample_repos/task_manager/validators.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Input validation functions for the task manager."""
2
+
3
+ from __future__ import annotations
4
+
5
+ VALID_PRIORITIES = {"low", "medium", "high"}
6
+
7
+
8
def validate_title(title: str) -> bool:
    """Return True for a string of 1 to 200 characters, False otherwise."""
    if not isinstance(title, str):
        return False
    return 1 <= len(title) <= 200
11
+
12
+
13
def validate_tags(tags: object) -> bool:
    """Return True only for a list in which every element is a string."""
    if not isinstance(tags, list):
        return False
    for tag in tags:
        if not isinstance(tag, str):
            return False
    return True
16
+
17
+
18
def validate_email(email: str) -> bool:
    """Return True if email is a string with a "." somewhere after its last "@"."""
    if not isinstance(email, str) or "@" not in email:
        return False
    domain = email.rsplit("@", 1)[-1]
    return "." in domain
21
+
22
+
23
def validate_priority(priority: str) -> bool:
    """Return True if priority is one of 'low', 'medium', or 'high'.

    Non-string input returns False. The type check also keeps unhashable
    values (lists, dicts) from raising TypeError in the set lookup, in line
    with the other validators, which report bad types as False.
    """
    return isinstance(priority, str) and priority in VALID_PRIORITIES
graphforge/server/__init__.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI OpenEnv server.
2
+
3
+ Endpoints (PROPOSAL.md §6.1):
4
+
5
+ POST /reset -> create a fresh episode, return initial observation
6
+ POST /step -> apply an Action, return (observation, reward, done, info)
7
+ GET /state -> snapshot the current episode state for debugging
8
+ POST /close -> tear down the episode
9
+
10
+ The server is a thin shell: it owns episode state (graph, task spec,
11
+ action history, token counter, turn counter, materialization cache) and
12
+ delegates the work to the dispatcher, reward engine, and validators.
13
+
14
+ The training-side OpenEnv client calls this over HTTP at localhost:8000.
15
+ """
16
+
17
+ from graphforge.server.app import app
18
+
19
+ __all__ = ["app"]
graphforge/server/app.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI application — the OpenEnv server.
2
+
3
+ Endpoints (PROPOSAL.md §6.1):
4
+
5
+ POST /reset { task_id?: str | None, seed?: int }
6
+ -> { episode_id, observation }
7
+ POST /step { episode_id, action: Action }
8
+ -> { observation, reward, done, info }
9
+ GET /state?episode_id=...
10
+ -> { ... full snapshot ... }
11
+ POST /close { episode_id }
12
+ -> { closed: bool }
13
+
14
+ The handlers are thin: routing, request validation, episode lookup. The
15
+ actual per-step orchestration lives in :mod:`graphforge.server.runner`.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from typing import Any, Optional
21
+
22
+ from fastapi import FastAPI, HTTPException
23
+ from pydantic import BaseModel
24
+
25
+ from graphforge.actions.schema import Action
26
+ from graphforge.server.episode import GLOBAL_STORE, Episode, EpisodeStore
27
+ from graphforge.server.runner import step as runner_step
28
+ from graphforge.tasks import default_task, get_task
29
+
30
+ app = FastAPI(
31
+ title="GraphForge OpenEnv server",
32
+ version="0.0.1",
33
+ description="See graphforge.server for the wire shape.",
34
+ )
35
+
36
+
37
+ # ---- request / response models --------------------------------------
38
+
39
+
40
# Body of POST /reset.
class ResetRequest(BaseModel):
    # Task to start; when None the /reset handler uses default_task().
    task_id: Optional[str] = None
    seed: Optional[int] = None  # reserved for variant generation, unused for tier-0
43
+
44
+
45
# Body of POST /step: which episode to advance and the action to apply.
class StepRequest(BaseModel):
    episode_id: str
    # ``Action`` is itself an Annotated discriminated union; no need to
    # re-declare the discriminator on this field.
    action: Action
50
+
51
+
52
# Body of POST /close: the episode to drop from the store.
class CloseRequest(BaseModel):
    episode_id: str
54
+
55
+
56
+ # ---- store wiring (overridable for tests) ---------------------------
57
+
58
+
59
def _store() -> EpisodeStore:
    """Return the episode store used by the handlers (``GLOBAL_STORE``).

    Kept as a function so the store can be swapped out in tests.
    """
    return GLOBAL_STORE
61
+
62
+
63
+ # ---- helpers --------------------------------------------------------
64
+
65
+
66
def _require_episode(episode_id: str) -> Episode:
    """Look up an episode by id, raising HTTP 404 when it is not in the store."""
    episode = _store().get(episode_id)
    if episode is not None:
        return episode
    raise HTTPException(status_code=404, detail=f"unknown episode_id: {episode_id!r}")
71
+
72
+
73
def _initial_observation(ep: Episode) -> dict[str, Any]:
    """Build the observation returned by ``/reset``.

    Contains the episode id, the task's visible payload, zeroed turn and
    token counters, and the task's budget and episode cap.
    """
    task = ep.task
    observation: dict[str, Any] = {
        "episode_id": ep.id,
        "task": task.visible_payload(),
        "turns_total": 0,
        "tokens_used_total": 0,
    }
    observation["budget"] = task.budget
    observation["episode_cap"] = task.episode_cap
    return observation
82
+
83
+
84
+ # ---- endpoints ------------------------------------------------------
85
+
86
+
87
@app.post("/reset")
def reset(req: ResetRequest) -> dict:
    """Start a new episode and return its id and initial observation.

    Uses the default task when no ``task_id`` is given; responds with HTTP
    404 when the requested task id is unknown.
    """
    if req.task_id is None:
        task = default_task()
    else:
        task = get_task(req.task_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"unknown task_id: {req.task_id!r}")

    episode = Episode.new(task=task)
    _store().put(episode)
    return {"episode_id": episode.id, "observation": _initial_observation(episode)}
102
+
103
+
104
@app.post("/step")
def step(req: StepRequest) -> dict:
    """Apply the requested action to the episode via the runner (404 if unknown)."""
    return runner_step(_require_episode(req.episode_id), req.action)
108
+
109
+
110
@app.get("/state")
def state(episode_id: str) -> dict:
    """Return the state snapshot of an episode (404 if the id is unknown)."""
    return _require_episode(episode_id).state_snapshot()
114
+
115
+
116
@app.post("/close")
def close(req: CloseRequest) -> dict:
    """Drop an episode from the store; ``closed`` reports whether it existed."""
    return {"closed": _store().drop(req.episode_id)}
120
+
121
+
122
@app.get("/healthz")
def healthz() -> dict:
    """Liveness probe: report a fixed "ok" status and the app version."""
    return {"status": "ok", "version": app.version}
graphforge/server/episode.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Episode state — one per active OpenEnv session.
2
+
3
+ The server holds episodes in an in-memory dict keyed by ``episode_id``.
4
+ Episodes are entirely self-contained: they own a :class:`Graph`, a
5
+ :class:`Task`, and the running history. There is no leakage between
6
+ episodes (PROPOSAL.md §6.2 — "episode isolation").
7
+
8
+ Token accounting is a server-side concern. We use a coarse character-based
9
+ estimate (``len(json) // 4``) until a real tokenizer is wired in. The
10
+ estimate is consistent across baseline and trained runs because both go
11
+ through the same envelope.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ import uuid
18
+ from dataclasses import dataclass, field
19
+ from typing import Any
20
+
21
+ from graphforge.actions.dispatcher import ActionResult
22
+ from graphforge.graph.schema import Graph
23
+ from graphforge.reward.engine import ActionOutcome, TurnReward
24
+ from graphforge.tasks.schema import Task
25
+
26
+
27
+ # ---- token estimation -----------------------------------------------
28
+
29
+
30
def estimate_tokens(payload: Any) -> int:
    """Estimate the token count of ``payload`` as a quarter of its text length.

    The payload is serialized with ``json.dumps`` (objects JSON cannot
    encode are stringified); if serialization fails altogether, ``str()``
    of the payload is measured instead. The result is
    ``len(text) // 4`` — the usual "about 4 characters per token" rule of
    thumb, a placeholder until a real tokenizer is wired in.
    """
    try:
        text = json.dumps(payload, default=str)
    except Exception:
        # Best effort: fall back to the plain string form.
        text = str(payload)
    return max(len(text) // 4, 0)
44
+
45
+
46
+ # ---- history records ------------------------------------------------
47
+
48
+
49
@dataclass
class TurnRecord:
    """History entry for one applied action, as stored by ``Episode.record_turn``."""

    turn: int  # value of the episode's turn counter when the action was recorded
    action_kind: str
    action_args: dict[str, Any]
    outcome: str  # ActionOutcome value
    ok: bool
    reward: float
    payload: dict[str, Any] = field(default_factory=dict)
    is_duplicate: bool = False
    tokens_returned: int = 0
60
+
61
+
62
+ # ---- episode --------------------------------------------------------
63
+
64
+
65
@dataclass
class Episode:
    """State of one session: task, graph, turn history and running counters."""

    id: str
    task: Task
    graph: Graph = field(default_factory=Graph.empty)
    history: list[TurnRecord] = field(default_factory=list)
    tokens_used: int = 0
    turns: int = 0
    terminated: bool = False
    terminal_reward: float | None = None
    terminal_payload: dict[str, Any] | None = None

    @classmethod
    def new(cls, task: Task) -> "Episode":
        """Create an episode for ``task`` with a random UUID4 id."""
        episode_id = str(uuid.uuid4())
        return cls(id=episode_id, task=task)

    # ----- duplicate detection ---------------------------------------

    def is_duplicate(self, kind: str, args: dict[str, Any]) -> bool:
        """Tell whether the same (kind, args) pair already appears in the history."""
        return any(
            past.action_kind == kind and past.action_args == args
            for past in self.history
        )

    # ----- bookkeeping -----------------------------------------------

    def record_turn(
        self,
        kind: str,
        args: dict[str, Any],
        result: ActionResult,
        outcome: ActionOutcome,
        turn_reward: TurnReward,
        is_duplicate: bool,
        tokens_returned: int,
    ) -> TurnRecord:
        """Append a history entry for this action and advance the counters.

        The entry is numbered with the turn counter *before* it is
        incremented; ``tokens_returned`` is added to ``tokens_used``.
        """
        entry = TurnRecord(
            turn=self.turns,
            action_kind=kind,
            action_args=args,
            outcome=outcome.value,
            ok=result.ok,
            reward=turn_reward.total,
            payload=result.payload,
            is_duplicate=is_duplicate,
            tokens_returned=tokens_returned,
        )
        self.history.append(entry)
        self.turns += 1
        self.tokens_used += tokens_returned
        return entry

    # ----- snapshot --------------------------------------------------

    def state_snapshot(self) -> dict[str, Any]:
        """Return a dict view of the episode: counters, graph and a history summary."""
        graph_view = {
            "modules": [module.model_dump() for module in self.graph.modules],
            "nodes": [node.model_dump() for node in self.graph.nodes],
            "edges": [edge.model_dump() for edge in self.graph.edges],
        }
        history_view = [
            {
                "turn": entry.turn,
                "action_kind": entry.action_kind,
                "ok": entry.ok,
                "reward": entry.reward,
            }
            for entry in self.history
        ]
        return {
            "episode_id": self.id,
            "task": self.task.visible_payload(),
            "turns": self.turns,
            "tokens_used": self.tokens_used,
            "budget": self.task.budget,
            "episode_cap": self.task.episode_cap,
            "terminated": self.terminated,
            "graph": graph_view,
            "history": history_view,
            "terminal_reward": self.terminal_reward,
        }
145
+
146
+
147
+ # ---- in-memory store ------------------------------------------------
148
+
149
+
150
class EpisodeStore:
    """Dict-backed episode registry, wrapped so a TTL cache can replace it later."""

    def __init__(self) -> None:
        self._eps: dict[str, Episode] = {}

    def put(self, ep: Episode) -> None:
        """Store ``ep`` under its id, replacing any episode with the same id."""
        self._eps[ep.id] = ep

    def get(self, episode_id: str) -> Episode | None:
        """Return the episode stored under ``episode_id``, or None."""
        return self._eps.get(episode_id)

    def drop(self, episode_id: str) -> bool:
        """Remove an episode; True when a (non-None) entry was removed."""
        removed = self._eps.pop(episode_id, None)
        return removed is not None

    def __len__(self) -> int:
        return len(self._eps)
167
+
168
+
169
+ # Singleton store. The server module holds onto this for the lifetime of
170
+ # the process. Tests can construct their own EpisodeStore for isolation.
171
+ GLOBAL_STORE = EpisodeStore()
graphforge/server/runner.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Episode runner — the per-step orchestration the server endpoints use.
2
+
3
+ Pulls together dispatcher, reward engine, constraint checker, and episode
4
+ state. Kept separate from the FastAPI app so it can be unit-tested without
5
+ spinning up an HTTP server.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from typing import Any
11
+
12
+ from graphforge.actions import dispatch
13
+ from graphforge.actions.schema import Action, Submit
14
+ from graphforge.constraints import evaluate_all
15
+ from graphforge.materializer import materialize
16
+ from graphforge.reward.engine import (
17
+ ActionOutcome,
18
+ TurnReward,
19
+ score_terminal,
20
+ score_turn,
21
+ )
22
+ from graphforge.server.episode import (
23
+ Episode,
24
+ TurnRecord,
25
+ estimate_tokens,
26
+ )
27
+ from graphforge.validator import full_check
28
+
29
+
30
def _classify_outcome(action: Action, ok: bool) -> ActionOutcome:
    """Map the handler's ok flag to SUCCESS or FAILURE (``action`` is unused)."""
    # Schema rejection happens before this function (caught by FastAPI's
    # pydantic validation). What we see here is a successfully-parsed
    # action that either succeeded or failed at handler-time.
    if ok:
        return ActionOutcome.SUCCESS
    return ActionOutcome.FAILURE
35
+
36
+
37
def _render_observation(ep: Episode, turn_record: TurnRecord) -> dict[str, Any]:
    """Build the per-step observation from a turn record and episode totals.

    The remaining budget and remaining turns are clamped at zero.
    """
    budget_left = ep.task.budget - ep.tokens_used
    turns_left = ep.task.episode_cap - ep.turns
    return {
        "turn": turn_record.turn,
        "ok": turn_record.ok,
        "outcome": turn_record.outcome,
        "payload": turn_record.payload,
        "reward": turn_record.reward,
        "is_duplicate": turn_record.is_duplicate,
        "tokens_returned": turn_record.tokens_returned,
        "tokens_used_total": ep.tokens_used,
        "turns_total": ep.turns,
        "budget_remaining": max(0, budget_left),
        "episode_cap_remaining": max(0, turns_left),
    }
51
+
52
+
53
def step(ep: Episode, action: Action) -> dict[str, Any]:
    """Run one action against ``ep`` and report the result.

    The episode ends when the action is a ``Submit`` or when the turn count
    reaches the task's episode cap; in both cases the terminal score is
    computed, stored on the episode and added to the returned reward.
    Calling this on an already-terminated episode changes nothing and
    returns an error marker.

    Returns a dict in the OpenEnv ``/step`` response shape:
    ``{observation, reward, done, info}``.
    """
    if ep.terminated:
        return {
            "observation": {},
            "reward": 0.0,
            "done": True,
            "info": {"error": "episode_already_terminated"},
        }

    args = action.model_dump(exclude={"kind"})
    kind = action.kind  # type: ignore[attr-defined]
    # Checked before recording, so the action is not compared with itself.
    is_duplicate = ep.is_duplicate(kind, args)

    result = dispatch(ep.graph, action)
    tokens_returned = estimate_tokens(result.payload)
    outcome = _classify_outcome(action, result.ok)
    turn_reward = score_turn(
        outcome=outcome,
        is_duplicate=is_duplicate,
        tokens_returned=tokens_returned,
    )
    rec = ep.record_turn(
        kind=kind,
        args=args,
        result=result,
        outcome=outcome,
        turn_reward=turn_reward,
        is_duplicate=is_duplicate,
        tokens_returned=tokens_returned,
    )

    info: dict[str, Any] = {}
    submitted = isinstance(action, Submit)
    done = submitted or ep.turns >= ep.task.episode_cap
    terminal_bonus = 0.0

    if done:
        terminal = _score_terminal(ep)
        ep.terminated = True
        ep.terminal_reward = terminal["total"]
        ep.terminal_payload = terminal
        info["terminal"] = terminal
        if not submitted:
            info["reason"] = "episode_cap_reached"
        terminal_bonus = terminal.get("total", 0.0)

    return {
        "observation": _render_observation(ep, rec),
        "reward": rec.reward + terminal_bonus,
        "done": done,
        "info": info,
    }
117
+
118
+
119
def _score_terminal(ep: Episode) -> dict[str, Any]:
    """Score the finished episode and return the reward as a dict.

    Evaluates every task constraint against the graph, tries to materialize
    the graph and check the resulting files, and feeds the counts to
    ``score_terminal``. The returned dict is the reward's ``to_dict()`` with
    the constraint results added under ``"satisfaction"``.
    """
    sat = evaluate_all(ep.graph, ep.task.all_constraints)
    structural, behavioral = sat.split_by_family()

    # Materialization gate: any failure while materializing or checking
    # counts as "not ok" rather than aborting the scoring.
    try:
        materialization_ok = full_check(materialize(ep.graph)).ok
    except Exception:
        materialization_ok = False

    reward = score_terminal(
        n_structural_satisfied=len(structural.satisfied),
        n_structural_total=structural.total,
        n_behavioral_passing=len(behavioral.satisfied),
        n_behavioral_total=behavioral.total,
        materialization_ok=materialization_ok,
        type_checks_ok=None,  # mypy not wired yet
        tokens_used=ep.tokens_used,
        budget=ep.task.budget,
    )
    payload = reward.to_dict()
    payload["satisfaction"] = sat.to_dict()
    return payload
graphforge/task_generator.py ADDED
@@ -0,0 +1,227 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Auto-generate training tasks from any Python repository.
2
+
3
+ Pipeline
4
+ --------
5
+ 1. Parse the repo with AST → KnowledgeGraph
6
+ 2. Find public functions that have doctest examples (>>> in docstring)
7
+ 3. Extract those examples as runnable assertions
8
+ 4. Replace the function body with `raise NotImplementedError` — the agent
9
+ must re-implement it from the docstring alone
10
+ 5. Return RepoTask objects ready for GRPO training — no hand-writing needed
11
+
12
+ Usage
13
+ -----
14
+ from graphforge.task_generator import generate_tasks
15
+ tasks = generate_tasks("/tmp/humanize/src/humanize", n_tasks=6)
16
+ for t in tasks:
17
+ print(t.task_id, "→", t.description[:60])
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import ast
23
+ import doctest
24
+ import textwrap
25
+ from dataclasses import dataclass, field
26
+ from pathlib import Path
27
+ from typing import Any
28
+
29
+ from graphforge.knowledge_graph import KGNode, KnowledgeGraph
30
+ from graphforge.repo_parser import parse_repo
31
+
32
+
33
+ # ── Task dataclass (mirrors env.tasks.RepoTask but lives here to avoid circular import) ──
34
+
35
@dataclass
class AutoTask:
    """A generated task: one function stubbed out, plus tests to check a re-implementation."""

    task_id: str
    repo_name: str
    repo_path: str  # absolute path to the repo source directory
    description: str
    test_code: str  # uses short import: from <repo_name>.<module> import <func>
    stubbed_node_id: str  # the node whose body was replaced
    original_source: str  # saved so env can restore on reset
    max_turns: int = 12
    difficulty: int = 0
    hints: list[str] = field(default_factory=list)
47
+
48
+
49
+ # ── Doctest extraction ────────────────────────────────────────────────────────
50
+
51
+ def _extract_all_examples(docstring: str) -> list[tuple[str, str]]:
52
+ """Return ALL doctest lines as (source, want) — want is '' for setup lines."""
53
+ if not docstring:
54
+ return []
55
+ parser = doctest.DocTestParser()
56
+ try:
57
+ examples = parser.get_examples(docstring, name="<doc>")
58
+ return [(ex.source.strip(), ex.want.strip()) for ex in examples]
59
+ except Exception:
60
+ return []
61
+
62
+
63
+ def _to_assertion(expr: str, expected: str) -> str | None:
64
+ """Convert one doctest example to a Python assertion.
65
+
66
+ - True/False expected → assert (expr) is True/False
67
+ - Traceback expected → skip
68
+ - Non-literal → skip
69
+ """
70
+ if not expected or expected.startswith("Traceback"):
71
+ return None
72
+ if expected in ("True", "False"):
73
+ return f"assert ({expr}) is {expected}, f'got {{repr({expr})}}'"
74
+ try:
75
+ ast.literal_eval(expected)
76
+ except (ValueError, SyntaxError):
77
+ return None
78
+ return f"assert {expr} == {expected}, f'got {{repr({expr})}}'"
79
+
80
+
81
+ def _build_test_code(func_name: str, module_stem: str, repo_name: str,
82
+ all_examples: list[tuple[str, str]]) -> str | None:
83
+ """Build complete test code including setup lines then assertions."""
84
+ import_line = f"from {repo_name}.{module_stem} import {func_name}"
85
+ setup_lines: list[str] = []
86
+ assertion_lines: list[str] = []
87
+
88
+ for expr, expected in all_examples:
89
+ if not expected:
90
+ setup_lines.append(expr)
91
+ else:
92
+ a = _to_assertion(expr, expected)
93
+ if a and func_name in a: # only keep assertions that call our function
94
+ assertion_lines.append(a)
95
+
96
+ if len(assertion_lines) < 2:
97
+ return None
98
+ parts = [import_line] + setup_lines + assertion_lines
99
+ return "\n".join(parts)
100
+
101
+
102
+ # ── Function stubbing ─────────────────────────────────────────────────────────
103
+
104
+ def _stub_function(source: str) -> str:
105
+ """Replace a function body with `raise NotImplementedError`, keeping signature + docstring."""
106
+ dedented = textwrap.dedent(source)
107
+ try:
108
+ tree = ast.parse(dedented)
109
+ except SyntaxError:
110
+ return source
111
+
112
+ lines = dedented.splitlines()
113
+ for node in ast.walk(tree):
114
+ if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
115
+ continue
116
+
117
+ body = node.body
118
+ indent = " " * (node.col_offset // 4 + 1)
119
+
120
+ # Keep signature lines (everything up to and including the colon)
121
+ sig_end = body[0].lineno - 1 # 0-indexed line where body starts
122
+
123
+ # Keep docstring if present
124
+ if body and isinstance(body[0], ast.Expr) and isinstance(body[0].value, ast.Constant):
125
+ keep_until = body[0].end_lineno # inclusive, 1-indexed
126
+ else:
127
+ keep_until = sig_end
128
+
129
+ kept = "\n".join(lines[:keep_until])
130
+ stub = kept.rstrip() + f"\n{indent}raise NotImplementedError\n"
131
+ return stub
132
+
133
+ return source
134
+
135
+
136
+ # ── Candidate selection ───────────────────────────────────────────────────────
137
+
138
+ def _score_candidate(node: KGNode, examples: list) -> int:
139
+ """Higher = better training signal. Prefer more examples and longer docstrings."""
140
+ return len(examples) * 3 + min(len(node.docstring or ""), 200) // 20
141
+
142
+
143
def _find_candidates(kg: KnowledgeGraph, repo_name: str) -> list[tuple[KGNode, str, int]]:
    """Collect ``(node, test_code, score)`` for every viable function, best score first.

    A function node is viable when its name is public, it has both a docstring
    and source, its file path yields a module stem, its docstring contains
    doctest examples, and those examples produce test code.
    """
    viable = []
    for fn in kg.all_nodes("function"):
        is_private = fn.name.startswith("_")
        if is_private or not (fn.docstring and fn.source):
            continue

        stem = Path(fn.file_path).stem if fn.file_path else None
        if not stem:
            continue

        doctest_examples = _extract_all_examples(fn.docstring)
        if not doctest_examples:
            continue

        code = _build_test_code(fn.name, stem, repo_name, doctest_examples)
        if not code:
            continue

        viable.append((fn, code, _score_candidate(fn, doctest_examples)))

    # Stable sort: candidates with equal scores keep their discovery order.
    return sorted(viable, key=lambda entry: entry[2], reverse=True)
168
+
169
+
170
+ # ── Main entry point ──────────────────────────────────────────────────────────
171
+
172
def generate_tasks(
    repo_source_dir: str,
    n_tasks: int = 4,
    max_turns: int = 12,
) -> tuple[KnowledgeGraph, list[AutoTask]]:
    """Parse a Python repo directory and auto-generate training tasks.

    Args:
        repo_source_dir: Path to the Python package source directory.
            e.g. '/tmp/humanize/src/humanize'
        n_tasks: How many tasks to generate (picks highest-scoring candidates).
        max_turns: Max turns per episode.

    Returns:
        (kg, tasks) — the Knowledge Graph and the list of AutoTask objects.

    Raises:
        ValueError: If no function in the repo yields usable doctest-based
            test code.
    """
    repo_source_dir = str(Path(repo_source_dir).resolve())
    repo_name = Path(repo_source_dir).name
    kg = parse_repo(repo_source_dir)

    candidates = _find_candidates(kg, repo_name)
    if not candidates:
        raise ValueError(
            f"No suitable candidates found in {repo_source_dir}. "
            "Make sure functions have doctest examples (>>> in docstring)."
        )

    tasks: list[AutoTask] = []
    # NOTE(review): no stubbed source is stored on the task; presumably the
    # environment stubs ``stubbed_node_id`` itself at reset — confirm.
    for node, test_code, score in candidates[:n_tasks]:
        # Built by plain concatenation: dedenting a template after a
        # multi-line docstring has been interpolated breaks the common
        # margin and leaves ragged indentation in the description.
        docstring = node.docstring.strip() if node.docstring else "No docstring available."
        desc = f"Implement the function `{node.name}` in `{node.file_path}`.\n\n{docstring}"

        task = AutoTask(
            task_id=f"auto.{repo_name}.{node.name}",
            repo_name=repo_name,
            repo_path=repo_source_dir,
            description=desc,
            test_code=test_code,
            stubbed_node_id=node.node_id,
            original_source=node.source,
            max_turns=max_turns,
            # Score bucketed into three difficulty levels, 0..2.
            difficulty=min(2, max(0, score // 8)),
            hints=[
                f"Look at {node.file_path} to understand the module style.",
                f"The function signature is: {node.name}{node.metadata.get('signature', '(...)')}",
            ],
        )
        tasks.append(task)

    return kg, tasks
graphforge/tasks/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task bank and variant generator.
2
+
3
+ Tier-0 ships one hand-written task. Tier-1+ tasks and parametric variant
4
+ generation are TODO. See PROPOSAL.md §2.1, §2.3 for the full design.
5
+ """
6
+
7
+ from graphforge.tasks.bank import default_task, get_task, list_tasks
8
+ from graphforge.tasks.schema import Task
9
+
10
+ __all__ = ["Task", "default_task", "get_task", "list_tasks"]
graphforge/tasks/bank.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Tier-0 task bank.
2
+
3
+ A single hand-written task that exercises every implemented subsystem
4
+ end-to-end: build a one-module ``validators`` package with an ``is_email``
5
+ function attached to ``validate_with_regex(EMAIL)``. Tier-1+ tasks land in
6
+ follow-up modules.
7
+
8
+ Variant generation (PROPOSAL.md §2.3 — ~50 concrete variants per template
9
+ × domain vocabulary) is also TODO; for now we hand-author tasks until the
10
+ env's reward-signal shape is validated end-to-end.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from graphforge.constraints.schema import (
16
+ AcyclicImports,
17
+ Materializes,
18
+ ModuleCount,
19
+ ModuleResponsibility,
20
+ ModuleSizeMax,
21
+ NodeAbsent,
22
+ NodeExists,
23
+ )
24
+ from graphforge.tasks.schema import Task
25
+
26
+
27
# The single hand-written tier-0 task. It is the only entry registered in
# ``_TASKS`` and the object returned by ``default_task()``.
TIER_0_EMAIL_VALIDATOR = Task(
    id="t0.email_validator",
    tier=0,
    description=(
        "Build a tiny single-module package called 'validators'. It should "
        "expose a function `is_email(s: str) -> bool` that returns True for "
        "well-formed email addresses and False otherwise. Use the "
        "`validate_with_regex` body template with the EMAIL pattern. The "
        "module must materialize cleanly to runnable Python."
    ),
    visible_constraints=[
        ModuleCount(n=1),
        ModuleResponsibility(module="validators", responsibility="validation"),
        NodeExists(name="is_email", module="validators"),
        Materializes(),
    ],
    hidden_constraints=[
        # The visible constraints already pin most of this; the hidden set
        # adds shape constraints the agent must infer from the description.
        ModuleSizeMax(module="validators", n=1),
        NodeAbsent(name="main", module="validators"),
        AcyclicImports(),
    ],
    behavioral_test_names=[],  # tier-0 has no behavioral tests
    budget=4000,
    episode_cap=20,
)
54
+
55
+
56
# Registry of available tasks keyed by ``Task.id``; ``list_tasks`` and
# ``get_task`` read from it.
_TASKS: dict[str, Task] = {
    TIER_0_EMAIL_VALIDATOR.id: TIER_0_EMAIL_VALIDATOR,
}
59
+
60
+
61
def list_tasks() -> list[Task]:
    """Return every registered task as a new list, in registry order."""
    return list(_TASKS.values())
63
+
64
+
65
def get_task(task_id: str) -> Task | None:
    """Return the task registered under ``task_id``, or ``None`` if there is none."""
    return _TASKS.get(task_id)
67
+
68
+
69
def default_task() -> Task:
    """The task `/reset` picks when no ``task_id`` is specified.

    Always returns ``TIER_0_EMAIL_VALIDATOR``.
    """
    return TIER_0_EMAIL_VALIDATOR
graphforge/tasks/schema.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Task data model.
2
+
3
+ A *task* is the agent-facing unit of work. The visible portion is what the
4
+ agent sees at reset — natural-language description plus the visible subset
5
+ of constraints. The hidden portion drives reward but is invisible to the
6
+ policy, forcing the agent to interpret the description rather than mechanically
7
+ satisfying a fully-revealed checklist (PROPOSAL.md §2.1).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pydantic import BaseModel, ConfigDict, Field
13
+
14
+ from graphforge.constraints.schema import Constraint
15
+
16
+
17
class Task(BaseModel):
    # Instances are frozen and unknown fields are rejected.
    model_config = ConfigDict(extra="forbid", frozen=True)

    id: str = Field(..., min_length=1)
    tier: int = Field(..., ge=0, le=3)
    description: str = Field(..., min_length=1)
    visible_constraints: list[Constraint] = Field(default_factory=list)
    hidden_constraints: list[Constraint] = Field(default_factory=list)
    # Names of behavioral tests are shown to the agent at reset; the test
    # bodies belong to the test runner (TODO) and stay hidden. Empty for tier-0.
    behavioral_test_names: list[str] = Field(default_factory=list)
    budget: int = Field(..., gt=0)
    episode_cap: int = Field(..., gt=0)

    @property
    def all_constraints(self) -> list[Constraint]:
        """Visible constraints followed by hidden ones, as a fresh list."""
        return [*self.visible_constraints, *self.hidden_constraints]

    def visible_payload(self) -> dict[str, object]:
        """Build the agent-facing view of the task handed out at reset.

        Hidden constraints are deliberately left out; visible constraints are
        serialized with ``model_dump()``.
        """
        dumped_constraints = [
            constraint.model_dump() for constraint in self.visible_constraints
        ]
        test_names = [*self.behavioral_test_names]
        return {
            "id": self.id,
            "tier": self.tier,
            "description": self.description,
            "visible_constraints": dumped_constraints,
            "behavioral_test_names": test_names,
            "budget": self.budget,
            "episode_cap": self.episode_cap,
        }