Spaces:
Sleeping
Sleeping
NagaNithin-V commited on
Commit ·
7952f32
1
Parent(s): dead589
Deploy GraphForge OpenEnv — AST-parsed KG code-editing environment
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitignore +5 -0
- Dockerfile +27 -0
- README.md +31 -5
- env/__init__.py +34 -0
- env/actions.py +90 -0
- env/ast_parser.py +249 -0
- env/client.py +36 -0
- env/environment.py +467 -0
- env/models.py +46 -0
- env/server.py +44 -0
- env/tasks.py +363 -0
- graphforge/__init__.py +24 -0
- graphforge/actions/__init__.py +15 -0
- graphforge/actions/dispatcher.py +442 -0
- graphforge/actions/errors.py +44 -0
- graphforge/actions/schema.py +180 -0
- graphforge/actions/signature.py +116 -0
- graphforge/behavioral/__init__.py +25 -0
- graphforge/constraints/__init__.py +49 -0
- graphforge/constraints/checker.py +141 -0
- graphforge/constraints/schema.py +129 -0
- graphforge/graph/__init__.py +23 -0
- graphforge/graph/schema.py +308 -0
- graphforge/knowledge_graph.py +233 -0
- graphforge/materializer/__init__.py +20 -0
- graphforge/materializer/codegen.py +169 -0
- graphforge/materializer/materialize.py +134 -0
- graphforge/materializer/patterns.py +34 -0
- graphforge/parser/__init__.py +27 -0
- graphforge/repo_parser.py +271 -0
- graphforge/repo_registry.py +145 -0
- graphforge/reward/__init__.py +45 -0
- graphforge/reward/engine.py +211 -0
- graphforge/sample_repos/humanize/__init__.py +18 -0
- graphforge/sample_repos/humanize/filesize.py +49 -0
- graphforge/sample_repos/humanize/number.py +198 -0
- graphforge/sample_repos/humanize/time.py +225 -0
- graphforge/sample_repos/task_manager/__init__.py +1 -0
- graphforge/sample_repos/task_manager/api.py +48 -0
- graphforge/sample_repos/task_manager/models.py +47 -0
- graphforge/sample_repos/task_manager/storage.py +37 -0
- graphforge/sample_repos/task_manager/validators.py +25 -0
- graphforge/server/__init__.py +19 -0
- graphforge/server/app.py +124 -0
- graphforge/server/episode.py +171 -0
- graphforge/server/runner.py +144 -0
- graphforge/task_generator.py +227 -0
- graphforge/tasks/__init__.py +10 -0
- graphforge/tasks/bank.py +71 -0
- graphforge/tasks/schema.py +45 -0
.gitignore
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
.env
|
| 5 |
+
*.egg-info/
|
Dockerfile
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Space Dockerfile.
|
| 2 |
+
# Mirrors the root Dockerfile, exists separately because HF Spaces looks for
|
| 3 |
+
# the Dockerfile inside the Space root by default.
|
| 4 |
+
|
| 5 |
+
FROM python:3.11-slim
|
| 6 |
+
|
| 7 |
+
WORKDIR /app
|
| 8 |
+
|
| 9 |
+
COPY pyproject.toml ./
|
| 10 |
+
COPY graphforge ./graphforge
|
| 11 |
+
COPY env ./env
|
| 12 |
+
COPY openenv.yaml ./
|
| 13 |
+
|
| 14 |
+
RUN pip install --no-cache-dir \
|
| 15 |
+
"pydantic>=2.6" \
|
| 16 |
+
"fastapi>=0.110" \
|
| 17 |
+
"uvicorn[standard]>=0.27" \
|
| 18 |
+
"httpx>=0.27" \
|
| 19 |
+
"openenv-core>=0.1.0" \
|
| 20 |
+
"pyyaml>=6.0"
|
| 21 |
+
|
| 22 |
+
ENV PYTHONUNBUFFERED=1
|
| 23 |
+
ENV PYTHONPATH=/app
|
| 24 |
+
|
| 25 |
+
EXPOSE 7860
|
| 26 |
+
|
| 27 |
+
CMD ["uvicorn", "env.server:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
CHANGED
|
@@ -1,12 +1,38 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: purple
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
license: mit
|
| 9 |
-
short_description: A graph-first code-editing RL environment for Python repos.
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: GraphForge OpenEnv
|
| 3 |
+
emoji: 🧱
|
| 4 |
+
colorFrom: indigo
|
| 5 |
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
license: mit
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# GraphForge — OpenEnv server
|
| 13 |
+
|
| 14 |
+
Live deployment of the GraphForge environment for the Meta PyTorch OpenEnv
|
| 15 |
+
Hackathon. The server hosts the OpenEnv-compliant `/reset`, `/step`, `/state`
|
| 16 |
+
endpoints over HTTP. Anything that speaks the OpenEnv client protocol (or
|
| 17 |
+
plain JSON) can drive episodes.
|
| 18 |
+
|
| 19 |
+
See the main project repo for the architecture overview, training notebook,
|
| 20 |
+
plots, and writeup.
|
| 21 |
+
|
| 22 |
+
## Endpoints
|
| 23 |
+
|
| 24 |
+
```
|
| 25 |
+
POST /reset → GraphForgeObservation
|
| 26 |
+
POST /step { ... } → { observation, reward, done }
|
| 27 |
+
GET /state → GraphForgeState
|
| 28 |
+
GET /healthz
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
## Quick smoke test
|
| 32 |
+
|
| 33 |
+
```bash
|
| 34 |
+
EID=$(curl -s -X POST $SPACE_URL/reset | python3 -c "import sys,json; print(json.load(sys.stdin)['episode_id'])")
|
| 35 |
+
curl -s -X POST $SPACE_URL/step -H 'content-type: application/json' \
|
| 36 |
+
-d '{"kind": "add_module", "payload": {"name": "validators", "responsibility": "validation"}}' \
|
| 37 |
+
| python3 -m json.tool
|
| 38 |
+
```
|
env/__init__.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Multi-turn repo-editing OpenEnv environment.
|
| 2 |
+
|
| 3 |
+
Public surface:
|
| 4 |
+
RepoEditAction, RepoEditObservation, RepoEditState — wire models
|
| 5 |
+
RepoEditEnvironment — OpenEnv environment
|
| 6 |
+
RepoEditEnv — HTTP client
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from env.actions import (
|
| 10 |
+
AddNodeAction,
|
| 11 |
+
InspectAction,
|
| 12 |
+
QueryAction,
|
| 13 |
+
RemoveNodeAction,
|
| 14 |
+
RepoEditAction,
|
| 15 |
+
SubmitAction,
|
| 16 |
+
UpdateNodeAction,
|
| 17 |
+
)
|
| 18 |
+
from env.client import RepoEditEnv
|
| 19 |
+
from env.environment import RepoEditEnvironment
|
| 20 |
+
from env.models import RepoEditObservation, RepoEditState
|
| 21 |
+
|
| 22 |
+
__all__ = [
|
| 23 |
+
"AddNodeAction",
|
| 24 |
+
"InspectAction",
|
| 25 |
+
"QueryAction",
|
| 26 |
+
"RemoveNodeAction",
|
| 27 |
+
"RepoEditAction",
|
| 28 |
+
"RepoEditEnv",
|
| 29 |
+
"RepoEditEnvironment",
|
| 30 |
+
"RepoEditObservation",
|
| 31 |
+
"RepoEditState",
|
| 32 |
+
"SubmitAction",
|
| 33 |
+
"UpdateNodeAction",
|
| 34 |
+
]
|
env/actions.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Action schema for the multi-turn repo-editing environment.
|
| 2 |
+
|
| 3 |
+
All actions are expressed as JSON dicts with a "kind" discriminator.
|
| 4 |
+
The agent emits one action per turn inside <action>...</action> XML tags.
|
| 5 |
+
|
| 6 |
+
Actions
|
| 7 |
+
-------
|
| 8 |
+
query Search the knowledge graph for relevant nodes.
|
| 9 |
+
inspect View the full source of a specific node.
|
| 10 |
+
add_node Insert a new function or class into a module/class.
|
| 11 |
+
update_node Replace the source of an existing node.
|
| 12 |
+
remove_node Delete a node from the graph.
|
| 13 |
+
submit Apply all pending changes, run tests, end the episode.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
from typing import Literal
|
| 19 |
+
|
| 20 |
+
from pydantic import BaseModel, ConfigDict
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# Shared pydantic config for every action model: extra="forbid" makes
# validation reject payloads carrying fields the model does not declare.
_cfg = ConfigDict(extra="forbid")
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class QueryAction(BaseModel):
    """Search the knowledge graph for nodes relevant to ``keywords``."""

    model_config = _cfg

    kind: Literal["query"] = "query"
    keywords: str
    # Node-type filter: "all" | "function" | "class" | "module" | "method"
    node_type: str = "all"
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class InspectAction(BaseModel):
    """View the full source of the node identified by ``node_id``."""

    model_config = _cfg

    kind: Literal["inspect"] = "inspect"
    node_id: str
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class AddNodeAction(BaseModel):
    """Insert a new function or class under an existing module or class."""

    model_config = _cfg

    kind: Literal["add_node"] = "add_node"
    # node_id of the parent (module or class)
    parent_id: str
    # name of the new function/class
    name: str
    # "function" | "class"
    node_type: str
    # full source of the new node, including its def/class line
    code: str
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class UpdateNodeAction(BaseModel):
    """Replace the source of an existing node."""

    model_config = _cfg

    kind: Literal["update_node"] = "update_node"
    # which node to replace
    node_id: str
    # full replacement source, including its def/class line
    new_code: str
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class RemoveNodeAction(BaseModel):
    """Delete the node identified by ``node_id`` from the graph."""

    model_config = _cfg

    kind: Literal["remove_node"] = "remove_node"
    node_id: str
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
class SubmitAction(BaseModel):
    """Apply all pending changes, run the tests and end the episode."""

    model_config = _cfg

    kind: Literal["submit"] = "submit"
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# Union of every action model; the "kind" field tells the variants apart.
RepoEditAction = (
    QueryAction | InspectAction  # information-only actions
    | AddNodeAction | UpdateNodeAction | RemoveNodeAction  # graph edits
    | SubmitAction  # ends the episode
)
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def parse_action(raw: dict) -> RepoEditAction:
    """Dispatch a raw action dict to the matching action model.

    Args:
        raw: Decoded JSON object carrying a ``"kind"`` discriminator plus the
            fields of that action.

    Returns:
        The validated action model instance.

    Raises:
        ValueError: If ``raw`` is not a dict, or its ``"kind"`` is missing,
            not a string, or not one of the known kinds. Field-level problems
            are reported by ``model_validate`` (pydantic's validation error).
    """
    # The payload comes from model-generated JSON, so it may be a list, a
    # string, etc. Report that the same way as any other malformed action.
    if not isinstance(raw, dict):
        raise ValueError(
            f"Action must be a JSON object, got {type(raw).__name__}"
        )

    mapping = {
        "query": QueryAction,
        "inspect": InspectAction,
        "add_node": AddNodeAction,
        "update_node": UpdateNodeAction,
        "remove_node": RemoveNodeAction,
        "submit": SubmitAction,
    }
    kind = raw.get("kind", "")
    # A non-string kind (e.g. a list) is unhashable and would make the dict
    # lookup raise TypeError; treat it as an unknown kind instead.
    cls = mapping.get(kind) if isinstance(kind, str) else None
    if cls is None:
        raise ValueError(f"Unknown action kind: {kind!r}. Valid: {list(mapping)}")
    return cls.model_validate(raw)
|
env/ast_parser.py
ADDED
|
@@ -0,0 +1,249 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""AST-based DAG parser and code injection utilities.
|
| 2 |
+
|
| 3 |
+
parse_source(source, module_name) -> CodeDAG
|
| 4 |
+
Parses a Python source string and returns a structured DAG with nodes
|
| 5 |
+
(module, function, imported_module) and typed edges (contains, calls, imports).
|
| 6 |
+
|
| 7 |
+
inject_function_body(source, func_name, new_body) -> str
|
| 8 |
+
Replaces the body of func_name in source with new_body, preserving the
|
| 9 |
+
def line and any docstring. Used by the environment's step() method.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import ast
|
| 15 |
+
from dataclasses import dataclass, field
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
# ── DAG data model ────────────────────────────────────────────────────────────
|
| 19 |
+
|
| 20 |
+
@dataclass
class DAGNode:
    """One vertex of the code DAG."""

    name: str
    # "module" | "function" | "class" | "imported_module"
    node_type: str
    signature: str = ""
    is_stub: bool = False
    body_summary: str = ""
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
@dataclass
class DAGEdge:
    """A typed, directed edge from ``source`` to ``target``."""

    # "contains" | "calls" | "imports"
    edge_type: str
    source: str
    target: str
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass
class FunctionInfo:
    """Location and shape metadata recorded for a function."""

    name: str
    signature: str
    is_stub: bool
    # 1-indexed
    start_line: int
    # 1-indexed, inclusive
    end_line: int
    has_docstring: bool
    # 1-indexed; equals start_line when there is no docstring
    docstring_end_line: int
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
@dataclass
class CodeDAG:
    """Parsed view of one module: its nodes, typed edges and function metadata."""

    module_name: str
    nodes: list[DAGNode] = field(default_factory=list)
    edges: list[DAGEdge] = field(default_factory=list)
    function_infos: dict[str, FunctionInfo] = field(default_factory=dict)

    def callers_of(self, func_name: str) -> list[str]:
        """Return the sources of every "calls" edge that targets ``func_name``."""
        callers: list[str] = []
        for edge in self.edges:
            if edge.edge_type == "calls" and edge.target == func_name:
                callers.append(edge.source)
        return callers

    def callees_of(self, func_name: str) -> list[str]:
        """Return the targets of every "calls" edge that starts at ``func_name``."""
        callees: list[str] = []
        for edge in self.edges:
            if edge.edge_type == "calls" and edge.source == func_name:
                callees.append(edge.target)
        return callees

    def stub_functions(self) -> list[str]:
        """Return the names of function nodes flagged as stubs."""
        stubs: list[str] = []
        for node in self.nodes:
            if node.node_type == "function" and node.is_stub:
                stubs.append(node.name)
        return stubs
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
# ── helpers ───────────────────────────────────────────────────────────────────
|
| 65 |
+
|
| 66 |
+
def _signature(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
|
| 67 |
+
parts = []
|
| 68 |
+
for arg in node.args.args:
|
| 69 |
+
ann = f": {ast.unparse(arg.annotation)}" if arg.annotation else ""
|
| 70 |
+
parts.append(f"{arg.arg}{ann}")
|
| 71 |
+
ret = f" -> {ast.unparse(node.returns)}" if node.returns else ""
|
| 72 |
+
return f"({', '.join(parts)}){ret}"
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def _is_stub(node: ast.FunctionDef | ast.AsyncFunctionDef, source: str) -> bool:
|
| 76 |
+
func_src = "\n".join(source.splitlines()[node.lineno - 1:node.end_lineno])
|
| 77 |
+
if "# STUB" in func_src:
|
| 78 |
+
return True
|
| 79 |
+
# body that is just "raise NotImplementedError"
|
| 80 |
+
stmts = [s for s in node.body
|
| 81 |
+
if not (isinstance(s, ast.Expr) and isinstance(s.value, ast.Constant))]
|
| 82 |
+
if len(stmts) == 1 and isinstance(stmts[0], ast.Raise):
|
| 83 |
+
exc = stmts[0].exc
|
| 84 |
+
if isinstance(exc, ast.Name) and exc.id == "NotImplementedError":
|
| 85 |
+
return True
|
| 86 |
+
if isinstance(exc, ast.Call) and isinstance(exc.func, ast.Name) and exc.func.id == "NotImplementedError":
|
| 87 |
+
return True
|
| 88 |
+
return False
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def _extract_calls(node: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
|
| 92 |
+
calls: set[str] = set()
|
| 93 |
+
for child in ast.walk(node):
|
| 94 |
+
if isinstance(child, ast.Call):
|
| 95 |
+
if isinstance(child.func, ast.Name):
|
| 96 |
+
calls.add(child.func.id)
|
| 97 |
+
return calls
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# ── main parser ───────────────────────────────────────────────────────────────
|
| 101 |
+
|
| 102 |
+
def parse_source(source: str, module_name: str = "module") -> CodeDAG:
    """Parse Python source into a CodeDAG.

    The DAG holds one module node, one ``imported_module`` node per distinct
    imported name, a node per top-level function and class, and a node per
    method (named ``Class.method``). Edges are ``imports``, ``contains`` and,
    between top-level functions of this module only, ``calls``.

    Args:
        source: Python source text.
        module_name: Name given to the module node.

    Returns:
        The populated CodeDAG. ``function_infos`` covers top-level functions.

    Raises:
        SyntaxError: If ``source`` cannot be parsed by ``ast.parse``.
    """
    tree = ast.parse(source)
    dag = CodeDAG(module_name=module_name)
    dag.nodes.append(DAGNode(name=module_name, node_type="module"))

    func_names: set[str] = set()

    # Imports anywhere in the file. Each imported name is recorded once, so
    # ``from typing import A`` followed by ``from typing import B`` does not
    # produce duplicate nodes and edges.
    seen_imports: set[str] = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imported = [alias.asname or alias.name for alias in node.names]
        elif isinstance(node, ast.ImportFrom) and node.module:
            imported = [node.module]
        else:
            continue
        for imp in imported:
            if imp in seen_imports:
                continue
            seen_imports.add(imp)
            dag.nodes.append(DAGNode(name=imp, node_type="imported_module"))
            dag.edges.append(DAGEdge("imports", module_name, imp))

    # Top-level functions and classes.
    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            sig = _signature(node)
            stub = _is_stub(node, source)
            # Only a leading *string* constant is a docstring; a bare ``...``
            # or number is also an ast.Constant and must not count.
            has_doc = (
                bool(node.body)
                and isinstance(node.body[0], ast.Expr)
                and isinstance(node.body[0].value, ast.Constant)
                and isinstance(node.body[0].value.value, str)
            )
            doc_end = node.body[0].end_lineno if has_doc else node.lineno

            dag.nodes.append(DAGNode(
                name=node.name,
                node_type="function",
                signature=sig,
                is_stub=stub,
                body_summary="STUB — needs implementation" if stub else "(implemented)",
            ))
            dag.edges.append(DAGEdge("contains", module_name, node.name))
            dag.function_infos[node.name] = FunctionInfo(
                name=node.name,
                signature=sig,
                is_stub=stub,
                start_line=node.lineno,
                end_line=node.end_lineno,
                has_docstring=has_doc,
                docstring_end_line=doc_end,
            )
            func_names.add(node.name)

        elif isinstance(node, ast.ClassDef):
            dag.nodes.append(DAGNode(name=node.name, node_type="class"))
            dag.edges.append(DAGEdge("contains", module_name, node.name))
            for item in node.body:
                if isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    qname = f"{node.name}.{item.name}"
                    dag.nodes.append(DAGNode(
                        name=qname,
                        node_type="function",
                        signature=_signature(item),
                        is_stub=_is_stub(item, source),
                    ))
                    dag.edges.append(DAGEdge("contains", node.name, qname))
                    func_names.add(qname)

    # Call edges (same-module only, originating from top-level functions).
    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            for callee in _extract_calls(node):
                if callee in func_names and callee != node.name:
                    dag.edges.append(DAGEdge("calls", node.name, callee))

    return dag
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
# ── code injection ────────────────────────────────────────────────────────────
|
| 178 |
+
|
| 179 |
+
def inject_function_body(source: str, func_name: str, new_body: str) -> str:
    """Replace the body of top-level function ``func_name`` with ``new_body``.

    The complete ``def`` header (including multi-line signatures) and any
    docstring are preserved; everything else in the old body is dropped,
    including comment lines between the header and the first statement.

    Args:
        source: Module source text.
        func_name: Name of a top-level (sync or async) function.
        new_body: Replacement body. Its common leading indentation is stripped
            and replaced with the function's body indentation; an empty or
            whitespace-only body becomes ``pass``.

    Returns:
        The updated module source.

    Raises:
        SyntaxError: If ``source`` cannot be parsed.
        ValueError: If no top-level function named ``func_name`` exists.
    """
    tree = ast.parse(source)
    lines = source.splitlines(keepends=True)

    target = next(
        (
            node
            for node in tree.body
            if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef))
            and node.name == func_name
        ),
        None,
    )
    if target is None:
        raise ValueError(f"Function {func_name!r} not found in source")

    first = target.body[0]
    # Only a leading *string* constant is a docstring; ``...`` is a Constant
    # too and must be replaced, not kept.
    has_doc = (
        isinstance(first, ast.Expr)
        and isinstance(first.value, ast.Constant)
        and isinstance(first.value.value, str)
    )

    # Text in front of the first body statement on its own line. col_offset
    # counts UTF-8 bytes, hence the encode/decode round trip.
    first_line = lines[first.lineno - 1]
    lead = first_line.encode("utf-8")[: first.col_offset].decode("utf-8")

    if lead.strip():
        # The body shares a line with the header (``def f(): pass``): keep the
        # header part only and start the new body on the following line.
        indent = " " * target.col_offset + "    "
        before = lines[: first.lineno - 1] + [lead.rstrip() + "\n"]
        if has_doc:
            before.append(indent + ast.get_source_segment(source, first) + "\n")
    else:
        # Reuse the existing body indentation so the new statements line up
        # with a preserved docstring.
        indent = lead
        if has_doc:
            cut = first.end_lineno
        else:
            # Keep the whole header, but drop blank and comment-only lines
            # between it and the first statement.
            cut = first.lineno - 1
            while cut > target.lineno and (
                not lines[cut - 1].strip()
                or lines[cut - 1].lstrip().startswith("#")
            ):
                cut -= 1
        before = lines[:cut]

    if before and not before[-1].endswith("\n"):
        before[-1] += "\n"
    after = lines[target.end_lineno:]  # everything after the function

    # Normalise the new body: strip the common margin, then re-indent.
    raw_lines = new_body.splitlines()
    margin = min(
        (len(text) - len(text.lstrip()) for text in raw_lines if text.strip()),
        default=0,
    )
    if any(text.strip() for text in raw_lines):
        body_lines = [
            indent + text[margin:] + "\n" if text.strip() else "\n"
            for text in raw_lines
        ]
    else:
        # Nothing but whitespace would leave the function without a body.
        body_lines = [indent + "pass\n"]

    return "".join(before + body_lines + after)
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
# ── DAG → text description (for prompts) ─────────────────────────────────────
|
| 229 |
+
|
| 230 |
+
def dag_to_text(dag: CodeDAG) -> str:
    """Render the DAG as a concise human-readable block for the agent prompt.

    The output has a module heading, a "Nodes" section with one bullet per
    node of a known type, and an "Edges" section with one bullet per edge.
    Nodes of any other type are left out.
    """

    def describe(n) -> str | None:
        kind = n.node_type
        if kind == "module":
            return f"- [MODULE] {n.name}"
        if kind == "function":
            status = "[ STUB ]" if n.is_stub else "[ready ]"
            return f"- [FUNC] {status} {n.name}{n.signature}"
        if kind == "class":
            return f"- [CLASS] {n.name}"
        if kind == "imported_module":
            return f"- [IMPORT] {n.name}"
        return None

    out: list[str] = [f"## Module: {dag.module_name}", "", "### Nodes"]
    out.extend(text for text in map(describe, dag.nodes) if text is not None)
    out.extend(["", "### Edges"])
    out.extend(f"- {e.source} --{e.edge_type}--> {e.target}" for e in dag.edges)
    return "\n".join(out)
|
env/client.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""HTTP client for the repo-editing environment."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
import httpx
|
| 8 |
+
|
| 9 |
+
from env.models import RepoEditObservation, RepoEditState
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class RepoEditEnv:
    """Synchronous HTTP client for a running repo-editing environment server.

    Use it as a context manager, or call ``close()`` when finished, so the
    underlying ``httpx.Client`` connection pool is released.
    """

    def __init__(self, base_url: str = "http://localhost:8000", timeout: float = 60.0) -> None:
        """Create the client.

        Args:
            base_url: Server root URL; a trailing slash is removed.
            timeout: Timeout passed to ``httpx.Client`` for every request.
        """
        self._client = httpx.Client(base_url=base_url.rstrip("/"), timeout=timeout)

    def reset(self, task_id: str | None = None) -> RepoEditObservation:
        """POST ``/reset`` and return the validated observation.

        Args:
            task_id: Optional task to start; sent as a query parameter only
                when truthy.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """
        params = {"task_id": task_id} if task_id else {}
        r = self._client.post("/reset", params=params)
        r.raise_for_status()
        return RepoEditObservation.model_validate(r.json())

    def step(self, action_dict: dict[str, Any]) -> dict[str, Any]:
        """POST one action to ``/step`` and return the decoded JSON response.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """
        r = self._client.post("/step", json=action_dict)
        r.raise_for_status()
        return r.json()

    def state(self) -> RepoEditState:
        """GET ``/state`` and return the validated state model.

        Raises:
            httpx.HTTPStatusError: If the server answers with a 4xx/5xx status.
        """
        r = self._client.get("/state")
        r.raise_for_status()
        return RepoEditState.model_validate(r.json())

    def close(self) -> None:
        """Close the underlying HTTP client and its connections."""
        self._client.close()

    def __enter__(self) -> "RepoEditEnv":
        return self

    def __exit__(self, *_: object) -> None:
        self.close()
|
env/environment.py
ADDED
|
@@ -0,0 +1,467 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Multi-turn repo-editing OpenEnv environment.
|
| 2 |
+
|
| 3 |
+
Episode flow
|
| 4 |
+
------------
|
| 5 |
+
reset() Parse the target repo into a KnowledgeGraph. Return an observation
|
| 6 |
+
containing the full graph overview and the task description.
|
| 7 |
+
|
| 8 |
+
step() The agent emits one RepoEditAction per turn:
|
| 9 |
+
- query → search results (information, no graph mutation)
|
| 10 |
+
- inspect → full node source (information)
|
| 11 |
+
- add_node → insert new function/class into the live graph
|
| 12 |
+
- update_node → replace a node's source in the live graph
|
| 13 |
+
- remove_node → delete a node
|
| 14 |
+
- submit → materialise all changes back to disk (temp), run tests,
|
| 15 |
+
compute reward, end episode
|
| 16 |
+
|
| 17 |
+
Reward structure (sparse — designed for long-horizon RL)
|
| 18 |
+
---------------------------------------------------------
|
| 19 |
+
Per-turn cost : -0.05 (forces efficiency)
|
| 20 |
+
Malformed action : -0.2
|
| 21 |
+
On submit
|
| 22 |
+
all tests pass : +1.0
|
| 23 |
+
partial pass : +0.5 * (n_pass / n_total)
|
| 24 |
+
compile error : 0.0
|
| 25 |
+
Episode cap hit : 0.0
|
| 26 |
+
|
| 27 |
+
This sparse reward deliberately requires the agent to plan, navigate, and
|
| 28 |
+
execute across many turns — it cannot succeed by guessing on the first turn.
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
from __future__ import annotations
|
| 32 |
+
|
| 33 |
+
import ast
|
| 34 |
+
import json
|
| 35 |
+
import os
|
| 36 |
+
import re
|
| 37 |
+
import sys
|
| 38 |
+
import tempfile
|
| 39 |
+
import textwrap
|
| 40 |
+
import traceback
|
| 41 |
+
import uuid
|
| 42 |
+
from pathlib import Path
|
| 43 |
+
from typing import Any
|
| 44 |
+
|
| 45 |
+
from env.actions import (
|
| 46 |
+
AddNodeAction,
|
| 47 |
+
InspectAction,
|
| 48 |
+
QueryAction,
|
| 49 |
+
RemoveNodeAction,
|
| 50 |
+
RepoEditAction,
|
| 51 |
+
SubmitAction,
|
| 52 |
+
UpdateNodeAction,
|
| 53 |
+
parse_action,
|
| 54 |
+
)
|
| 55 |
+
from env.models import RepoEditObservation, RepoEditState
|
| 56 |
+
from env.tasks import SAMPLE_REPOS_DIR, TASK_BANK, RepoTask, all_task_ids, get_task
|
| 57 |
+
from graphforge.knowledge_graph import KGEdge, KGNode, KnowledgeGraph
|
| 58 |
+
from graphforge.repo_parser import parse_repo, _node_id
|
| 59 |
+
|
| 60 |
+
try:
|
| 61 |
+
from openenv.core import Environment # type: ignore
|
| 62 |
+
_HAS_OPENENV = True
|
| 63 |
+
except Exception:
|
| 64 |
+
_HAS_OPENENV = False
|
| 65 |
+
from typing import Generic, TypeVar
|
| 66 |
+
A = TypeVar("A")
|
| 67 |
+
O = TypeVar("O")
|
| 68 |
+
S = TypeVar("S")
|
| 69 |
+
|
| 70 |
+
class Environment(Generic[A, O, S]): # type: ignore[no-redef]
|
| 71 |
+
def reset(self) -> O: ...
|
| 72 |
+
def step(self, action: A) -> tuple[O, float, bool]: ...
|
| 73 |
+
def get_state(self) -> S: ...
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# ── constants ─────────────────────────────────────────────────────────────────
|
| 77 |
+
|
| 78 |
+
# Per-turn cost from the reward table in the module docstring ("forces efficiency").
PER_TURN_COST = -0.05
|
| 79 |
+
# Penalty for a malformed action, per the reward table in the module docstring.
MALFORMED_PENALTY = -0.2
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# ── materialiser (graph → disk) ───────────────────────────────────────────────
|
| 83 |
+
|
| 84 |
+
def _materialise_changes(
|
| 85 |
+
kg: KnowledgeGraph,
|
| 86 |
+
repo_src_path: Path,
|
| 87 |
+
tmp_dir: str,
|
| 88 |
+
) -> dict[str, str]:
|
| 89 |
+
"""Write mutated module sources to tmp_dir. Returns {rel_path: source}."""
|
| 90 |
+
files: dict[str, str] = {}
|
| 91 |
+
for node in kg.all_nodes("module"):
|
| 92 |
+
if not node.file_path:
|
| 93 |
+
continue
|
| 94 |
+
# Re-assemble module source from its children's current sources
|
| 95 |
+
# For simplicity: use the node.source field (which we keep in sync)
|
| 96 |
+
files[node.file_path] = node.source
|
| 97 |
+
dest = Path(tmp_dir) / node.file_path
|
| 98 |
+
dest.parent.mkdir(parents=True, exist_ok=True)
|
| 99 |
+
dest.write_text(node.source, encoding="utf-8")
|
| 100 |
+
# Copy non-py files (like __init__.py markers) from original
|
| 101 |
+
for root, _, fnames in os.walk(str(repo_src_path)):
|
| 102 |
+
for fname in fnames:
|
| 103 |
+
if fname.endswith(".py"):
|
| 104 |
+
continue
|
| 105 |
+
src = Path(root) / fname
|
| 106 |
+
rel = src.relative_to(repo_src_path)
|
| 107 |
+
dst = Path(tmp_dir) / rel
|
| 108 |
+
dst.parent.mkdir(parents=True, exist_ok=True)
|
| 109 |
+
dst.write_bytes(src.read_bytes())
|
| 110 |
+
return files
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# ── code injection into module source ─────────────────────────────────────────
|
| 114 |
+
|
| 115 |
+
def _apply_add_node(
    module_source: str,
    code: str,
    class_name: str | None = None,
) -> str:
    """Insert ``code`` into ``module_source`` and return the new module text.

    Args:
        module_source: Current source of the module being edited.
        code: Snippet to add; it is dedented and stripped before insertion.
        class_name: Name of a top-level class to append the snippet to. When
            ``None`` the snippet is appended at module level.

    Returns:
        The module source with the snippet added. If ``class_name`` is given
        but the module cannot be parsed or has no such top-level class, the
        snippet is appended at module level (unindented) instead.

    Note:
        Re-indenting is purely textual, so continuation lines of multi-line
        string literals inside ``code`` are shifted as well.
    """
    new_code = textwrap.dedent(code).strip()
    module_level = module_source.rstrip() + "\n\n\n" + new_code + "\n"
    if class_name is None:
        return module_level

    try:
        tree = ast.parse(module_source)
    except (SyntaxError, ValueError):
        # Unparseable module: we cannot locate the class, so fall back.
        return module_level

    target = next(
        (
            stmt
            for stmt in tree.body
            if isinstance(stmt, ast.ClassDef) and stmt.name == class_name
        ),
        None,
    )
    if target is None or target.end_lineno is None:
        # The fallback must use the *dedented* snippet: indented code at
        # module level would be an IndentationError.
        return module_level

    lines = module_source.splitlines(keepends=True)

    # Reuse the indentation of the class's first statement so the new member
    # lines up with its siblings; default to four spaces when the body shares
    # a line with the header (``class A: pass``).
    first_stmt = target.body[0]
    body_indent = lines[first_stmt.lineno - 1][: first_stmt.col_offset]
    if not body_indent or body_indent.strip():
        body_indent = "    "
    indented = textwrap.indent(new_code, body_indent)

    # end_lineno is 1-indexed and inclusive, so it doubles as the slice
    # index of the first line *after* the class.
    before = "".join(lines[: target.end_lineno])
    after = "".join(lines[target.end_lineno :])
    return before.rstrip() + "\n\n" + indented + "\n" + after
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
def _apply_update_node(
    module_source: str,
    old_source: str,
    new_code: str,
) -> str:
    """Replace the first occurrence of ``old_source`` in ``module_source``.

    The replacement is ``new_code`` dedented and stripped, then re-indented to
    the position ``old_source`` occupied, so a method nested in a class stays
    correctly indented. Leading indentation and trailing whitespace that are
    part of ``old_source`` itself are preserved around the replacement.

    Args:
        module_source: Current source of the module being edited.
        old_source: Exact text of the node being replaced.
        new_code: Replacement snippet (any indentation).

    Returns:
        The updated module source. When ``old_source`` is blank or not found
        verbatim, the new code is appended at the end of the module under a
        ``# PATCHED`` marker and the old text is left in place.

    Note:
        Re-indenting is purely textual, so continuation lines of multi-line
        string literals inside ``new_code`` are shifted as well.
    """
    new_code_clean = textwrap.dedent(new_code).strip()

    # A blank old_source would "match" at offset 0 and splice the new code
    # into the very start of the file, so treat it as not found.
    start = module_source.find(old_source) if old_source.strip() else -1
    if start == -1:
        return module_source + "\n\n# PATCHED\n" + new_code_clean + "\n"

    # Indentation in front of the match on its own line (only if that prefix
    # is pure whitespace) plus any indentation carried by old_source itself.
    line_start = module_source.rfind("\n", 0, start) + 1
    prefix = module_source[line_start:start]
    lead = old_source[: len(old_source) - len(old_source.lstrip(" \t"))]
    indent = (prefix if not prefix.strip() else "") + lead
    trail = old_source[len(old_source.rstrip()) :]

    # The first line lands right after ``prefix`` (kept in the module text),
    # so it only needs ``lead``; later lines need the full indentation.
    first, *rest = new_code_clean.split("\n")
    pieces = [lead + first]
    pieces.extend(indent + line if line.strip() else line for line in rest)
    replacement = "\n".join(pieces) + trail

    return module_source[:start] + replacement + module_source[start + len(old_source) :]
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def _apply_remove_node(module_source: str, old_source: str) -> str:
    """Cut the first verbatim occurrence of ``old_source`` out of ``module_source``.

    The module text is returned unchanged when ``old_source`` does not occur.
    """
    position = module_source.find(old_source)
    if position < 0:
        return module_source
    cut_end = position + len(old_source)
    return module_source[:position] + module_source[cut_end:]
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def _validate_python(source: str) -> tuple[bool, str]:
    """Check that ``source`` parses as Python.

    Returns:
        ``(True, "")`` when ``ast.parse`` accepts the text, otherwise
        ``(False, message)`` with the parser's error message.
    """
    try:
        ast.parse(source)
    except (SyntaxError, ValueError) as exc:
        # ValueError: older interpreters report NUL bytes in the source this
        # way instead of raising SyntaxError.
        return False, str(exc)
    return True, ""
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
# ── environment ───────────────────────────────────────────────────────────────
|
| 175 |
+
|
| 176 |
+
class RepoEditEnvironment(
    Environment[RepoEditAction, RepoEditObservation, RepoEditState]
):
    """Multi-turn OpenEnv environment for repository-level code editing.

    The agent receives a Knowledge Graph of a real Python repo and must
    navigate it to find the right location, then apply the correct edit.

    Reward shaping: every step costs ``PER_TURN_COST``; rejected actions and
    actions that raise additionally cost ``MALFORMED_PENALTY``; the only
    positive reward is the one returned by a passing ``submit``.
    """

    def __init__(self, task_id: str | None = None) -> None:
        """Create an idle environment; ``task_id`` is the default for ``reset()``."""
        self._configured_task_id = task_id
        self._task: RepoTask | None = None
        self._kg: KnowledgeGraph | None = None
        self._episode_id: str | None = None
        self._turn: int = 0
        self._done: bool = False
        self._total_reward: float = 0.0
        self._history: list[dict[str, Any]] = []

    # ----- OpenEnv contract ---------------------------------------------------

    def reset(self, task_id: str | None = None, task: Any = None) -> RepoEditObservation:
        """Reset the environment and start a new episode.

        Pass either task_id (looks up TASK_BANK) or a task object directly
        (supports AutoTask from graphforge.task_generator). Without either,
        the id given to the constructor is used, else a random task.

        Raises:
            ValueError: if the resolved task id is not in TASK_BANK.
        """
        if task is not None:
            tid = task.task_id
        else:
            tid = task_id or self._configured_task_id or _pick_random_task()
            task = TASK_BANK.get(tid)
            if task is None:
                raise ValueError(f"Unknown task_id: {tid!r}. Available: {all_task_ids()}")

        # Resolve the repo path: use task.repo_path if set, else fall back to sample_repos/
        if getattr(task, "repo_path", None):
            repo_path = task.repo_path
        else:
            repo_path = str(SAMPLE_REPOS_DIR / task.repo_name)

        self._task = task
        self._kg = parse_repo(repo_path)
        self._episode_id = str(uuid.uuid4())[:8]
        self._turn = 0
        self._done = False
        self._total_reward = 0.0
        self._history = []

        return RepoEditObservation(
            episode_id=self._episode_id,
            task_id=tid,
            turn=0,
            max_turns=task.max_turns,
            graph_overview=self._kg.overview(),
            task_description=task.description,
            action_result="Episode started. Use query/inspect to navigate, then add_node/update_node to edit, then submit.",
            done=False,
        )

    def step(self, action: RepoEditAction) -> tuple[RepoEditObservation, float, bool]:
        """Apply one action and return ``(observation, turn_reward, done)``.

        Raises:
            RuntimeError: if called before ``reset()``.
        """
        if self._task is None or self._kg is None:
            raise RuntimeError("step() called before reset()")
        if self._done:
            return self._terminal_obs("Episode already done."), 0.0, True

        self._turn += 1
        turn_reward = PER_TURN_COST

        # Dispatch. Any exception raised while handling the action is reported
        # back to the agent as a penalised error instead of ending the episode.
        try:
            result_text, extra_reward, done = self._dispatch(action)
            turn_reward += extra_reward
        except Exception as exc:
            result_text = f"[ERROR] {exc}"
            turn_reward += MALFORMED_PENALTY
            done = False

        self._total_reward += turn_reward

        # Episode cap
        if not done and self._turn >= self._task.max_turns:
            done = True
            result_text += f"\n[Episode cap reached: {self._task.max_turns} turns]"

        self._done = done
        self._history.append({
            "turn": self._turn,
            "action_kind": getattr(action, "kind", "unknown"),
            "reward": turn_reward,
        })

        obs = RepoEditObservation(
            episode_id=self._episode_id,
            task_id=self._task.task_id,
            turn=self._turn,
            max_turns=self._task.max_turns,
            graph_overview=self._kg.overview(),
            task_description=self._task.description,
            action_result=result_text,
            turn_reward=turn_reward,
            total_reward=self._total_reward,
            done=done,
        )
        return obs, turn_reward, done

    def get_state(self) -> RepoEditState:
        """Return a snapshot of the episode-level counters."""
        return RepoEditState(
            episode_id=self._episode_id,
            task_id=self._task.task_id if self._task else None,
            turn=self._turn,
            done=self._done,
            total_reward=self._total_reward,
        )

    @property
    def state(self) -> RepoEditState:
        """Property alias for ``get_state()``."""
        return self.get_state()

    # ----- action dispatch ----------------------------------------------------

    def _dispatch(
        self, action: RepoEditAction
    ) -> tuple[str, float, bool]:
        """Handle one action. Returns (result_text, extra_reward, done)."""
        kg = self._kg
        assert kg is not None

        if isinstance(action, QueryAction):
            nt = None if action.node_type == "all" else action.node_type
            results = kg.search(action.keywords, node_type=nt)
            if not results:
                return f"No nodes found for query: {action.keywords!r}", 0.0, False
            lines = [f"Found {len(results)} node(s) matching {action.keywords!r}:"]
            # Only the first ten hits are listed to keep the observation short.
            for n in results[:10]:
                lines.append(f" {n.node_id} ({n.file_path}:{n.line_start})")
            return "\n".join(lines), 0.0, False

        if isinstance(action, InspectAction):
            detail = kg.node_detail(action.node_id)
            return detail, 0.0, False

        if isinstance(action, AddNodeAction):
            parent = kg.get_node(action.parent_id)
            if parent is None:
                return f"[ERROR] parent_id {action.parent_id!r} not found.", MALFORMED_PENALTY, False

            # Validate the snippet in the same normalised form that is
            # inserted and stored (dedented + stripped), so an indented
            # method body is not rejected as "unexpected indent".
            code = textwrap.dedent(action.code).strip()
            ok, err = _validate_python(code)
            if not ok:
                return f"[SYNTAX ERROR in your code] {err}", MALFORMED_PENALTY, False

            # Append to parent module's source
            module_node = _find_module_for(kg, action.parent_id)
            if module_node is None:
                return f"[ERROR] could not find module for parent {action.parent_id!r}", MALFORMED_PENALTY, False

            class_name = parent.name if parent.node_type == "class" else None
            module_node.source = _apply_add_node(module_node.source, action.code, class_name=class_name)

            # Register the new node in the KG; unknown node types are
            # recorded as plain functions.
            ntype = action.node_type if action.node_type in ("function", "class", "method") else "function"
            new_id = _node_id(ntype, module_node.file_path, action.name)
            new_node = KGNode(
                node_id=new_id,
                node_type=ntype,
                name=action.name,
                file_path=module_node.file_path,
                line_start=module_node.line_end,
                line_end=module_node.line_end + action.code.count("\n") + 1,
                source=code,
            )
            kg.insert_node(action.parent_id, new_node)
            return f"Added {ntype} `{action.name}` to `{module_node.file_path}`.\nNew node_id: {new_id}", 0.0, False

        if isinstance(action, UpdateNodeAction):
            target = kg.get_node(action.node_id)
            if target is None:
                return f"[ERROR] node_id {action.node_id!r} not found.", MALFORMED_PENALTY, False

            # Same normalisation as for add_node: validate what will be stored.
            new_code = textwrap.dedent(action.new_code).strip()
            ok, err = _validate_python(new_code)
            if not ok:
                return f"[SYNTAX ERROR in your code] {err}", MALFORMED_PENALTY, False

            module_node = _find_module_for(kg, action.node_id)
            if module_node is None:
                return f"[ERROR] could not find module for {action.node_id!r}", MALFORMED_PENALTY, False

            module_node.source = _apply_update_node(module_node.source, target.source, action.new_code)
            target.source = new_code
            return f"Updated `{action.node_id}`.", 0.0, False

        if isinstance(action, RemoveNodeAction):
            target = kg.get_node(action.node_id)
            if target is None:
                return f"[ERROR] node_id {action.node_id!r} not found.", MALFORMED_PENALTY, False
            module_node = _find_module_for(kg, action.node_id)
            if module_node:
                module_node.source = _apply_remove_node(module_node.source, target.source)
            kg.remove_node(action.node_id)
            return f"Removed `{action.node_id}`.", 0.0, False

        if isinstance(action, SubmitAction):
            return self._run_submit()

        return f"[ERROR] unrecognised action type: {type(action)}", MALFORMED_PENALTY, False

    def _run_submit(self) -> tuple[str, float, bool]:
        """Run the task's tests against the mutated sources; always ends the episode."""
        kg = self._kg
        task = self._task
        assert kg is not None and task is not None

        reward, msg = _run_tests_in_tempdir(kg, task.test_code, task.repo_name)
        return f"[SUBMIT RESULT]\n{msg}", reward, True

    def _terminal_obs(self, msg: str) -> RepoEditObservation:
        """Build the observation returned for steps taken after the episode ended."""
        return RepoEditObservation(
            episode_id=self._episode_id,
            task_id=self._task.task_id if self._task else None,
            turn=self._turn,
            max_turns=self._task.max_turns if self._task else 0,
            graph_overview="",
            task_description="",
            action_result=msg,
            done=True,
            total_reward=self._total_reward,
        )
|
| 404 |
+
|
| 405 |
+
|
| 406 |
+
# ── helpers ───────────────────────────────────────────────────────────────────
|
| 407 |
+
|
| 408 |
+
def _find_module_for(kg: KnowledgeGraph, node_id: str) -> KGNode | None:
    """Climb from ``node_id`` through its ancestors to the enclosing module node.

    Returns ``None`` when the chain ends (or loops back on itself) before a
    node of type ``"module"`` is reached.
    """
    visited: set[str] = set()
    cursor = node_id
    while True:
        # Stop on an empty id or on an id we have already examined (cycle).
        if not cursor or cursor in visited:
            return None
        visited.add(cursor)

        candidate = kg.get_node(cursor)
        if candidate and candidate.node_type == "module":
            return candidate

        owner = kg.parent_of(cursor)
        if owner is None:
            return None
        cursor = owner.node_id
|
| 422 |
+
|
| 423 |
+
|
| 424 |
+
def _run_tests_in_tempdir(
    kg: KnowledgeGraph, test_code: str, pkg_name: str
) -> tuple[float, str]:
    """Write mutated module sources to a temp dir, import from there, run tests.

    This works for ANY Python repo — no hardcoded package paths needed.
    The test_code must use short imports: `from <pkg_name>.<module> import ...`

    Args:
        kg: Knowledge graph whose ``"module"`` nodes carry the current sources.
        test_code: Python source executed as the test; it passes if it runs
            to completion without raising.
        pkg_name: Name of the package directory created inside the temp dir.

    Returns:
        ``(1.0, message)`` when the test code completes, else ``(0.0, message)``
        describing the assertion failure or exception.

    Note:
        The top-level ``__init__.py`` is always written empty; a module node
        whose ``file_path`` is exactly ``"__init__.py"`` is skipped.
    """

    def _forget_package() -> None:
        # Drop every cached import of the package so the next import reads
        # the files written for this run (and none leak into later runs).
        doomed = [
            name
            for name in sys.modules
            if name == pkg_name or name.startswith(pkg_name + ".")
        ]
        for name in doomed:
            sys.modules.pop(name, None)

    with tempfile.TemporaryDirectory() as tmpdir:
        pkg_dir = Path(tmpdir) / pkg_name
        pkg_dir.mkdir(parents=True)
        (pkg_dir / "__init__.py").write_text("", encoding="utf-8")

        # Write each module's current (potentially mutated) source
        for node in kg.all_nodes("module"):
            if not node.file_path or node.file_path == "__init__.py":
                continue
            dest = pkg_dir / node.file_path
            dest.parent.mkdir(parents=True, exist_ok=True)
            dest.write_text(node.source, encoding="utf-8")

        _forget_package()
        sys.path.insert(0, tmpdir)
        try:
            # SECURITY: test_code and the agent-edited modules it imports run
            # inside this interpreter with full privileges. Only use with
            # trusted tasks/agents or inside an isolated container.
            exec(compile(test_code, "<tests>", "exec"), {})  # noqa: S102
            return 1.0, "✓ All tests passed!"
        except AssertionError as exc:
            return 0.0, f"✗ Test failed: {exc}"
        except (Exception, SystemExit):
            # SystemExit is not an Exception subclass; without catching it an
            # edited module calling sys.exit() would escape this function.
            return 0.0, f"✗ Exception during tests:\n{traceback.format_exc(limit=5)}"
        finally:
            # The executed code may have altered sys.path itself, so never
            # assume our entry is still present.
            if tmpdir in sys.path:
                sys.path.remove(tmpdir)
            sys.path_importer_cache.pop(tmpdir, None)
            _forget_package()
|
| 463 |
+
|
| 464 |
+
|
| 465 |
+
def _pick_random_task() -> str:
    """Return one task id chosen at random from ``all_task_ids()``."""
    from random import choice

    candidates = all_task_ids()
    return choice(candidates)
|
env/models.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Pydantic wire models for the multi-turn repo-editing environment."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Any, Optional
|
| 6 |
+
|
| 7 |
+
from pydantic import BaseModel, ConfigDict, Field
|
| 8 |
+
|
| 9 |
+
_cfg = ConfigDict(extra="ignore")
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
class RepoEditObservation(BaseModel):
    """What the env returns after reset() or step().

    Contains the current graph overview + the result of the last action.
    The agent should read action_result carefully before deciding the next step.
    """

    # Shared module-level pydantic config (``extra="ignore"``).
    model_config = _cfg

    # Episode identification and turn bookkeeping.
    episode_id: Optional[str] = None
    task_id: Optional[str] = None
    turn: int = 0
    max_turns: int = 15

    graph_overview: str = ""  # compact text view of the entire repo KG
    task_description: str = ""  # what the agent needs to accomplish
    action_result: str = ""  # feedback from the last action

    # Reward earned on the latest turn, the running total, and whether the
    # episode has ended.
    turn_reward: float = 0.0
    total_reward: float = 0.0
    done: bool = False

    # Free-form extra data; default_factory gives each instance its own dict.
    info: dict[str, Any] = Field(default_factory=dict)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
class RepoEditState(BaseModel):
    """Episode-level state snapshot.

    Carries only identifiers and counters; it holds no graph or task text.
    """

    # Shared module-level pydantic config (``extra="ignore"``).
    model_config = _cfg

    episode_id: Optional[str] = None  # None until an episode has been started
    task_id: Optional[str] = None  # None until a task has been selected
    turn: int = 0  # number of steps taken so far
    done: bool = False  # True once the episode has ended
    total_reward: float = 0.0  # sum of per-turn rewards so far
|
env/server.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI server for the multi-turn repo-editing environment."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
from fastapi import FastAPI, HTTPException
|
| 8 |
+
|
| 9 |
+
from env.actions import RepoEditAction, parse_action
|
| 10 |
+
from env.environment import RepoEditEnvironment
|
| 11 |
+
from env.models import RepoEditObservation, RepoEditState
|
| 12 |
+
|
| 13 |
+
_env = RepoEditEnvironment()
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def _make_app() -> FastAPI:
    """Build the FastAPI app that exposes the module-level environment over HTTP.

    Routes:
        POST /reset   -- start a new episode (optional ``task_id`` query param).
        POST /step    -- apply one action given as a JSON object.
        GET  /state   -- current episode state snapshot.
        GET  /healthz -- liveness probe.

    All routes operate on the single shared ``_env`` instance, so every
    client talks to the same episode.
    """
    app = FastAPI(title="Repo-Edit OpenEnv", version="0.3.0")

    @app.post("/reset", response_model=RepoEditObservation)
    def reset(task_id: str | None = None) -> RepoEditObservation:
        try:
            return _env.reset(task_id=task_id)
        except ValueError as exc:
            # An unknown task_id is a client error; report it the same way
            # /step reports bad input instead of surfacing a 500.
            raise HTTPException(status_code=400, detail=str(exc)) from exc

    @app.post("/step")
    def step(action_dict: dict[str, Any]) -> dict[str, Any]:
        try:
            action = parse_action(action_dict)
            obs, reward, done = _env.step(action)
        except (ValueError, RuntimeError) as exc:
            # ValueError: rejected action payload; RuntimeError: step()
            # called before reset().
            raise HTTPException(status_code=400, detail=str(exc)) from exc
        return {"observation": obs.model_dump(), "reward": reward, "done": done}

    @app.get("/state", response_model=RepoEditState)
    def state() -> RepoEditState:
        return _env.get_state()

    @app.get("/healthz")
    def healthz() -> dict[str, Any]:
        return {"status": "ok"}

    return app
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
app = _make_app()
|
| 44 |
+
__all__ = ["app"]
|
env/tasks.py
ADDED
|
@@ -0,0 +1,363 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Multi-turn repo-editing tasks.
|
| 2 |
+
|
| 3 |
+
Each Task specifies:
|
| 4 |
+
- A target repo to work on (points to a sample_repos/ subdir)
|
| 5 |
+
- A natural-language description of the change to make
|
| 6 |
+
- A set of test functions (Python code strings) that verify the change
|
| 7 |
+
- The maximum number of turns allowed
|
| 8 |
+
|
| 9 |
+
Training tasks are deliberately structured to require multi-step navigation:
|
| 10 |
+
1. The agent must QUERY the graph to find relevant nodes
|
| 11 |
+
2. INSPECT nodes to understand the existing code
|
| 12 |
+
3. ADD or UPDATE nodes to implement the change
|
| 13 |
+
4. SUBMIT to trigger compilation + test execution
|
| 14 |
+
|
| 15 |
+
This sparse reward structure forces the agent to develop structured planning
|
| 16 |
+
and state tracking across long trajectories — the core theme of this project.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
|
| 21 |
+
import importlib.util
|
| 22 |
+
import sys
|
| 23 |
+
import textwrap
|
| 24 |
+
import traceback
|
| 25 |
+
from dataclasses import dataclass, field
|
| 26 |
+
from pathlib import Path
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
SAMPLE_REPOS_DIR = Path(__file__).resolve().parent.parent / "graphforge" / "sample_repos"
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
@dataclass
class RepoTask:
    """One repo-editing task: which repo to edit, what to change, and how it is checked."""

    task_id: str  # unique key under which the task is stored in TASK_BANK
    repo_name: str  # package name (used as tempdir subdir)
    description: str  # natural-language task for the agent
    test_code: str  # Python assertions using short imports
    max_turns: int = 15  # turn budget for one episode
    difficulty: int = 0  # 0=easy, 1=medium, 2=hard
    hints: list[str] = field(default_factory=list)  # optional hint strings; fresh list per task
    repo_path: str | None = None  # if set, full path to repo source dir
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
TASK_BANK: dict[str, RepoTask] = {}
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _reg(task: RepoTask) -> RepoTask:
    """Store ``task`` in ``TASK_BANK`` keyed by its ``task_id`` and hand it back.

    An existing entry with the same id is replaced.
    """
    TASK_BANK.update({task.task_id: task})
    return task
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
# ── Task 0: add validate_due_date ────────────────────────────────────────────
|
| 53 |
+
|
| 54 |
+
_reg(RepoTask(
|
| 55 |
+
task_id="t0.validate_due_date",
|
| 56 |
+
repo_name="task_manager",
|
| 57 |
+
description=textwrap.dedent("""\
|
| 58 |
+
Add a function `validate_due_date(due_date) -> bool` to `validators.py`.
|
| 59 |
+
|
| 60 |
+
The function should return True if:
|
| 61 |
+
- due_date is None (no deadline), OR
|
| 62 |
+
- due_date is a datetime.date instance
|
| 63 |
+
|
| 64 |
+
It should return False for any other type (strings, integers, etc.).
|
| 65 |
+
""").strip(),
|
| 66 |
+
test_code=textwrap.dedent("""\
|
| 67 |
+
from datetime import date
|
| 68 |
+
from task_manager.validators import validate_due_date
|
| 69 |
+
assert validate_due_date(None) is True, "None is valid (no deadline)"
|
| 70 |
+
assert validate_due_date(date(2025, 1, 1)) is True, "date object is valid"
|
| 71 |
+
assert validate_due_date("2025-01-01") is False, "string is not valid"
|
| 72 |
+
assert validate_due_date(20250101) is False, "int is not valid"
|
| 73 |
+
assert validate_due_date([]) is False, "list is not valid"
|
| 74 |
+
""").strip(),
|
| 75 |
+
max_turns=12,
|
| 76 |
+
hints=[
|
| 77 |
+
"Look in validators.py to see the style of existing validators.",
|
| 78 |
+
"The function signature should be: def validate_due_date(due_date) -> bool",
|
| 79 |
+
"Import datetime.date inside the function or at the top of validators.py.",
|
| 80 |
+
],
|
| 81 |
+
))
|
| 82 |
+
|
| 83 |
+
# ── Task 1: add Task.is_overdue ───────────────────────────────────────────────
|
| 84 |
+
|
| 85 |
+
_reg(RepoTask(
|
| 86 |
+
task_id="t1.is_overdue",
|
| 87 |
+
repo_name="task_manager",
|
| 88 |
+
description=textwrap.dedent("""\
|
| 89 |
+
Add a method `is_overdue(self, today: date) -> bool` to the `Task`
|
| 90 |
+
class in `models.py`.
|
| 91 |
+
|
| 92 |
+
The method should return True if:
|
| 93 |
+
- the task has a due_date AND
|
| 94 |
+
- today is strictly after the due_date AND
|
| 95 |
+
- the task is not yet done
|
| 96 |
+
|
| 97 |
+
It should return False if there is no due_date, or if the task is done,
|
| 98 |
+
or if today <= due_date.
|
| 99 |
+
""").strip(),
|
| 100 |
+
test_code=textwrap.dedent("""\
|
| 101 |
+
from datetime import date
|
| 102 |
+
from task_manager.models import Task
|
| 103 |
+
|
| 104 |
+
t_past = Task("x", "low", [], due_date=date(2020, 1, 1))
|
| 105 |
+
t_future = Task("y", "low", [], due_date=date(2099, 1, 1))
|
| 106 |
+
t_none = Task("z", "low", [], due_date=None)
|
| 107 |
+
t_done = Task("d", "low", [], due_date=date(2020, 1, 1))
|
| 108 |
+
t_done.complete()
|
| 109 |
+
|
| 110 |
+
today = date.today()
|
| 111 |
+
assert t_past.is_overdue(today) is True, "past due date → overdue"
|
| 112 |
+
assert t_future.is_overdue(today) is False, "future due date → not overdue"
|
| 113 |
+
assert t_none.is_overdue(today) is False, "no due date → not overdue"
|
| 114 |
+
assert t_done.is_overdue(today) is False, "done task → not overdue"
|
| 115 |
+
""").strip(),
|
| 116 |
+
max_turns=15,
|
| 117 |
+
difficulty=1,
|
| 118 |
+
hints=[
|
| 119 |
+
"The Task class is in models.py.",
|
| 120 |
+
"The method should check self.due_date, today, and self.done.",
|
| 121 |
+
],
|
| 122 |
+
))
|
| 123 |
+
|
| 124 |
+
# ── Task 2: add TaskStore.find_by_tag ─────────────────────────────────────────
|
| 125 |
+
|
| 126 |
+
_reg(RepoTask(
|
| 127 |
+
task_id="t2.find_by_tag",
|
| 128 |
+
repo_name="task_manager",
|
| 129 |
+
description=textwrap.dedent("""\
|
| 130 |
+
Add a method `find_by_tag(self, tag: str) -> list[Task]` to the
|
| 131 |
+
`TaskStore` class in `storage.py`.
|
| 132 |
+
|
| 133 |
+
The method should return a list of all tasks that have `tag` in
|
| 134 |
+
their `tags` list. Return an empty list if no tasks match.
|
| 135 |
+
""").strip(),
|
| 136 |
+
test_code=textwrap.dedent("""\
|
| 137 |
+
from task_manager.models import Task
|
| 138 |
+
from task_manager.storage import TaskStore
|
| 139 |
+
|
| 140 |
+
store = TaskStore()
|
| 141 |
+
store.add(Task("t1", "high", ["python", "backend"], None))
|
| 142 |
+
store.add(Task("t2", "low", ["frontend"], None))
|
| 143 |
+
store.add(Task("t3", "medium", ["python"], None))
|
| 144 |
+
|
| 145 |
+
result = store.find_by_tag("python")
|
| 146 |
+
assert len(result) == 2, f"Expected 2, got {len(result)}"
|
| 147 |
+
titles = {t.title for t in result}
|
| 148 |
+
assert titles == {"t1", "t3"}, f"Wrong titles: {titles}"
|
| 149 |
+
|
| 150 |
+
empty = store.find_by_tag("devops")
|
| 151 |
+
assert empty == [], f"Expected [], got {empty}"
|
| 152 |
+
""").strip(),
|
| 153 |
+
max_turns=15,
|
| 154 |
+
difficulty=1,
|
| 155 |
+
))
|
| 156 |
+
|
| 157 |
+
# ── Task 3 (hard): enforce priority validation in api.create_task ─────────────
|
| 158 |
+
|
| 159 |
+
_reg(RepoTask(
|
| 160 |
+
task_id="t3.enforce_priority",
|
| 161 |
+
repo_name="task_manager",
|
| 162 |
+
description=textwrap.dedent("""\
|
| 163 |
+
Update the `create_task` function in `api.py` so that it validates
|
| 164 |
+
the `priority` argument using `validate_priority` from `validators.py`.
|
| 165 |
+
|
| 166 |
+
If the priority is invalid, raise `ValueError` with a clear message.
|
| 167 |
+
The existing validations for title and tags must still work.
|
| 168 |
+
|
| 169 |
+
Note: `validate_priority` already exists in validators.py.
|
| 170 |
+
You must import and call it inside `create_task`.
|
| 171 |
+
""").strip(),
|
| 172 |
+
test_code=textwrap.dedent("""\
|
| 173 |
+
from task_manager import api as _api
|
| 174 |
+
_api.reset_store() # clean state between runs
|
| 175 |
+
|
| 176 |
+
# valid priority passes through
|
| 177 |
+
t = _api.create_task("Buy milk", priority="high")
|
| 178 |
+
assert t.priority == "high"
|
| 179 |
+
|
| 180 |
+
# invalid priority raises ValueError
|
| 181 |
+
raised = False
|
| 182 |
+
try:
|
| 183 |
+
_api.create_task("Bad task", priority="urgent")
|
| 184 |
+
except ValueError:
|
| 185 |
+
raised = True
|
| 186 |
+
assert raised, "create_task should raise ValueError for invalid priority"
|
| 187 |
+
|
| 188 |
+
# title validation still works
|
| 189 |
+
raised2 = False
|
| 190 |
+
try:
|
| 191 |
+
_api.create_task("", priority="low")
|
| 192 |
+
except ValueError:
|
| 193 |
+
raised2 = True
|
| 194 |
+
assert raised2, "create_task should still reject empty title"
|
| 195 |
+
""").strip(),
|
| 196 |
+
max_turns=18,
|
| 197 |
+
difficulty=2,
|
| 198 |
+
hints=[
|
| 199 |
+
"api.py already imports validate_title and validate_tags from validators.",
|
| 200 |
+
"You need to also import validate_priority and call it in create_task.",
|
| 201 |
+
],
|
| 202 |
+
))
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
# ── Humanize tasks (real-world library) ──────────────────────────────────────
|
| 206 |
+
|
| 207 |
+
_reg(RepoTask(
|
| 208 |
+
task_id="t4.intpercent",
|
| 209 |
+
repo_name="humanize",
|
| 210 |
+
description=textwrap.dedent("""\
|
| 211 |
+
Add a function `intpercent(value: float, decimal_places: int = 1) -> str`
|
| 212 |
+
to `number.py`.
|
| 213 |
+
|
| 214 |
+
The function should convert a fraction to a percentage string:
|
| 215 |
+
0.0 → "0.0%"
|
| 216 |
+
0.5 → "50.0%"
|
| 217 |
+
0.753 → "75.3%"
|
| 218 |
+
1.0 → "100.0%"
|
| 219 |
+
|
| 220 |
+
Use `decimal_places` to control how many digits appear after the decimal.
|
| 221 |
+
If decimal_places=0, return an integer percentage with no decimal point.
|
| 222 |
+
""").strip(),
|
| 223 |
+
test_code=textwrap.dedent("""\
|
| 224 |
+
from humanize.number import intpercent
|
| 225 |
+
assert intpercent(0.0) == "0.0%", f"got {intpercent(0.0)!r}"
|
| 226 |
+
assert intpercent(0.5) == "50.0%", f"got {intpercent(0.5)!r}"
|
| 227 |
+
assert intpercent(0.753) == "75.3%", f"got {intpercent(0.753)!r}"
|
| 228 |
+
assert intpercent(1.0) == "100.0%", f"got {intpercent(1.0)!r}"
|
| 229 |
+
assert intpercent(0.5, decimal_places=0) == "50%", f"got {intpercent(0.5, decimal_places=0)!r}"
|
| 230 |
+
""").strip(),
|
| 231 |
+
max_turns=12,
|
| 232 |
+
difficulty=0,
|
| 233 |
+
hints=[
|
| 234 |
+
"Look at number.py — the existing functions show the style to follow.",
|
| 235 |
+
"Use f-string formatting: f'{value * 100:.{decimal_places}f}%'",
|
| 236 |
+
],
|
| 237 |
+
))
|
| 238 |
+
|
| 239 |
+
# Task t5: add a human-readable file counter to humanize/filesize.py.
_reg(RepoTask(
    task_id="t5.naturalfilecount",
    repo_name="humanize",
    description=textwrap.dedent("""\
        Add a function `naturalfilecount(n: int) -> str` to `filesize.py`.

        The function should return a human-readable file count:
        0 → "no files"
        1 → "1 file"
        2 → "2 files"
        99 → "99 files"
        """).strip(),
    test_code=textwrap.dedent("""\
        from humanize.filesize import naturalfilecount
        assert naturalfilecount(0) == "no files", f"got {naturalfilecount(0)!r}"
        assert naturalfilecount(1) == "1 file", f"got {naturalfilecount(1)!r}"
        assert naturalfilecount(2) == "2 files", f"got {naturalfilecount(2)!r}"
        assert naturalfilecount(99) == "99 files", f"got {naturalfilecount(99)!r}"
        """).strip(),
    max_turns=12,
    difficulty=0,
    hints=[
        "Look at filesize.py — naturalsize is the only function there.",
        "This is a short function: handle n==0, n==1, and n>1 as three cases.",
    ],
))
|
| 265 |
+
|
| 266 |
+
# Task t6: add an SI-prefix number formatter to humanize/number.py.
_reg(RepoTask(
    task_id="t6.metric",
    repo_name="humanize",
    description=textwrap.dedent("""\
        Add a function `metric(value: float, unit: str = "") -> str` to `number.py`.

        The function should format a number using SI metric prefixes:
        1_500_000 → "1.5 M"
        2_000 → "2.0 k"
        500 → "500" (no prefix below 1000)

        Supported prefixes (largest to smallest): T (10¹²), G (10⁹), M (10⁶), k (10³).
        If a unit is provided, append it after the prefix: metric(1500, "Hz") → "1.5 kHz".
        Always format the scaled number to 1 decimal place.
        """).strip(),
    test_code=textwrap.dedent("""\
        from humanize.number import metric
        assert metric(1_500_000) == "1.5 M", f"got {metric(1_500_000)!r}"
        assert metric(2_000) == "2.0 k", f"got {metric(2_000)!r}"
        assert metric(500) == "500", f"got {metric(500)!r}"
        assert metric(1_500, "Hz") == "1.5 kHz", f"got {metric(1_500, 'Hz')!r}"
        assert metric(2e9, "W") == "2.0 GW", f"got {metric(2e9, 'W')!r}"
        """).strip(),
    max_turns=15,
    difficulty=1,
    hints=[
        "Loop through prefixes from largest to smallest: (1e12,'T'), (1e9,'G'), (1e6,'M'), (1e3,'k').",
        "If abs(value) >= threshold, scale and format; otherwise return str(int(value)).",
    ],
))
|
| 296 |
+
|
| 297 |
+
# Task t7: add a human-readable age formatter to humanize/time.py.
#
# The test code derives birth dates relative to today.  ``date.replace(year=...)``
# raises ValueError when today is Feb 29 and the target year is not a leap
# year, which would fail the task regardless of the implementation under
# test; the ``_years_ago`` helper in the test falls back to Feb 28 then.
_reg(RepoTask(
    task_id="t7.age",
    repo_name="humanize",
    description=textwrap.dedent("""\
        Add a function `age(birth_date) -> str` to `time.py`.

        The function receives a `datetime.date` and returns a human-readable age:
        - If the person is under 1 year old, return "X months old" (use 30-day months).
        - If exactly 1 year, return "1 year old".
        - Otherwise return "X years old".

        Use `datetime.date.today()` as the reference point.
        Assume birth_date is always a valid date in the past.
        """).strip(),
    test_code=textwrap.dedent("""\
        import datetime as dt
        from humanize.time import age

        def _years_ago(day, years):
            try:
                return day.replace(year=day.year - years)
            except ValueError:
                # Feb 29 has no counterpart in a non-leap year; use Feb 28.
                return day.replace(year=day.year - years, day=28)

        today = dt.date.today()
        dob_25y = _years_ago(today, 25)
        dob_1y = _years_ago(today, 1)
        dob_6m = today - dt.timedelta(days=182)
        dob_2m = today - dt.timedelta(days=61)

        assert age(dob_25y) == "25 years old", f"got {age(dob_25y)!r}"
        assert age(dob_1y) == "1 year old", f"got {age(dob_1y)!r}"
        assert age(dob_6m) == "6 months old", f"got {age(dob_6m)!r}"
        assert age(dob_2m) == "2 months old", f"got {age(dob_2m)!r}"
        """).strip(),
    max_turns=15,
    difficulty=1,
    hints=[
        "import datetime as dt is already at the top of time.py.",
        "days = (dt.date.today() - birth_date).days; years = days // 365; months = days // 30",
    ],
))
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
# ── test runner ───────────────────────────────────────────────────────────────
|
| 336 |
+
|
| 337 |
+
def run_tests(task: RepoTask) -> tuple[bool, str]:
|
| 338 |
+
"""Execute task.test_code and return (passed, message)."""
|
| 339 |
+
# Reload all task_manager modules to pick up any source-level changes
|
| 340 |
+
_reload_task_manager()
|
| 341 |
+
try:
|
| 342 |
+
exec(compile(task.test_code, "<test>", "exec"), {}) # noqa: S102
|
| 343 |
+
return True, "All assertions passed."
|
| 344 |
+
except AssertionError as exc:
|
| 345 |
+
return False, f"AssertionError: {exc}"
|
| 346 |
+
except Exception:
|
| 347 |
+
return False, traceback.format_exc(limit=5)
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
def _reload_task_manager() -> None:
    """Force-reload all task_manager submodules so edits take effect.

    Removes the ``graphforge.sample_repos.task_manager`` package and its
    submodules from ``sys.modules``; the next import re-executes them.
    """
    prefix = "graphforge.sample_repos.task_manager"
    # Match the package itself or a dotted child only, so a sibling package
    # whose name merely starts with the same text is left alone.
    stale = [
        name
        for name in sys.modules
        if name == prefix or name.startswith(prefix + ".")
    ]
    for name in stale:
        del sys.modules[name]
|
| 356 |
+
|
| 357 |
+
|
| 358 |
+
def all_task_ids() -> list[str]:
    """Return the ids of all tasks in ``TASK_BANK``, in its iteration order."""
    return list(TASK_BANK)
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
def get_task(task_id: str) -> RepoTask | None:
    """Look up a task in ``TASK_BANK``; return ``None`` if the id is unknown."""
    return TASK_BANK.get(task_id)
|
graphforge/__init__.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""GraphForge — graph-first code generation environment for long-horizon RL.
|
| 2 |
+
|
| 3 |
+
The agent constructs Python programs by mutating a typed function-call graph;
|
| 4 |
+
source files are a deterministic projection of the canonical graph.
|
| 5 |
+
|
| 6 |
+
Top-level subsystems:
|
| 7 |
+
graph canonical graph schema (Modules, Nodes, Edges)
|
| 8 |
+
actions fourteen-action surface, atomic dispatcher with rollback
|
| 9 |
+
types signature parser + edge type-flow validator
|
| 10 |
+
templates ~25-template body library, parameterized
|
| 11 |
+
materializer graph -> Python source
|
| 12 |
+
parser Python source -> graph (round-trip)
|
| 13 |
+
validator parse / import / mypy --strict gate
|
| 14 |
+
behavioral hypothesis-based property test runner
|
| 15 |
+
constraints per-kind constraint checker dispatch
|
| 16 |
+
reward reward engine (per-turn + terminal)
|
| 17 |
+
tasks task bank + variant generator
|
| 18 |
+
server FastAPI OpenEnv server
|
| 19 |
+
training GRPO multi-turn rollout
|
| 20 |
+
|
| 21 |
+
See README.md for design rationale and PROPOSAL.md for the full spec.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
__version__ = "0.0.1"
|
graphforge/actions/__init__.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Action surface for GraphForge.
|
| 2 |
+
|
| 3 |
+
Public API:
|
| 4 |
+
|
| 5 |
+
from graphforge.actions import dispatch, ActionResult
|
| 6 |
+
from graphforge.actions.schema import Action, AddNode, ...
|
| 7 |
+
from graphforge.actions.errors import ActionError
|
| 8 |
+
|
| 9 |
+
See PROPOSAL.md §4 for the full action vocabulary.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from graphforge.actions.dispatcher import ActionResult, dispatch
|
| 13 |
+
from graphforge.actions.errors import ActionError
|
| 14 |
+
|
| 15 |
+
__all__ = ["ActionError", "ActionResult", "dispatch"]
|
graphforge/actions/dispatcher.py
ADDED
|
@@ -0,0 +1,442 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Atomic action dispatcher.
|
| 2 |
+
|
| 3 |
+
Applies an :class:`Action` to a :class:`Graph`. Every mutation is atomic:
|
| 4 |
+
the dispatcher snapshots the graph before the handler runs and restores it on
|
| 5 |
+
any failure. Failures surface as :class:`ActionError` with a stable code, never
|
| 6 |
+
as silent partial state.
|
| 7 |
+
|
| 8 |
+
Information actions (query_*, materialize_*, run_*) are routed but their
|
| 9 |
+
implementations live in their respective subsystems and are stubbed for now.
|
| 10 |
+
``submit`` returns a sentinel so the episode runner can recognize termination.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
from dataclasses import dataclass
|
| 16 |
+
from typing import Any
|
| 17 |
+
|
| 18 |
+
from graphforge.actions import errors as E
|
| 19 |
+
from graphforge.actions.schema import (
|
| 20 |
+
Action,
|
| 21 |
+
AddEdge,
|
| 22 |
+
AddModule,
|
| 23 |
+
AddNode,
|
| 24 |
+
AttachBody,
|
| 25 |
+
MaterializeAndValidate,
|
| 26 |
+
QuerySpec,
|
| 27 |
+
QuerySubgraph,
|
| 28 |
+
QueryTypes,
|
| 29 |
+
RemoveEdge,
|
| 30 |
+
RemoveModule,
|
| 31 |
+
RemoveNode,
|
| 32 |
+
RunBehavioralTests,
|
| 33 |
+
SetNodeModule,
|
| 34 |
+
Submit,
|
| 35 |
+
)
|
| 36 |
+
from graphforge.actions.signature import parse_signature
|
| 37 |
+
from graphforge.graph.schema import (
|
| 38 |
+
ArgMapping,
|
| 39 |
+
Edge,
|
| 40 |
+
Graph,
|
| 41 |
+
Module,
|
| 42 |
+
Node,
|
| 43 |
+
)
|
| 44 |
+
from graphforge.templates import get_template, validate_args
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# ---- result envelope -------------------------------------------------
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
@dataclass
class ActionResult:
    """Envelope returned by :func:`dispatch`.

    Use the classmethod constructors rather than building instances by hand:
    :meth:`success`, :meth:`failure` and :meth:`terminate`.
    """

    # True when the action was applied or answered, False on failure.
    ok: bool
    # Handler-specific data on success; the error's ``to_dict()`` on failure.
    payload: dict[str, Any]
    # True only for results built by :meth:`terminate`.
    terminal: bool = False

    @classmethod
    def success(cls, **payload: Any) -> "ActionResult":
        """Build a non-terminal success result carrying ``payload``."""
        return cls(ok=True, payload=payload, terminal=False)

    @classmethod
    def failure(cls, err: E.ActionError) -> "ActionResult":
        """Build a non-terminal failure result from ``err.to_dict()``."""
        return cls(ok=False, payload=err.to_dict(), terminal=False)

    @classmethod
    def terminate(cls, **payload: Any) -> "ActionResult":
        """Build a successful result flagged as terminal."""
        return cls(ok=True, payload=payload, terminal=True)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# ---- dispatcher ------------------------------------------------------
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def dispatch(graph: Graph, action: Action) -> ActionResult:
    """Apply ``action`` to ``graph`` in place. Atomic on failure.

    On any handler exception (including :class:`ActionError`) the graph is
    rolled back to the pre-call snapshot and a failure result is returned;
    nothing propagates to the caller.  Exceptions other than
    :class:`ActionError` are reported under ``SCHEMA_REJECTION`` with an
    ``unhandled: ...`` message.
    """
    # NOTE(review): rollback assumes Graph.snapshot() returns a copy that the
    # handlers' in-place mutations cannot reach — confirm in graph/schema.py.
    snap = graph.snapshot()
    try:
        return _route(graph, action)
    except E.ActionError as err:
        _restore(graph, snap)
        return ActionResult.failure(err)
    except Exception as exc:  # pragma: no cover — unexpected handler bug
        _restore(graph, snap)
        return ActionResult.failure(
            E.ActionError(E.SCHEMA_REJECTION, f"unhandled: {exc}")
        )
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def _restore(graph: Graph, snap: Graph) -> None:
    """Point ``graph``'s modules, nodes and edges at those of ``snap``.

    The caller's ``graph`` object keeps its identity; only these three
    collections are swapped.
    """
    graph.modules = snap.modules
    graph.nodes = snap.nodes
    graph.edges = snap.edges
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def _route(graph: Graph, action: Action) -> ActionResult:
    """Select the handler for ``action`` by its concrete type and call it.

    Raises:
        ActionError: ``SCHEMA_REJECTION`` if the action type is not handled
            here; handlers may raise their own :class:`ActionError` codes.
    """
    # Mutations
    if isinstance(action, AddModule):
        return _h_add_module(graph, action)
    if isinstance(action, RemoveModule):
        return _h_remove_module(graph, action)
    if isinstance(action, AddNode):
        return _h_add_node(graph, action)
    if isinstance(action, RemoveNode):
        return _h_remove_node(graph, action)
    if isinstance(action, SetNodeModule):
        return _h_set_node_module(graph, action)
    if isinstance(action, AttachBody):
        return _h_attach_body(graph, action)
    if isinstance(action, AddEdge):
        return _h_add_edge(graph, action)
    if isinstance(action, RemoveEdge):
        return _h_remove_edge(graph, action)
    # Information (delegated; stubs for now)
    if isinstance(action, QuerySpec):
        return _h_query_spec(graph, action)
    if isinstance(action, QuerySubgraph):
        return _h_query_subgraph(graph, action)
    if isinstance(action, QueryTypes):
        return _h_query_types(graph, action)
    if isinstance(action, MaterializeAndValidate):
        return _h_materialize(graph, action)
    if isinstance(action, RunBehavioralTests):
        return _h_run_tests(graph, action)
    # Terminal
    if isinstance(action, Submit):
        return _h_submit(graph, action)
    raise E.ActionError(E.SCHEMA_REJECTION, f"unknown action: {type(action).__name__}")
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
# ---- mutation handlers ----------------------------------------------
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def _h_add_module(graph: Graph, a: AddModule) -> ActionResult:
    """Append a new module to the graph.

    Raises:
        ActionError: ``NAME_COLLISION`` if a module named ``a.name`` exists.
    """
    if graph.find_module(a.name) is not None:
        raise E.ActionError(
            E.NAME_COLLISION, f"module {a.name!r} already exists", name=a.name
        )
    graph.modules.append(Module(name=a.name, responsibility=a.responsibility))
    return ActionResult.success(added_module=a.name)
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _h_remove_module(graph: Graph, a: RemoveModule) -> ActionResult:
    """Remove an empty module from the graph.

    Raises:
        ActionError: ``UNKNOWN_MODULE`` if the module does not exist;
            ``MODULE_NOT_EMPTY`` (with ``node_count``) if nodes still belong
            to it.
    """
    if graph.find_module(a.name) is None:
        raise E.ActionError(E.UNKNOWN_MODULE, f"module {a.name!r} does not exist", name=a.name)
    # Count once; the count doubles as the emptiness check and error detail.
    node_count = sum(1 for n in graph.nodes if n.module == a.name)
    if node_count:
        raise E.ActionError(
            E.MODULE_NOT_EMPTY,
            f"module {a.name!r} still contains nodes",
            name=a.name,
            node_count=node_count,
        )
    graph.modules = [m for m in graph.modules if m.name != a.name]
    return ActionResult.success(removed_module=a.name)
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
def _h_add_node(graph: Graph, a: AddNode) -> ActionResult:
    """Append a new node to an existing module.

    The node's ``decl_order`` is one more than the largest ``decl_order`` of
    any node in the graph (0 for the first node).

    Raises:
        ActionError: ``UNKNOWN_MODULE`` if ``a.module`` does not exist;
            ``NAME_COLLISION`` if the module already has a node of that name;
            ``SCHEMA_REJECTION`` if ``parse_signature`` rejects the signature.
    """
    if graph.find_module(a.module) is None:
        raise E.ActionError(E.UNKNOWN_MODULE, f"module {a.module!r} does not exist", name=a.module)
    if graph.find_node(a.name, a.module) is not None:
        raise E.ActionError(
            E.NAME_COLLISION,
            f"node {a.module}.{a.name} already exists",
            name=a.name,
            module=a.module,
        )
    # Surface signature parse — catches errors that the pydantic regex misses.
    try:
        parse_signature(a.signature)
    except ValueError as ve:
        raise E.ActionError(E.SCHEMA_REJECTION, str(ve), signature=a.signature) from ve
    decl_order = max((n.decl_order for n in graph.nodes), default=-1) + 1
    graph.nodes.append(
        Node(
            name=a.name,
            module=a.module,
            signature=a.signature,
            purity=a.purity,
            error_policy=a.error_policy,
            decl_order=decl_order,
        )
    )
    return ActionResult.success(added_node=f"{a.module}.{a.name}", decl_order=decl_order)
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def _h_remove_node(graph: Graph, a: RemoveNode) -> ActionResult:
    """Remove a node that no edge refers to.

    Raises:
        ActionError: ``UNKNOWN_NODE`` if the node does not exist;
            ``NODE_HAS_REFERENCES`` (with ``referencing_edges``) if any edge
            has it as caller or callee.
    """
    n = graph.find_node(a.name, a.module)
    if n is None:
        raise E.ActionError(
            E.UNKNOWN_NODE, f"node {a.module}.{a.name} does not exist", name=a.name, module=a.module
        )
    qn = n.qualified_name
    refs = [e for e in graph.edges if e.caller == qn or e.callee == qn]
    if refs:
        raise E.ActionError(
            E.NODE_HAS_REFERENCES,
            f"node {qn} is referenced by {len(refs)} edge(s)",
            name=a.name,
            module=a.module,
            referencing_edges=[(e.caller, e.callee) for e in refs],
        )
    graph.nodes = [m for m in graph.nodes if not (m.name == a.name and m.module == a.module)]
    return ActionResult.success(removed_node=qn)
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
def _h_set_node_module(graph: Graph, a: SetNodeModule) -> ActionResult:
    """Move a node to another module and rewrite edges that refer to it.

    The node and edges are mutated in place before the cycle check; when the
    check fails the raised error leaves the graph modified, and undoing that
    is left to the caller.

    Raises:
        ActionError: ``UNKNOWN_NODE`` if the node is not in
            ``a.current_module``; ``UNKNOWN_MODULE`` if ``a.new_module`` does
            not exist; ``NAME_COLLISION`` if the target module already has a
            node of that name; ``WOULD_CREATE_CYCLE`` if
            ``graph.has_module_cycle()`` is true after the move.
    """
    n = graph.find_node(a.name, a.current_module)
    if n is None:
        raise E.ActionError(
            E.UNKNOWN_NODE,
            f"node {a.current_module}.{a.name} does not exist",
            name=a.name,
            module=a.current_module,
        )
    if graph.find_module(a.new_module) is None:
        raise E.ActionError(
            E.UNKNOWN_MODULE,
            f"target module {a.new_module!r} does not exist",
            name=a.new_module,
        )
    if graph.find_node(a.name, a.new_module) is not None:
        raise E.ActionError(
            E.NAME_COLLISION,
            f"node named {a.name!r} already exists in {a.new_module!r}",
            name=a.name,
            module=a.new_module,
        )
    old_qn = n.qualified_name
    new_qn = f"{a.new_module}.{a.name}"
    n.module = a.new_module
    # Rewrite edge endpoints that referred to the old qualified name.
    for e in graph.edges:
        if e.caller == old_qn:
            e.caller = new_qn
        if e.callee == old_qn:
            e.callee = new_qn
    # Post-condition: rewriting must not have introduced an import cycle.
    if graph.has_module_cycle():
        raise E.ActionError(
            E.WOULD_CREATE_CYCLE,
            f"moving {old_qn} -> {new_qn} would create an import cycle",
            from_qn=old_qn,
            to_qn=new_qn,
        )
    return ActionResult.success(moved_node={"from": old_qn, "to": new_qn})
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def _h_attach_body(graph: Graph, a: AttachBody) -> ActionResult:
    """Attach a body template and its arguments to an existing node.

    Raises:
        ActionError: ``UNKNOWN_NODE`` if the node does not exist;
            ``UNKNOWN_TEMPLATE`` if ``get_template`` returns ``None``;
            ``TEMPLATE_ARGS_INVALID`` if ``validate_args`` reports problems or
            the template's ``edges_ok`` rejects the node's fan-out / fan-in.
    """
    n = graph.find_node(a.name, a.module)
    if n is None:
        raise E.ActionError(
            E.UNKNOWN_NODE,
            f"node {a.module}.{a.name} does not exist",
            name=a.name,
            module=a.module,
        )
    spec = get_template(a.template)
    if spec is None:
        raise E.ActionError(
            E.UNKNOWN_TEMPLATE, f"unknown template {a.template!r}", template=a.template
        )
    problems = validate_args(a.template, a.args)
    if problems:
        raise E.ActionError(
            E.TEMPLATE_ARGS_INVALID,
            f"args invalid for template {a.template!r}: {'; '.join(problems)}",
            template=a.template,
            problems=problems,
        )
    # The template decides whether the node's current edge degrees suit it.
    out_d = graph.fan_out(n.qualified_name)
    in_d = graph.fan_in(n.qualified_name)
    if not spec.edges_ok(out_d, in_d):
        raise E.ActionError(
            E.TEMPLATE_ARGS_INVALID,
            f"template {a.template!r} requires different edge structure "
            f"(out_d={out_d}, in_d={in_d})",
            template=a.template,
            out_degree=out_d,
            in_degree=in_d,
        )
    n.body_template = a.template
    # Copy so later changes to the action's dict do not reach the node.
    n.body_template_args = dict(a.args)
    return ActionResult.success(
        attached={"node": n.qualified_name, "template": a.template}
    )
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
def _h_add_edge(graph: Graph, a: AddEdge) -> ActionResult:
    """Add a call edge ``a.caller -> a.callee`` with its argument mapping.

    The edge is appended before the cycle check; when the check fails the
    raised error leaves the edge in place, and undoing that is left to the
    caller.

    Raises:
        ActionError: ``UNKNOWN_NODE`` if either endpoint does not exist;
            ``DUPLICATE_EDGE`` if the edge already exists;
            ``ARG_MAPPING_INVALID`` if a required callee parameter is
            unmapped or the mapping names an unknown callee parameter or
            caller argument; ``WOULD_CREATE_CYCLE`` if
            ``graph.has_module_cycle()`` is true after adding the edge.
    """
    caller = graph.find_node_qualified(a.caller)
    callee = graph.find_node_qualified(a.callee)
    if caller is None:
        raise E.ActionError(E.UNKNOWN_NODE, f"caller {a.caller!r} does not exist", node=a.caller)
    if callee is None:
        raise E.ActionError(E.UNKNOWN_NODE, f"callee {a.callee!r} does not exist", node=a.callee)
    if graph.find_edge(a.caller, a.callee) is not None:
        raise E.ActionError(
            E.DUPLICATE_EDGE,
            f"edge {a.caller} -> {a.callee} already exists",
            caller=a.caller,
            callee=a.callee,
        )
    # Validate arg_mapping covers all required parameters of callee.
    callee_sig = parse_signature(callee.signature)
    caller_sig = parse_signature(caller.signature)
    mapped_callee = {m.callee_param for m in a.arg_mapping}
    mapped_caller = {m.caller_arg for m in a.arg_mapping}
    missing = set(callee_sig.required_params) - mapped_callee
    if missing:
        raise E.ActionError(
            E.ARG_MAPPING_INVALID,
            f"arg_mapping is missing required callee params: {sorted(missing)}",
            missing=sorted(missing),
        )
    bogus_callee = mapped_callee - set(callee_sig.all_params)
    if bogus_callee:
        raise E.ActionError(
            E.ARG_MAPPING_INVALID,
            f"arg_mapping references unknown callee params: {sorted(bogus_callee)}",
            unknown=sorted(bogus_callee),
        )
    bogus_caller = mapped_caller - set(caller_sig.all_params)
    if bogus_caller:
        raise E.ActionError(
            E.ARG_MAPPING_INVALID,
            f"arg_mapping references unknown caller args: {sorted(bogus_caller)}",
            unknown=sorted(bogus_caller),
        )
    # Add tentatively; check post-condition.
    graph.edges.append(
        Edge(
            caller=a.caller,
            callee=a.callee,
            arg_mapping=[ArgMapping(**m.model_dump()) for m in a.arg_mapping],
        )
    )
    if graph.has_module_cycle():
        raise E.ActionError(
            E.WOULD_CREATE_CYCLE,
            f"adding edge {a.caller} -> {a.callee} would create an import cycle",
            caller=a.caller,
            callee=a.callee,
        )
    return ActionResult.success(added_edge={"caller": a.caller, "callee": a.callee})
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
def _h_remove_edge(graph: Graph, a: RemoveEdge) -> ActionResult:
    """Remove the edge ``a.caller -> a.callee``.

    Raises:
        ActionError: ``UNKNOWN_EDGE`` if no such edge exists.
    """
    if graph.find_edge(a.caller, a.callee) is None:
        raise E.ActionError(
            E.UNKNOWN_EDGE,
            f"edge {a.caller} -> {a.callee} does not exist",
            caller=a.caller,
            callee=a.callee,
        )
    graph.edges = [
        x for x in graph.edges if not (x.caller == a.caller and x.callee == a.callee)
    ]
    return ActionResult.success(removed_edge={"caller": a.caller, "callee": a.callee})
|
| 363 |
+
|
| 364 |
+
|
| 365 |
+
# ---- info / terminal handlers (stubs) -------------------------------
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
def _h_query_spec(graph: Graph, a: QuerySpec) -> ActionResult:
    """Stub: report that spec queries are not implemented yet.

    Returns a success result with a ``not_implemented`` note and the
    requested ``constraint_kind``; ``graph`` is not consulted.
    """
    # TODO: route to graphforge.constraints once tasks/specs are wired in.
    return ActionResult.success(
        not_implemented="query_spec routed via dispatcher; constraint engine TODO",
        constraint_kind=a.constraint_kind,
    )
|
| 374 |
+
|
| 375 |
+
|
| 376 |
+
def _h_query_subgraph(graph: Graph, a: QuerySubgraph) -> ActionResult:
    """Answer a read-only subgraph query selected by the ``a.scope`` prefix.

    Supported scopes:
        ``module:<name>``   nodes of the module plus edges whose caller and
                            callee both belong to it.
        ``neighbors:<qn>``  callers and callees of a qualified node name.
        ``path:...``        not implemented yet; returns a stub payload.

    Raises:
        ActionError: ``SCHEMA_REJECTION`` for any other scope.
    """
    scope = a.scope
    if scope.startswith("module:"):
        mod = scope[len("module:"):]
        nodes = [n.model_dump() for n in graph.nodes_in_module(mod)]
        # Qualified names are "<module>.<name>", so the module is everything
        # before the LAST dot; this also holds for dotted module names.
        edges = [
            e.model_dump()
            for e in graph.edges
            if e.caller.rpartition(".")[0] == mod and e.callee.rpartition(".")[0] == mod
        ]
        return ActionResult.success(scope=scope, nodes=nodes, edges=edges)
    if scope.startswith("neighbors:"):
        qn = scope[len("neighbors:"):]
        return ActionResult.success(
            scope=scope,
            callers=graph.callers_of(qn),
            callees=graph.callees_of(qn),
        )
    if scope.startswith("path:"):
        # TODO: shortest-path search over call graph.
        return ActionResult.success(
            scope=scope, not_implemented="path search TODO"
        )
    raise E.ActionError(E.SCHEMA_REJECTION, f"unrecognized subgraph scope {scope!r}")
|
| 400 |
+
|
| 401 |
+
|
| 402 |
+
def _h_query_types(graph: Graph, a: QueryTypes) -> ActionResult:
    """Stub: echo ``a.scope`` with a ``not_implemented`` note."""
    # TODO: delegate to graphforge.types.
    return ActionResult.success(
        scope=a.scope, not_implemented="type engine TODO"
    )
|
| 407 |
+
|
| 408 |
+
|
| 409 |
+
def _h_materialize(graph: Graph, a: MaterializeAndValidate) -> ActionResult:
    """Project the graph to source and run the parse-only validator gate.

    Heavier validation gates (mypy --strict, import-resolution, behavioral
    tests) are added to this action's report as their subsystems land.

    Returns:
        A success result with the generated file names, ``bytes_total`` and
        the validator report as a dict.

    Raises:
        ActionError: ``SCHEMA_REJECTION`` if materialization raises
            ``ValueError``.
    """
    # Imported here rather than at module top.
    # NOTE(review): presumably to avoid an import cycle — confirm.
    from graphforge.materializer import materialize as _materialize
    from graphforge.validator import full_check

    try:
        files = _materialize(graph)
    except ValueError as ve:
        # Codegen rejected the graph (e.g. unknown pattern, template/edge
        # structure mismatch missed by the dispatcher's preconditions).
        raise E.ActionError(
            E.SCHEMA_REJECTION, f"materialization failed: {ve}"
        ) from ve
    report = full_check(files)
    return ActionResult.success(
        files=list(files.keys()),
        # NOTE(review): this sums len() of each value; if the values are str
        # it counts characters, not encoded bytes — confirm intent.
        bytes_total=sum(len(s) for s in files.values()),
        report=report.to_dict(),
    )
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
def _h_run_tests(graph: Graph, a: RunBehavioralTests) -> ActionResult:
    """Stub: behavioral tests are not implemented yet.

    Unlike the other stubs, which return a success result carrying a
    ``not_implemented`` note, this one always raises.

    Raises:
        ActionError: always, with code ``SCHEMA_REJECTION``.
    """
    # TODO: delegate to graphforge.behavioral.
    raise E.ActionError(
        E.SCHEMA_REJECTION, "run_behavioral_tests is not yet implemented"
    )
|
| 439 |
+
|
| 440 |
+
|
| 441 |
+
def _h_submit(graph: Graph, a: Submit) -> ActionResult:
    """Return the terminal result; the graph is not modified."""
    return ActionResult.terminate(submitted=True)
|
graphforge/actions/errors.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Structured action errors.
|
| 2 |
+
|
| 3 |
+
Every failure mode in the action dispatcher surfaces as an :class:`ActionError`
|
| 4 |
+
with a stable ``code`` so the agent can be trained against deterministic error
|
| 5 |
+
strings (see PROPOSAL.md §4.4 — "failures return structured errors describing
|
| 6 |
+
the cause"). Codes are kept short and stable across versions.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
from typing import Any
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
class ActionError(Exception):
    """Raised by action handlers; caught and reported by the dispatcher.

    Attributes:
        code: stable machine-readable error code (see the constants below).
        message: human-readable description.
        details: extra keyword context supplied by the raiser.

    ``str(err)`` is ``"[<code>] <message>"``.
    """

    def __init__(self, code: str, message: str, **details: Any) -> None:
        super().__init__(f"[{code}] {message}")
        self.code = code
        self.message = message
        self.details = details

    def to_dict(self) -> dict[str, Any]:
        """Return ``{"error": code, "message": message, **details}``.

        Detail keys are merged last, so a detail named ``error`` would
        replace the code entry.
        """
        return {"error": self.code, "message": self.message, **self.details}
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ---- canonical codes -------------------------------------------------
|
| 28 |
+
# Schema layer
|
| 29 |
+
SCHEMA_REJECTION = "schema_rejection"
|
| 30 |
+
# Pre-condition layer
|
| 31 |
+
UNKNOWN_MODULE = "unknown_module"
|
| 32 |
+
UNKNOWN_NODE = "unknown_node"
|
| 33 |
+
UNKNOWN_EDGE = "unknown_edge"
|
| 34 |
+
NAME_COLLISION = "name_collision"
|
| 35 |
+
MODULE_NOT_EMPTY = "module_not_empty"
|
| 36 |
+
NODE_HAS_REFERENCES = "node_has_references"
|
| 37 |
+
DUPLICATE_EDGE = "duplicate_edge"
|
| 38 |
+
UNKNOWN_TEMPLATE = "unknown_template"
|
| 39 |
+
TEMPLATE_ARGS_INVALID = "template_args_invalid"
|
| 40 |
+
RESPONSIBILITY_MISMATCH = "responsibility_mismatch"
|
| 41 |
+
ARG_MAPPING_INVALID = "arg_mapping_invalid"
|
| 42 |
+
# Post-condition layer
|
| 43 |
+
WOULD_CREATE_CYCLE = "would_create_cycle"
|
| 44 |
+
TYPE_MISMATCH = "type_mismatch"
|
graphforge/actions/schema.py
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Action message schemas.
|
| 2 |
+
|
| 3 |
+
These are the wire shapes accepted by the dispatcher. Every action is a
|
| 4 |
+
discriminated-union member keyed on ``kind``.
|
| 5 |
+
|
| 6 |
+
The action vocabulary mirrors PROPOSAL.md §4. Total surface:
|
| 7 |
+
|
| 8 |
+
Graph mutations
|
| 9 |
+
add_module, remove_module
|
| 10 |
+
add_node, remove_node, set_node_module, attach_body
|
| 11 |
+
add_edge, remove_edge
|
| 12 |
+
Information
|
| 13 |
+
query_spec, query_subgraph, query_types,
|
| 14 |
+
materialize_and_validate, run_behavioral_tests
|
| 15 |
+
Terminal
|
| 16 |
+
submit
|
| 17 |
+
|
| 18 |
+
Note: the proposal abstract states "eleven actions"; the section-4 listing
|
| 19 |
+
contains fourteen. We implement the section-4 set; the abstract count will
|
| 20 |
+
be corrected in the next revision of PROPOSAL.md.
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
from __future__ import annotations
|
| 24 |
+
|
| 25 |
+
from typing import Annotated, Literal, Optional, Union
|
| 26 |
+
|
| 27 |
+
from pydantic import BaseModel, ConfigDict, Field
|
| 28 |
+
|
| 29 |
+
from graphforge.graph.schema import ArgMapping, ErrorPolicy, Purity, ResponsibilityTag
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# Common config: forbid unknown fields, fail loudly on schema drift.
|
| 33 |
+
_cfg = ConfigDict(extra="forbid")
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# ---- mutations -------------------------------------------------------
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class AddModule(BaseModel):
    # Wire shape of the ``add_module`` graph-mutation action.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["add_module"] = "add_module"  # discriminator for the Action union
    name: str  # module name carried by the action
    responsibility: ResponsibilityTag  # one of the tags declared in graph.schema
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
class RemoveModule(BaseModel):
    # Wire shape of the ``remove_module`` graph-mutation action.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["remove_module"] = "remove_module"  # discriminator for the Action union
    name: str  # module name carried by the action
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class AddNode(BaseModel):
    # Wire shape of the ``add_node`` graph-mutation action. Field names and
    # defaults parallel the ``Node`` record in graphforge.graph.schema.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["add_node"] = "add_node"  # discriminator for the Action union
    name: str  # node (function) name
    module: str  # name of the module the node is declared in
    signature: str  # signature text, e.g. "(x: int) -> bool"
    purity: Purity = "impure"  # defaults to "impure" when omitted
    error_policy: ErrorPolicy = "none"  # defaults to "none" when omitted
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class RemoveNode(BaseModel):
    # Wire shape of the ``remove_node`` graph-mutation action.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["remove_node"] = "remove_node"  # discriminator for the Action union
    name: str  # node name
    module: str  # module the node is addressed under
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class SetNodeModule(BaseModel):
    # Wire shape of the ``set_node_module`` graph-mutation action.
    # NOTE(review): presumably moves node ``name`` from ``current_module`` to
    # ``new_module`` — confirm against the dispatcher.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["set_node_module"] = "set_node_module"  # discriminator for the Action union
    name: str  # node name
    current_module: str
    new_module: str
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class AttachBody(BaseModel):
    # Wire shape of the ``attach_body`` graph-mutation action.
    # NOTE(review): ``template`` / ``args`` presumably populate the node's
    # ``body_template`` / ``body_template_args`` — confirm against the dispatcher.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["attach_body"] = "attach_body"  # discriminator for the Action union
    name: str  # node name
    module: str  # module the node is addressed under
    template: str  # body template identifier
    args: dict[str, object] = Field(default_factory=dict)  # fresh empty dict per instance
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
class AddEdge(BaseModel):
    # Wire shape of the ``add_edge`` graph-mutation action.
    # NOTE(review): endpoints are plain ``str`` here; the qualified
    # ``<module>.<name>`` format is presumably enforced when the dispatcher
    # builds an ``Edge`` — confirm.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["add_edge"] = "add_edge"  # discriminator for the Action union
    caller: str
    callee: str
    arg_mapping: list[ArgMapping] = Field(default_factory=list)  # defaults to no mappings
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
class RemoveEdge(BaseModel):
    # Wire shape of the ``remove_edge`` graph-mutation action.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["remove_edge"] = "remove_edge"  # discriminator for the Action union
    caller: str
    callee: str
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
# ---- information actions --------------------------------------------
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
class QuerySpec(BaseModel):
    # Wire shape of the ``query_spec`` information action.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["query_spec"] = "query_spec"  # discriminator for the Action union
    # Optional; defaults to None.
    # NOTE(review): presumably filters the spec by constraint kind — confirm.
    constraint_kind: Optional[str] = None
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
class QuerySubgraph(BaseModel):
    # Wire shape of the ``query_subgraph`` information action.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["query_subgraph"] = "query_subgraph"  # discriminator for the Action union
    # The scope string is not validated at the schema level (plain ``str``).
    scope: str  # "module:<name>" | "neighbors:<qualified>" | "path:<from>:<to>"
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
class QueryTypes(BaseModel):
    # Wire shape of the ``query_types`` information action.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["query_types"] = "query_types"  # discriminator for the Action union
    # The scope string is not validated at the schema level (plain ``str``).
    scope: str  # "all" | "module:<name>" | "node:<qualified>"
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
class MaterializeAndValidate(BaseModel):
    # Wire shape of the ``materialize_and_validate`` information action.
    # Carries no payload beyond the discriminator.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["materialize_and_validate"] = "materialize_and_validate"
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
class RunBehavioralTests(BaseModel):
    # Wire shape of the ``run_behavioral_tests`` information action.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["run_behavioral_tests"] = "run_behavioral_tests"
    # Defaults to True.
    # NOTE(review): presumably selects running against materialized code — confirm.
    materialized: bool = True
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
# ---- terminal --------------------------------------------------------
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
class Submit(BaseModel):
    # Wire shape of the terminal ``submit`` action.
    # Carries no payload beyond the discriminator.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["submit"] = "submit"
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
# ---- discriminated union --------------------------------------------
|
| 142 |
+
|
| 143 |
+
# Discriminated union of all fourteen action schemas. Pydantic selects the
# member from the ``kind`` field; each member pins ``kind`` to a distinct
# ``Literal`` value.
Action = Annotated[
    Union[
        AddModule,
        RemoveModule,
        AddNode,
        RemoveNode,
        SetNodeModule,
        AttachBody,
        AddEdge,
        RemoveEdge,
        QuerySpec,
        QuerySubgraph,
        QueryTypes,
        MaterializeAndValidate,
        RunBehavioralTests,
        Submit,
    ],
    Field(discriminator="kind"),
]
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
__all__ = [
|
| 165 |
+
"Action",
|
| 166 |
+
"AddModule",
|
| 167 |
+
"RemoveModule",
|
| 168 |
+
"AddNode",
|
| 169 |
+
"RemoveNode",
|
| 170 |
+
"SetNodeModule",
|
| 171 |
+
"AttachBody",
|
| 172 |
+
"AddEdge",
|
| 173 |
+
"RemoveEdge",
|
| 174 |
+
"QuerySpec",
|
| 175 |
+
"QuerySubgraph",
|
| 176 |
+
"QueryTypes",
|
| 177 |
+
"MaterializeAndValidate",
|
| 178 |
+
"RunBehavioralTests",
|
| 179 |
+
"Submit",
|
| 180 |
+
]
|
graphforge/actions/signature.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cheap signature parser.
|
| 2 |
+
|
| 3 |
+
Used by the dispatcher to validate ``add_edge`` arg-mappings against the
|
| 4 |
+
callee's parameter list. Real type flow validation (caller_arg type vs
|
| 5 |
+
callee_param type) is the type engine's job; this module only extracts parameter
|
| 6 |
+
*names* from a signature string of the form::
|
| 7 |
+
|
| 8 |
+
(a: int, b: str = "x", *, c: bool) -> bool
|
| 9 |
+
|
| 10 |
+
Annotations are tolerated as opaque text. Defaults are tolerated and treated
|
| 11 |
+
as making the parameter optional.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import re
|
| 17 |
+
from dataclasses import dataclass
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@dataclass(frozen=True)
class Parameter:
    """One parameter extracted from a signature string by ``parse_signature``."""

    name: str  # identifier, with any leading ``*`` / ``**`` marker removed
    annotation: str | None  # text after the first ``:``, or None when absent
    has_default: bool  # True when a top-level ``= default`` was split off
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass(frozen=True)
class ParsedSignature:
    """Parsed form of a signature string: ordered parameters plus return text."""

    parameters: list[Parameter]
    return_annotation: str

    @property
    def required_params(self) -> list[str]:
        """Names of the parameters that carry no default, in declaration order."""
        names: list[str] = []
        for param in self.parameters:
            if param.has_default:
                continue
            names.append(param.name)
        return names

    @property
    def all_params(self) -> list[str]:
        """Names of every parameter, in declaration order."""
        names: list[str] = []
        for param in self.parameters:
            names.append(param.name)
        return names
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
_SIG_RE = re.compile(r"^\s*\((?P<params>.*)\)\s*->\s*(?P<ret>.+?)\s*$", re.DOTALL)
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def parse_signature(sig: str) -> ParsedSignature:
    """Extract parameter names, annotations and default flags from ``sig``.

    ``sig`` must look like ``(<params>) -> <return>``. Bare ``*`` and ``/``
    markers are skipped; a leading ``*`` or ``**`` is removed from variadic
    parameters before the name is read. Annotations are kept as opaque text.

    Raises ``ValueError`` when the overall shape does not match or when a
    parameter name is not a valid identifier.
    """
    match = _SIG_RE.match(sig)
    if match is None:
        raise ValueError(f"could not parse signature: {sig!r}")
    param_text = match.group("params").strip()
    return_text = match.group("ret").strip()

    parsed: list[Parameter] = []
    pieces = _split_top_level(param_text, ",") if param_text else []
    for raw in pieces:
        token = raw.strip()
        if token in {"", "*", "/"}:
            # Empty fragment (e.g. trailing comma) or a bare separator marker.
            continue

        # Drop the variadic marker; only the name matters here.
        # NOTE(review): variadics are recorded with has_default=False and so
        # appear in ``required_params`` — confirm that is what callers expect.
        if token.startswith("**"):
            token = token[2:].lstrip()
        elif token.startswith("*"):
            token = token[1:].lstrip()

        defaulted = False
        if "=" in token:
            # Only a top-level '=' counts; one nested in brackets yields None.
            head, default = _split_default(token)
            token = head.strip()
            defaulted = default is not None

        if ":" in token:
            name_part, _, annotation_part = token.partition(":")
            param_name = name_part.strip()
            annotation: str | None = annotation_part.strip()
        else:
            param_name = token
            annotation = None

        if not param_name.isidentifier():
            raise ValueError(f"unparseable parameter {token!r} in {sig!r}")
        parsed.append(
            Parameter(name=param_name, annotation=annotation, has_default=defaulted)
        )

    return ParsedSignature(parameters=parsed, return_annotation=return_text)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def _split_top_level(s: str, sep: str) -> list[str]:
|
| 87 |
+
"""Split ``s`` on ``sep`` at bracket-depth 0."""
|
| 88 |
+
out: list[str] = []
|
| 89 |
+
depth = 0
|
| 90 |
+
buf: list[str] = []
|
| 91 |
+
for ch in s:
|
| 92 |
+
if ch in "([{":
|
| 93 |
+
depth += 1
|
| 94 |
+
elif ch in ")]}":
|
| 95 |
+
depth -= 1
|
| 96 |
+
if ch == sep and depth == 0:
|
| 97 |
+
out.append("".join(buf))
|
| 98 |
+
buf = []
|
| 99 |
+
else:
|
| 100 |
+
buf.append(ch)
|
| 101 |
+
if buf:
|
| 102 |
+
out.append("".join(buf))
|
| 103 |
+
return out
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def _split_default(piece: str) -> tuple[str, str | None]:
|
| 107 |
+
"""Split off ``= default`` at bracket-depth 0. Returns (head, default | None)."""
|
| 108 |
+
depth = 0
|
| 109 |
+
for i, ch in enumerate(piece):
|
| 110 |
+
if ch in "([{":
|
| 111 |
+
depth += 1
|
| 112 |
+
elif ch in ")]}":
|
| 113 |
+
depth -= 1
|
| 114 |
+
elif ch == "=" and depth == 0:
|
| 115 |
+
return piece[:i], piece[i + 1 :]
|
| 116 |
+
return piece, None
|
graphforge/behavioral/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Behavioral test runner.
|
| 2 |
+
|
| 3 |
+
Responsibilities (PROPOSAL.md §2.1, §6.2):
|
| 4 |
+
|
| 5 |
+
* Run a property-based test suite (hypothesis) against materialized code,
|
| 6 |
+
in a sandboxed subprocess with timeout + memory limit.
|
| 7 |
+
* Tests are part of the task definition; their bodies are *hidden* from
|
| 8 |
+
the agent. The agent sees only test names and pass/fail at submission.
|
| 9 |
+
* Distinguish failures (assertion) from errors (timeout, crash) — both
|
| 10 |
+
count as test failures, but they're surfaced separately for diagnostics.
|
| 11 |
+
|
| 12 |
+
Public surface (TODO):
|
| 13 |
+
|
| 14 |
+
run_tests(files, tests, timeout=12.0) -> dict[str, TestResult]
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def run_tests(  # pragma: no cover — TODO
    files: dict[str, str],
    tests: list[object],
    timeout: float = 12.0,
) -> dict[str, object]:
    """Placeholder for the behavioral test runner.

    Not implemented yet: every call raises ``NotImplementedError`` and the
    arguments are not inspected.
    """
    message = "behavioral runner TODO — see PROPOSAL.md §6.2"
    raise NotImplementedError(message)
|
graphforge/constraints/__init__.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Constraint vocabulary and dispatch.
|
| 2 |
+
|
| 3 |
+
Three families (PROPOSAL.md §2.2):
|
| 4 |
+
|
| 5 |
+
* Structural — node_exists, edge_exists, module_count, acyclic_imports,
|
| 6 |
+
fan_in_max, fan_out_max, dag_depth_max, internal_only, …
|
| 7 |
+
* Type / signature — signature_matches, return_type, arg_type,
|
| 8 |
+
type_consistency, no_any_types, pure_function (TODO)
|
| 9 |
+
* Behavioral / materialization — materializes, imports_resolve,
|
| 10 |
+
type_checks, behavioral_test_passes, error_handling_present|absent
|
| 11 |
+
|
| 12 |
+
Currently shipped: tier-0 subset of structural + ``materializes``. Additional
|
| 13 |
+
kinds land as new discriminated members in :mod:`schema` and matching
|
| 14 |
+
``_check_*`` functions in :mod:`checker`.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from graphforge.constraints.checker import (
|
| 18 |
+
SatisfactionReport,
|
| 19 |
+
check,
|
| 20 |
+
evaluate_all,
|
| 21 |
+
)
|
| 22 |
+
from graphforge.constraints.schema import (
|
| 23 |
+
AcyclicImports,
|
| 24 |
+
Constraint,
|
| 25 |
+
EdgeExists,
|
| 26 |
+
Materializes,
|
| 27 |
+
ModuleCount,
|
| 28 |
+
ModuleResponsibility,
|
| 29 |
+
ModuleSizeMax,
|
| 30 |
+
NodeAbsent,
|
| 31 |
+
NodeExists,
|
| 32 |
+
STRUCTURAL_KINDS,
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
__all__ = [
|
| 36 |
+
"AcyclicImports",
|
| 37 |
+
"Constraint",
|
| 38 |
+
"EdgeExists",
|
| 39 |
+
"Materializes",
|
| 40 |
+
"ModuleCount",
|
| 41 |
+
"ModuleResponsibility",
|
| 42 |
+
"ModuleSizeMax",
|
| 43 |
+
"NodeAbsent",
|
| 44 |
+
"NodeExists",
|
| 45 |
+
"STRUCTURAL_KINDS",
|
| 46 |
+
"SatisfactionReport",
|
| 47 |
+
"check",
|
| 48 |
+
"evaluate_all",
|
| 49 |
+
]
|
graphforge/constraints/checker.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Constraint checker dispatch.
|
| 2 |
+
|
| 3 |
+
Each constraint kind has a small ``_check_*`` function. ``check`` routes by
|
| 4 |
+
isinstance and ``evaluate_all`` reports which constraints from a list are
|
| 5 |
+
satisfied or not.
|
| 6 |
+
|
| 7 |
+
Behavioral / materialization constraints (currently just ``materializes``)
|
| 8 |
+
delegate to the materializer and validator subsystems.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
from dataclasses import dataclass, field
|
| 14 |
+
|
| 15 |
+
from graphforge.constraints.schema import (
|
| 16 |
+
AcyclicImports,
|
| 17 |
+
Constraint,
|
| 18 |
+
EdgeExists,
|
| 19 |
+
Materializes,
|
| 20 |
+
ModuleCount,
|
| 21 |
+
ModuleResponsibility,
|
| 22 |
+
ModuleSizeMax,
|
| 23 |
+
NodeAbsent,
|
| 24 |
+
NodeExists,
|
| 25 |
+
STRUCTURAL_KINDS,
|
| 26 |
+
)
|
| 27 |
+
from graphforge.graph.schema import Graph
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
@dataclass
class SatisfactionReport:
    """Outcome of evaluating a list of constraints against a graph.

    Holds the constraints that passed and those that failed, each in the
    order they were appended.
    """

    satisfied: list[Constraint] = field(default_factory=list)
    unsatisfied: list[Constraint] = field(default_factory=list)

    @property
    def total(self) -> int:
        """Number of constraints recorded, satisfied or not."""
        passed = len(self.satisfied)
        failed = len(self.unsatisfied)
        return passed + failed

    @property
    def all_satisfied(self) -> bool:
        """True only when at least one constraint was recorded and none failed."""
        if self.total <= 0:
            # An empty report is never "all satisfied".
            return False
        return not self.unsatisfied

    def split_by_family(self) -> tuple["SatisfactionReport", "SatisfactionReport"]:
        """Split into (structural, behavioral) sub-reports.

        A constraint is structural when its ``kind`` is in
        ``STRUCTURAL_KINDS``; everything else goes to the behavioral report.
        The reward engine scores the two families with different magnitudes
        per PROPOSAL.md §5.2.
        """
        structural = SatisfactionReport()
        behavioral = SatisfactionReport()

        def family_of(constraint: Constraint) -> "SatisfactionReport":
            if constraint.kind in STRUCTURAL_KINDS:
                return structural
            return behavioral

        for constraint in self.satisfied:
            family_of(constraint).satisfied.append(constraint)
        for constraint in self.unsatisfied:
            family_of(constraint).unsatisfied.append(constraint)
        return structural, behavioral

    def to_dict(self) -> dict[str, object]:
        """Serialize to plain data; constraints are dumped via ``model_dump()``."""
        passed = [constraint.model_dump() for constraint in self.satisfied]
        failed = [constraint.model_dump() for constraint in self.unsatisfied]
        return {
            "satisfied": passed,
            "unsatisfied": failed,
            "total": self.total,
            "all_satisfied": self.all_satisfied,
        }
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
# ---- per-kind checkers ----------------------------------------------
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _check_node_exists(g: Graph, c: NodeExists) -> bool:
|
| 70 |
+
return g.find_node(c.name, c.module) is not None
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _check_node_absent(g: Graph, c: NodeAbsent) -> bool:
|
| 74 |
+
return g.find_node(c.name, c.module) is None
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _check_edge_exists(g: Graph, c: EdgeExists) -> bool:
|
| 78 |
+
return g.find_edge(c.caller, c.callee) is not None
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _check_module_count(g: Graph, c: ModuleCount) -> bool:
|
| 82 |
+
return len(g.modules) == c.n
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _check_module_size_max(g: Graph, c: ModuleSizeMax) -> bool:
|
| 86 |
+
return len(g.nodes_in_module(c.module)) <= c.n
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _check_module_responsibility(g: Graph, c: ModuleResponsibility) -> bool:
|
| 90 |
+
m = g.find_module(c.module)
|
| 91 |
+
return m is not None and m.responsibility == c.responsibility
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def _check_acyclic_imports(g: Graph, _c: AcyclicImports) -> bool:
|
| 95 |
+
return not g.has_module_cycle()
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def _check_materializes(g: Graph, _c: Materializes) -> bool:
    """Satisfied when the graph materializes and the result passes ``full_check``.

    Any exception raised by ``materialize`` counts as "not satisfied".
    Exceptions raised by ``full_check`` are not caught here.
    """
    # Deferred imports: callers that never evaluate this constraint do not
    # pay for loading the materializer / validator modules.
    from graphforge.materializer import materialize
    from graphforge.validator import full_check

    try:
        files = materialize(g)
    except Exception:
        return False
    else:
        report = full_check(files)
        return report.ok
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
# ---- dispatch --------------------------------------------------------
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
def check(graph: Graph, constraint: Constraint) -> bool:
    """Evaluate one constraint against ``graph`` and return whether it holds.

    Routing is by ``isinstance`` against each known constraint class, tried
    in a fixed order. Raises ``ValueError`` for an object that matches none.
    """
    routes = (
        (NodeExists, _check_node_exists),
        (NodeAbsent, _check_node_absent),
        (EdgeExists, _check_edge_exists),
        (ModuleCount, _check_module_count),
        (ModuleSizeMax, _check_module_size_max),
        (ModuleResponsibility, _check_module_responsibility),
        (AcyclicImports, _check_acyclic_imports),
        (Materializes, _check_materializes),
    )
    for constraint_type, checker in routes:
        if isinstance(constraint, constraint_type):
            return checker(graph, constraint)
    raise ValueError(f"unknown constraint kind: {constraint!r}")
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def evaluate_all(graph: Graph, constraints: list[Constraint]) -> SatisfactionReport:
    """Check every constraint in order and bucket it as satisfied or not.

    Exceptions raised by ``check`` propagate to the caller.
    """
    report = SatisfactionReport()
    for constraint in constraints:
        holds = check(graph, constraint)
        bucket = report.satisfied if holds else report.unsatisfied
        bucket.append(constraint)
    return report
|
graphforge/constraints/schema.py
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Constraint schemas (tier-0 subset).
|
| 2 |
+
|
| 3 |
+
Constraints are pydantic discriminated-union members keyed on ``kind``.
|
| 4 |
+
Tier-0 carves out the smallest set sufficient to express a real task and
|
| 5 |
+
exercise the reward engine end-to-end. The remaining vocabulary in
|
| 6 |
+
PROPOSAL.md §2.2 (fan_in_max, dag_depth_max, type_consistency,
|
| 7 |
+
behavioral_test_passes, …) lands on top of this same shape as new
|
| 8 |
+
discriminated members + checker functions.
|
| 9 |
+
|
| 10 |
+
Each constraint member is a pure data record. Behavior lives in
|
| 11 |
+
:mod:`graphforge.constraints.checker`.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
from typing import Annotated, Literal, Union
|
| 17 |
+
|
| 18 |
+
from pydantic import BaseModel, ConfigDict, Field
|
| 19 |
+
|
| 20 |
+
from graphforge.graph.schema import ResponsibilityTag
|
| 21 |
+
|
| 22 |
+
_cfg = ConfigDict(extra="forbid")
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# ---- structural ------------------------------------------------------
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class NodeExists(BaseModel):
    # Structural constraint: a node ``name`` must be present in ``module``.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["node_exists"] = "node_exists"  # discriminator for the Constraint union
    name: str
    module: str
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class NodeAbsent(BaseModel):
    # Structural constraint: no node ``name`` may be present in ``module``.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["node_absent"] = "node_absent"  # discriminator for the Constraint union
    name: str
    module: str
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class EdgeExists(BaseModel):
    # Structural constraint: an edge from ``caller`` to ``callee`` must exist.
    # Endpoint format is not validated here (plain ``str``).
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["edge_exists"] = "edge_exists"  # discriminator for the Constraint union
    caller: str  # qualified
    callee: str  # qualified
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
class ModuleCount(BaseModel):
    # Structural constraint on the number of modules in the graph.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["module_count"] = "module_count"  # discriminator for the Constraint union
    n: int = Field(..., ge=0)  # required; must be >= 0
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class ModuleSizeMax(BaseModel):
    # Structural constraint capping the number of nodes in one module.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["module_size_max"] = "module_size_max"  # discriminator for the Constraint union
    module: str  # module the cap applies to
    n: int = Field(..., ge=0)  # required; must be >= 0
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class ModuleResponsibility(BaseModel):
    # Structural constraint: ``module`` must carry the given responsibility tag.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["module_responsibility"] = "module_responsibility"
    module: str
    responsibility: ResponsibilityTag  # one of the tags declared in graph.schema
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
class AcyclicImports(BaseModel):
    # Structural constraint with no parameters beyond the discriminator.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["acyclic_imports"] = "acyclic_imports"
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
# ---- behavioral / materialization -----------------------------------
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
class Materializes(BaseModel):
    # Materialization constraint with no parameters beyond the discriminator.
    # Listed in STRUCTURAL_KINDS, so it is scored with the structural family.
    model_config = _cfg  # extra="forbid": unknown fields are rejected
    kind: Literal["materializes"] = "materializes"
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
# ---- discriminated union --------------------------------------------
|
| 83 |
+
|
| 84 |
+
# Discriminated union of every shipped constraint schema. Pydantic selects
# the member from the ``kind`` field; each member pins ``kind`` to a distinct
# ``Literal`` value.
Constraint = Annotated[
    Union[
        NodeExists,
        NodeAbsent,
        EdgeExists,
        ModuleCount,
        ModuleSizeMax,
        ModuleResponsibility,
        AcyclicImports,
        Materializes,
    ],
    Field(discriminator="kind"),
]
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# Set of kinds considered "structural" for the reward engine's per-constraint
|
| 100 |
+
# +1 magnitude. The "behavioral" family is reserved for property-test results
|
| 101 |
+
# (BehavioralTestPasses, TODO) which earn the higher +3 magnitude. The
|
| 102 |
+
# ``materializes`` constraint is structural for scoring purposes; the more
|
| 103 |
+
# severe "Materialization fails: -8" penalty in PROPOSAL.md §5.2 is an
|
| 104 |
+
# independent gate driven by the materializer raising or returning parse
|
| 105 |
+
# errors, not by this constraint kind.
|
| 106 |
+
# Membership is tested against each constraint's ``kind`` string; any kind
# not listed here is treated as behavioral when a report is split by family.
STRUCTURAL_KINDS = {
    "node_exists",
    "node_absent",
    "edge_exists",
    "module_count",
    "module_size_max",
    "module_responsibility",
    "acyclic_imports",
    "materializes",
}
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
__all__ = [
|
| 119 |
+
"AcyclicImports",
|
| 120 |
+
"Constraint",
|
| 121 |
+
"EdgeExists",
|
| 122 |
+
"Materializes",
|
| 123 |
+
"ModuleCount",
|
| 124 |
+
"ModuleResponsibility",
|
| 125 |
+
"ModuleSizeMax",
|
| 126 |
+
"NodeAbsent",
|
| 127 |
+
"NodeExists",
|
| 128 |
+
"STRUCTURAL_KINDS",
|
| 129 |
+
]
|
graphforge/graph/__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Canonical graph schema. See :mod:`graphforge.graph.schema`."""
|
| 2 |
+
|
| 3 |
+
from graphforge.graph.schema import (
|
| 4 |
+
ArgMapping,
|
| 5 |
+
Edge,
|
| 6 |
+
ErrorPolicy,
|
| 7 |
+
Graph,
|
| 8 |
+
Module,
|
| 9 |
+
Node,
|
| 10 |
+
Purity,
|
| 11 |
+
ResponsibilityTag,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
__all__ = [
|
| 15 |
+
"ArgMapping",
|
| 16 |
+
"Edge",
|
| 17 |
+
"ErrorPolicy",
|
| 18 |
+
"Graph",
|
| 19 |
+
"Module",
|
| 20 |
+
"Node",
|
| 21 |
+
"Purity",
|
| 22 |
+
"ResponsibilityTag",
|
| 23 |
+
]
|
graphforge/graph/schema.py
ADDED
|
@@ -0,0 +1,308 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Canonical graph schema.
|
| 2 |
+
|
| 3 |
+
The graph is the single source of truth for an in-progress program. Every
|
| 4 |
+
materialization is a deterministic function of (graph, template library).
|
| 5 |
+
|
| 6 |
+
Wire format mirrors the JSON shape documented in PROPOSAL.md §3.1, exactly:
|
| 7 |
+
|
| 8 |
+
{
|
| 9 |
+
"modules": [{"name": ..., "responsibility": ...}, ...],
|
| 10 |
+
"nodes": [{"name": ..., "module": ..., "signature": ...,
|
| 11 |
+
"body_template": ..., "body_template_args": {...},
|
| 12 |
+
"purity": ..., "error_policy": ..., "decl_order": ...}, ...],
|
| 13 |
+
"edges": [{"caller": "<module>.<name>",
|
| 14 |
+
"callee": "<module>.<name>",
|
| 15 |
+
"arg_mapping": [{"caller_arg": ..., "callee_param": ...}, ...]}, ...]
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
This module enforces shape and well-formedness only. Higher-order invariants
|
| 19 |
+
(unique names, edge endpoints exist, no cycles, type-flow compatibility) are
|
| 20 |
+
enforced by the action dispatcher and the type engine, not the schema, so
|
| 21 |
+
that callers can build partial / invalid graphs and inspect why they fail.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
from __future__ import annotations
|
| 25 |
+
|
| 26 |
+
import hashlib
|
| 27 |
+
import json
|
| 28 |
+
from typing import Literal, Optional
|
| 29 |
+
|
| 30 |
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
| 31 |
+
|
| 32 |
+
# ----------------------------------------------------------------------
|
| 33 |
+
# Enumerated tags
|
| 34 |
+
# ----------------------------------------------------------------------
|
| 35 |
+
|
| 36 |
+
# Responsibility tags constrain which kinds of nodes a module is allowed to
|
| 37 |
+
# host. This is the canonical set; new tags must be added deliberately because tasks
|
| 38 |
+
# encode constraints against this vocabulary.
|
| 39 |
+
ResponsibilityTag = Literal[
|
| 40 |
+
"io",
|
| 41 |
+
"validation",
|
| 42 |
+
"transform",
|
| 43 |
+
"orchestration",
|
| 44 |
+
"storage",
|
| 45 |
+
"formatting",
|
| 46 |
+
"lookup",
|
| 47 |
+
"policy",
|
| 48 |
+
"logging",
|
| 49 |
+
"computation",
|
| 50 |
+
]
|
| 51 |
+
|
| 52 |
+
Purity = Literal["pure", "impure"]
|
| 53 |
+
|
| 54 |
+
# How a function handles errors in its body. "guard" means it includes a
|
| 55 |
+
# guard / try-except. "propagate" means it deliberately lets errors flow up.
|
| 56 |
+
# "none" is the default — no claim either way.
|
| 57 |
+
ErrorPolicy = Literal["guard", "propagate", "none"]
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
# ----------------------------------------------------------------------
|
| 61 |
+
# Atomic records
|
| 62 |
+
# ----------------------------------------------------------------------
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
class Module(BaseModel):
    """A declared module — one Python file at materialization time."""

    model_config = ConfigDict(extra="forbid", frozen=False)

    name: str = Field(..., min_length=1)
    responsibility: ResponsibilityTag

    @field_validator("name")
    @classmethod
    def _name_is_identifier(cls, v: str) -> str:
        """Require a public, non-keyword Python identifier.

        Raises ``ValueError`` when ``v`` is not an identifier, is a reserved
        keyword, or starts with an underscore.
        """
        # Function-scope import keeps this validator self-contained.
        import keyword

        if not v.isidentifier():
            raise ValueError(f"module name {v!r} is not a Python identifier")
        # ``str.isidentifier`` accepts reserved words such as "class" or
        # "import"; a module with such a name cannot be imported.
        if keyword.iskeyword(v):
            raise ValueError(f"module name {v!r} is a reserved Python keyword")
        if v.startswith("_"):
            raise ValueError(f"module name {v!r} must not start with an underscore")
        return v
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
class Node(BaseModel):
    """A declared function. ``body_template`` may be unset until attach_body."""

    model_config = ConfigDict(extra="forbid", frozen=False)

    name: str = Field(..., min_length=1)
    module: str = Field(..., min_length=1)
    signature: str = Field(..., min_length=2)  # e.g., "(x: int) -> bool"
    body_template: Optional[str] = None
    body_template_args: dict[str, object] = Field(default_factory=dict)
    purity: Purity = "impure"
    error_policy: ErrorPolicy = "none"
    decl_order: int = 0

    @field_validator("name")
    @classmethod
    def _name_is_identifier(cls, v: str) -> str:
        """Require a non-keyword Python identifier.

        Raises ``ValueError`` when ``v`` is not an identifier or is a
        reserved keyword.
        """
        # Function-scope import keeps this validator self-contained.
        import keyword

        if not v.isidentifier():
            raise ValueError(f"node name {v!r} is not a Python identifier")
        # ``str.isidentifier`` accepts reserved words such as "def" or
        # "return", which cannot be used as a function name.
        if keyword.iskeyword(v):
            raise ValueError(f"node name {v!r} is a reserved Python keyword")
        return v

    @field_validator("signature")
    @classmethod
    def _signature_shape(cls, v: str) -> str:
        """Surface check only: must start with ``(`` and contain ``->``."""
        # Cheap surface check; the type engine does the real parse.
        if not v.lstrip().startswith("("):
            raise ValueError(f"signature must start with '(': got {v!r}")
        if "->" not in v:
            raise ValueError(f"signature must include '->' return arrow: got {v!r}")
        return v

    # Convenience -----------------------------------------------------

    @property
    def qualified_name(self) -> str:
        """``<module>.<name>`` — the canonical address used on edges."""
        return f"{self.module}.{self.name}"
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
class ArgMapping(BaseModel):
    """How an edge wires a caller's argument to a callee's parameter.

    Unknown fields are rejected (``extra="forbid"``); both fields must be
    non-empty strings.
    """

    model_config = ConfigDict(extra="forbid", frozen=False)

    # Text supplied on the caller side of the call.
    caller_arg: str = Field(..., min_length=1)
    # Name of the callee parameter that receives ``caller_arg``.
    callee_param: str = Field(..., min_length=1)
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
class Edge(BaseModel):
    """A CALLS edge. Endpoints are qualified node names ``<module>.<name>``."""

    model_config = ConfigDict(extra="forbid", frozen=False)

    # Qualified name of the calling function.
    caller: str = Field(..., min_length=3)
    # Qualified name of the called function.
    callee: str = Field(..., min_length=3)
    # Argument wiring for the call; empty by default.
    arg_mapping: list[ArgMapping] = Field(default_factory=list)

    @field_validator("caller", "callee")
    @classmethod
    def _qualified(cls, v: str) -> str:
        """Return ``v`` unchanged if it has the shape ``<module>.<name>``.

        Raises:
            ValueError: if ``v`` does not contain exactly one dot, or if
                either side of the dot is not a Python identifier.
        """
        pieces = v.split(".")
        # Exactly one dot <=> exactly two pieces.
        if len(pieces) != 2:
            raise ValueError(
                f"edge endpoint {v!r} is not qualified (expected '<module>.<name>')"
            )
        if not all(piece.isidentifier() for piece in pieces):
            raise ValueError(f"edge endpoint {v!r} has non-identifier parts")
        return v
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
# ----------------------------------------------------------------------
|
| 154 |
+
# Graph
|
| 155 |
+
# ----------------------------------------------------------------------
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
class Graph(BaseModel):
    """Canonical graph state. Mutable; deep-copied via ``snapshot``."""

    model_config = ConfigDict(extra="forbid", frozen=False)

    modules: list[Module] = Field(default_factory=list)
    nodes: list[Node] = Field(default_factory=list)
    edges: list[Edge] = Field(default_factory=list)

    # ----- lookup ----------------------------------------------------

    def find_module(self, name: str) -> Optional[Module]:
        """Return the first module named ``name``, or None if absent."""
        for m in self.modules:
            if m.name == name:
                return m
        return None

    def find_node(self, name: str, module: str) -> Optional[Node]:
        """Return the first node matching both ``name`` and ``module``, or None."""
        for n in self.nodes:
            if n.name == name and n.module == module:
                return n
        return None

    def find_node_qualified(self, qualified: str) -> Optional[Node]:
        """Look a node up by ``<module>.<name>``.

        Returns None when ``qualified`` does not contain exactly one dot or
        when no such node exists.
        """
        if qualified.count(".") != 1:
            return None
        mod, nm = qualified.split(".")
        return self.find_node(nm, mod)

    def find_edge(self, caller: str, callee: str) -> Optional[Edge]:
        """Return the first edge from ``caller`` to ``callee``, or None."""
        for e in self.edges:
            if e.caller == caller and e.callee == callee:
                return e
        return None

    def nodes_in_module(self, module: str) -> list[Node]:
        """Return all nodes declared in ``module``, in list order."""
        return [n for n in self.nodes if n.module == module]

    def callers_of(self, qualified: str) -> list[str]:
        """Return the caller of every edge whose callee is ``qualified``."""
        return [e.caller for e in self.edges if e.callee == qualified]

    def callees_of(self, qualified: str) -> list[str]:
        """Return the callee of every edge whose caller is ``qualified``."""
        return [e.callee for e in self.edges if e.caller == qualified]

    def fan_in(self, qualified: str) -> int:
        """Number of edges pointing at ``qualified``."""
        return len(self.callers_of(qualified))

    def fan_out(self, qualified: str) -> int:
        """Number of edges leaving ``qualified``."""
        return len(self.callees_of(qualified))

    # ----- structural derivations ------------------------------------

    def import_edges(self) -> set[tuple[str, str]]:
        """Set of (caller_module, callee_module) pairs from cross-module edges."""
        out: set[tuple[str, str]] = set()
        for e in self.edges:
            cm = e.caller.split(".")[0]
            tm = e.callee.split(".")[0]
            if cm != tm:
                out.add((cm, tm))
        return out

    def has_module_cycle(self) -> bool:
        """True iff the cross-module import graph contains a directed cycle."""
        # Start from the declared modules, then add any module that only
        # appears as an edge endpoint so every vertex has an adjacency entry.
        adj: dict[str, set[str]] = {m.name: set() for m in self.modules}
        for src, dst in self.import_edges():
            adj.setdefault(src, set()).add(dst)
            adj.setdefault(dst, set())
        # Three-colour DFS: reaching a GRAY vertex means a back edge (cycle).
        WHITE, GRAY, BLACK = 0, 1, 2
        color: dict[str, int] = {k: WHITE for k in adj}

        def visit(u: str) -> bool:
            color[u] = GRAY
            for v in adj.get(u, ()):
                if color[v] == GRAY:
                    return True
                if color[v] == WHITE and visit(v):
                    return True
            color[u] = BLACK
            return False

        return any(color[u] == WHITE and visit(u) for u in adj)

    def call_graph_depth(self) -> int:
        """Longest path length (in edges) in the function call DAG.

        If the call graph is cyclic, returns the special value -1 (callers
        should treat this as an invariant violation).
        """
        adj: dict[str, list[str]] = {n.qualified_name: [] for n in self.nodes}
        for e in self.edges:
            adj.setdefault(e.caller, []).append(e.callee)
            adj.setdefault(e.callee, [])
        memo: dict[str, int] = {}
        # Marker stored in ``memo`` while a vertex is on the recursion stack.
        ON_STACK = -2

        def dfs(u: str) -> int:
            if u in memo:
                if memo[u] == ON_STACK:
                    return -1  # revisited a vertex still being explored: cycle
                return memo[u]
            memo[u] = ON_STACK
            best = 0
            for v in adj.get(u, ()):
                d = dfs(v)
                if d == -1:
                    return -1
                best = max(best, d + 1)
            memo[u] = best
            return best

        results = [dfs(u) for u in adj]
        if any(r == -1 for r in results):
            return -1
        return max(results, default=0)

    # ----- copying / hashing -----------------------------------------

    def snapshot(self) -> "Graph":
        """Deep copy. Used by the dispatcher for atomic action rollback."""
        return self.model_copy(deep=True)

    def structural_hash(self) -> str:
        """Stable SHA-256 over a canonical JSON projection.

        Insensitive to list ordering on the dimensions where order is not
        semantically meaningful (modules, nodes, and the interleaving of
        edges that belong to different callers), but sensitive to
        ``decl_order`` and to the relative order of each caller's out-edges.
        Out-edge order is kept because consumers that iterate a caller's
        out-edges in insertion order produce different output for different
        orders.
        """
        canon: dict[str, object] = {
            "modules": sorted(
                [m.model_dump() for m in self.modules],
                key=lambda d: d["name"],
            ),
            "nodes": sorted(
                [n.model_dump() for n in self.nodes],
                key=lambda d: (d["module"], d["name"]),
            ),
            # ``sorted`` is stable: keying on the caller alone groups edges
            # by caller while preserving each caller's insertion order.
            "edges": sorted(
                [e.model_dump() for e in self.edges],
                key=lambda d: d["caller"],
            ),
        }
        blob = json.dumps(canon, sort_keys=True, default=str).encode("utf-8")
        return hashlib.sha256(blob).hexdigest()

    # ----- factories -------------------------------------------------

    @classmethod
    def empty(cls) -> "Graph":
        """Return a graph with no modules, nodes, or edges."""
        return cls()
|
graphforge/knowledge_graph.py
ADDED
|
@@ -0,0 +1,233 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""In-memory Knowledge Graph for a Python repository.
|
| 2 |
+
|
| 3 |
+
Mirrors the structure of a Neo4j property graph but lives in RAM:
|
| 4 |
+
|
| 5 |
+
Nodes
|
| 6 |
+
-----
|
| 7 |
+
repo — the repository root
|
| 8 |
+
package — a directory containing __init__.py
|
| 9 |
+
module — a .py file
|
| 10 |
+
class — a class definition
|
| 11 |
+
function — a top-level or nested function / async function
|
| 12 |
+
method — a method inside a class
|
| 13 |
+
|
| 14 |
+
Edges (directed)
|
| 15 |
+
-----------------
|
| 16 |
+
contains — parent → child (repo→package, package→module, module→class, …)
|
| 17 |
+
calls — function/method → function/method (same-file same-package)
|
| 18 |
+
imports — module → module (from x import y / import x)
|
| 19 |
+
inherits — class → class
|
| 20 |
+
|
| 21 |
+
Each node stores the actual source lines so the agent can read/edit them.
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
from __future__ import annotations
|
| 25 |
+
|
| 26 |
+
import textwrap
|
| 27 |
+
from dataclasses import dataclass, field
|
| 28 |
+
from typing import Iterable
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
# ── node & edge ───────────────────────────────────────────────────────────────
|
| 32 |
+
|
| 33 |
+
@dataclass
class KGNode:
    """One vertex of the repository knowledge graph."""

    node_id: str  # unique key, e.g. "function:validators.py:validate_title"
    node_type: str  # module | class | function | method | package | repo
    name: str  # short identifier
    file_path: str  # relative path from repo root (empty for repo/package)
    line_start: int = 0
    line_end: int = 0
    source: str = ""  # full source text of this node (incl. def line)
    docstring: str = ""
    metadata: dict = field(default_factory=dict)

    def brief(self) -> str:
        """One-line summary for graph overviews.

        The upper-cased node type is left-aligned in an 8-character column;
        a ``[file:line]`` suffix is added only when ``file_path`` is non-empty.
        """
        type_tag = format(self.node_type.upper(), "<8")
        location = ""
        if self.file_path:
            location = f" [{self.file_path}:{self.line_start}]"
        return f"[{type_tag}] {self.node_id}{location}"
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@dataclass
class KGEdge:
    """A directed, typed edge between two nodes, referenced by node id."""

    edge_type: str  # contains | calls | imports | inherits
    source_id: str  # node_id the edge starts from
    target_id: str  # node_id the edge points to
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
# ── knowledge graph ───────────────────────────────────────────────────────────
|
| 59 |
+
|
| 60 |
+
class KnowledgeGraph:
    """Property graph for a repository.

    Supports rich queries used by the agent and reward checker. Nodes are
    stored by ``node_id``; edges are stored as a flat list in insertion order.
    All neighbour queries return results in edge-insertion order so that
    their output is reproducible across interpreter runs.
    """

    def __init__(self, repo_path: str) -> None:
        self.repo_path = repo_path
        self._nodes: dict[str, KGNode] = {}
        self._edges: list[KGEdge] = []

    # ── mutation ──────────────────────────────────────────────────────────────

    def add_node(self, node: KGNode) -> None:
        """Store ``node`` under its ``node_id``, replacing any existing entry."""
        self._nodes[node.node_id] = node

    def add_edge(self, edge: KGEdge) -> None:
        """Append ``edge``; duplicates are not filtered."""
        self._edges.append(edge)

    def update_node_source(self, node_id: str, new_source: str) -> None:
        """Replace a node's source and recount lines.

        Raises:
            KeyError: if ``node_id`` is not in the graph.
        """
        node = self._nodes[node_id]
        node.source = new_source
        lines = new_source.splitlines()
        node.line_end = node.line_start + len(lines) - 1

    def insert_node(
        self,
        parent_id: str,
        new_node: KGNode,
    ) -> None:
        """Add new_node to the graph and wire a contains edge from parent."""
        self._nodes[new_node.node_id] = new_node
        self._edges.append(KGEdge("contains", parent_id, new_node.node_id))

    def remove_node(self, node_id: str) -> None:
        """Drop the node (if present) and every edge touching it."""
        self._nodes.pop(node_id, None)
        self._edges = [e for e in self._edges
                       if e.source_id != node_id and e.target_id != node_id]

    # ── queries ───────────────────────────────────────────────────────────────

    def get_node(self, node_id: str) -> KGNode | None:
        """Return the node with this id, or None."""
        return self._nodes.get(node_id)

    def all_nodes(self, node_type: str | None = None) -> list[KGNode]:
        """Return all nodes, optionally restricted to one ``node_type``."""
        nodes = list(self._nodes.values())
        if node_type:
            nodes = [n for n in nodes if n.node_type == node_type]
        return nodes

    def _linked(self, node_id: str, edge_type: str, *, outgoing: bool) -> list[KGNode]:
        """Nodes joined to ``node_id`` by ``edge_type`` edges, de-duplicated.

        With ``outgoing=True`` the edge targets are returned, otherwise the
        edge sources. A dict is used as an ordered set so the result follows
        edge insertion order instead of hash-dependent set order. Ids with no
        stored node are skipped.
        """
        ordered_ids: dict[str, None] = {}
        for e in self._edges:
            if e.edge_type != edge_type:
                continue
            if outgoing:
                if e.source_id == node_id:
                    ordered_ids[e.target_id] = None
            elif e.target_id == node_id:
                ordered_ids[e.source_id] = None
        return [self._nodes[i] for i in ordered_ids if i in self._nodes]

    def children_of(self, node_id: str) -> list[KGNode]:
        """Nodes that ``node_id`` contains, in edge insertion order."""
        return self._linked(node_id, "contains", outgoing=True)

    def parent_of(self, node_id: str) -> KGNode | None:
        """Source of the first ``contains`` edge targeting ``node_id``, or None."""
        for e in self._edges:
            if e.target_id == node_id and e.edge_type == "contains":
                return self._nodes.get(e.source_id)
        return None

    def callers_of(self, node_id: str) -> list[KGNode]:
        """Nodes with a ``calls`` edge into ``node_id``, in edge insertion order."""
        return self._linked(node_id, "calls", outgoing=False)

    def callees_of(self, node_id: str) -> list[KGNode]:
        """Nodes that ``node_id`` has a ``calls`` edge to, in edge insertion order."""
        return self._linked(node_id, "calls", outgoing=True)

    def imports_of(self, module_id: str) -> list[KGNode]:
        """Nodes that ``module_id`` has an ``imports`` edge to, in edge insertion order."""
        return self._linked(module_id, "imports", outgoing=True)

    def search(self, keywords: str, node_type: str | None = None) -> list[KGNode]:
        """Keyword search over node names, docstrings, and source.

        ``keywords`` is lower-cased and split on whitespace; a node matches
        when every keyword is a substring of its lower-cased
        name + docstring + source. An empty ``keywords`` matches every node.
        """
        kws = keywords.lower().split()
        results: list[KGNode] = []
        for node in self._nodes.values():
            if node_type and node.node_type != node_type:
                continue
            haystack = f"{node.name} {node.docstring} {node.source}".lower()
            if all(kw in haystack for kw in kws):
                results.append(node)
        return results

    def subgraph(self, root_id: str, depth: int = 2) -> list[KGNode]:
        """BFS from root_id up to depth hops; returns all encountered nodes.

        Edges of every type are followed in their stored direction. Nodes are
        returned in discovery order; dicts act as ordered sets so the order
        does not depend on string hashing.
        """
        visited: dict[str, None] = {}
        frontier: dict[str, None] = {root_id: None}
        for _ in range(depth):
            next_frontier: dict[str, None] = {}
            for nid in frontier:
                if nid in visited:
                    continue
                visited[nid] = None
                for e in self._edges:
                    if e.source_id == nid and e.target_id not in visited:
                        next_frontier[e.target_id] = None
            frontier = next_frontier
        # Nodes reached on the final hop are included but not expanded.
        for nid in frontier:
            visited.setdefault(nid, None)
        return [self._nodes[nid] for nid in visited if nid in self._nodes]

    # ── text representations ──────────────────────────────────────────────────

    def overview(self, max_chars: int = 3000) -> str:
        """Compact multi-line overview of the repo graph, capped to avoid LLM context overflow.

        Modules are listed in ``file_path`` order. Expansion stops after the
        first module that pushes the text past ``max_chars``; a trailer then
        reports how many modules were left out.
        """
        lines: list[str] = [f"## Repository: {self.repo_path}", ""]
        modules = self.all_nodes("module")
        all_fns = self.all_nodes("function")
        all_cls = self.all_nodes("class")
        lines.append(f" {len(modules)} modules · {len(all_fns)} functions · {len(all_cls)} classes")
        lines.append("")

        ordered = sorted(modules, key=lambda n: n.file_path)
        for position, mod in enumerate(ordered):
            children = self.children_of(mod.node_id)
            funcs = [c for c in children if c.node_type in ("function", "method")]
            classes = [c for c in children if c.node_type == "class"]
            summary = []
            if classes:
                summary.append(f"{len(classes)} class{'es' if len(classes)>1 else ''}")
            if funcs:
                summary.append(f"{len(funcs)} fn{'s' if len(funcs)>1 else ''}")
            lines.append(f" [{mod.file_path}] ({', '.join(summary) or 'empty'})")
            for cls in sorted(classes, key=lambda n: n.name):
                methods = [c for c in self.children_of(cls.node_id) if c.node_type == "method"]
                mnames = ", ".join(m.name for m in sorted(methods, key=lambda n: n.line_start))
                lines.append(f" class {cls.name} → {mnames or '(no methods)'}")
                lines.append(f" node_id: {cls.node_id}")
            for fn in sorted(funcs, key=lambda n: n.line_start):
                lines.append(f" def {fn.name}{fn.metadata.get('signature', '')}")
                lines.append(f" node_id: {fn.node_id}")

            # Stop expanding if we are already near the character cap
            current = "\n".join(lines)
            if len(current) > max_chars:
                # Count against the sorted iteration order, not the unsorted
                # list, so the trailer reflects what was actually skipped.
                remaining = len(ordered) - (position + 1)
                if remaining:
                    lines.append(f"\n ... [{remaining} more modules not shown — use query() to explore]")
                break

        return "\n".join(lines)

    def node_detail(self, node_id: str) -> str:
        """Full inspection view of a single node.

        Returns an ``[ERROR]`` line instead of raising when the id is unknown.
        """
        node = self._nodes.get(node_id)
        if node is None:
            return f"[ERROR] node_id {node_id!r} not found in graph."
        lines = [
            f"## Node: {node.node_id}",
            f"type : {node.node_type}",
            f"file : {node.file_path} (lines {node.line_start}–{node.line_end})",
        ]
        if node.docstring:
            lines.append(f"docstring: {node.docstring[:120]}")
        callers = self.callers_of(node_id)
        callees = self.callees_of(node_id)
        if callers:
            lines.append("called by: " + ", ".join(n.name for n in callers))
        if callees:
            lines.append("calls : " + ", ".join(n.name for n in callees))
        children = self.children_of(node_id)
        if children:
            lines.append("contains : " + ", ".join(c.name for c in children))
        lines += ["", "### Source", "```python", node.source or "(no source)", "```"]
        return "\n".join(lines)

    def snapshot(self) -> "KnowledgeGraph":
        """Deep copy — used to preserve state before mutations."""
        import copy
        return copy.deepcopy(self)
|
graphforge/materializer/__init__.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Graph -> Python source projection.
|
| 2 |
+
|
| 3 |
+
Responsibilities (PROPOSAL.md §3.3):
|
| 4 |
+
|
| 5 |
+
* Emit one ``<module>.py`` per declared module.
|
| 6 |
+
* Emit functions in :attr:`Node.decl_order` order.
|
| 7 |
+
* Compute ``from <module> import <name>`` lines from cross-module edges,
|
| 8 |
+
deduplicated and sorted.
|
| 9 |
+
* Expand body templates with the node's ``body_template_args`` to produce
|
| 10 |
+
a runnable function body.
|
| 11 |
+
|
| 12 |
+
The materializer is total over well-formed graphs: every dispatcher-accepted
|
| 13 |
+
graph must produce parseable source. Round-trip correctness (the produced
|
| 14 |
+
source re-parses to the same graph) is enforced by tests in
|
| 15 |
+
:mod:`graphforge.parser` (TODO).
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from graphforge.materializer.materialize import materialize
|
| 19 |
+
|
| 20 |
+
__all__ = ["materialize"]
|
graphforge/materializer/codegen.py
ADDED
|
@@ -0,0 +1,169 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Per-template body codegen.
|
| 2 |
+
|
| 3 |
+
Each public ``render_<template>`` function takes the host node, its outgoing
|
| 4 |
+
edges in deterministic order, and returns a multi-line indented body suitable
|
| 5 |
+
for inserting after a ``def`` line. Bodies use only stdlib and never reference
|
| 6 |
+
unresolved names (the orchestrator ensures imports + pattern constants are
|
| 7 |
+
in scope).
|
| 8 |
+
|
| 9 |
+
Codegen is intentionally simple: the goal is *runnable, readable* Python that
|
| 10 |
+
respects template semantics, not optimal idiomatic code.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
from graphforge.graph.schema import Edge, Graph, Node
|
| 16 |
+
from graphforge.materializer import patterns
|
| 17 |
+
|
| 18 |
+
INDENT = " "
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# ---- helpers ---------------------------------------------------------
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _kwargs_for(edge: Edge) -> str:
    """Render an edge's arg_mapping as ``param=arg, param2=arg2``.

    Mappings are emitted in list order; an empty mapping yields ``""``.
    """
    rendered = []
    for mapping in edge.arg_mapping:
        rendered.append(f"{mapping.callee_param}={mapping.caller_arg}")
    return ", ".join(rendered)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _callee_name(edge: Edge) -> str:
    """The local symbol used at the call site (just the function name).

    The orchestrator emits ``from <module> import <name>`` for cross-module
    callees, so the call site can always use the bare name. The name is
    everything after the first dot of ``edge.callee``.
    """
    pieces = edge.callee.split(".", 1)
    return pieces[1]
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def _indent(lines: list[str]) -> str:
    """Prefix every line with ``INDENT`` and join them with newlines."""
    prefixed = [INDENT + text for text in lines]
    return "\n".join(prefixed)
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# ---- per-template renderers -----------------------------------------
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def render_passthrough_call(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Render a body that returns the result of the node's single callee.

    Raises:
        ValueError: if the node does not have exactly one out-edge.
    """
    if len(out_edges) == 1:
        edge = out_edges[0]
        call = f"return {_callee_name(edge)}({_kwargs_for(edge)})"
        return _indent([call])
    raise ValueError(
        f"passthrough_call on {node.qualified_name} requires 1 out-edge, "
        f"got {len(out_edges)}"
    )
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def render_sequential_calls(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Render one call per out-edge, in order, returning the last call's result.

    Raises:
        ValueError: if the node has no out-edges.
    """
    if not out_edges:
        raise ValueError(
            f"sequential_calls on {node.qualified_name} requires >=1 out-edge"
        )
    statements = [f"{_callee_name(edge)}({_kwargs_for(edge)})" for edge in out_edges]
    # Only the final call's value is returned; earlier calls are bare statements.
    statements[-1] = f"return {statements[-1]}"
    return _indent(statements)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def render_validate_with_regex(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Render a body that tests the first parameter against a named pattern.

    The pattern name is read from ``body_template_args["pattern"]`` (default
    ``""``) and must be known to the ``patterns`` module. The generated line
    is ``return re.match(<constant>, <first param>) is not None``.

    Raises:
        ValueError: if the node has out-edges, the pattern name is unknown,
            or the parsed signature has no parameters.
    """
    if out_edges:
        raise ValueError(
            f"validate_with_regex on {node.qualified_name} must have 0 out-edges"
        )

    requested = str(node.body_template_args.get("pattern", ""))
    if patterns.get_pattern(requested) is None:
        raise ValueError(
            f"unknown regex pattern {requested!r} on {node.qualified_name}; "
            f"known: {patterns.known_patterns()}"
        )
    constant_symbol = patterns.constant_name(requested)

    # The host signature is expected to be (s: str) -> bool — but we just use
    # the first parameter name, whatever it is, to be tolerant.
    from graphforge.actions.signature import parse_signature

    params = parse_signature(node.signature).parameters
    if not params:
        raise ValueError(
            f"validate_with_regex on {node.qualified_name} requires "
            f"at least one parameter"
        )
    subject = params[0].name
    return _indent([f"return re.match({constant_symbol}, {subject}) is not None"])
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def render_early_return_guard(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Render ``if not (<condition>): return None`` followed by the single call.

    ``<condition>`` is ``body_template_args["condition"]`` converted with
    ``str`` (default ``"True"``) and is inserted into the generated source
    verbatim.

    Raises:
        ValueError: if the node does not have exactly one out-edge.
    """
    if len(out_edges) != 1:
        raise ValueError(
            f"early_return_guard on {node.qualified_name} requires 1 out-edge"
        )
    guard_expr = str(node.body_template_args.get("condition", "True"))
    target = out_edges[0]
    body = [f"if not ({guard_expr}):"]
    body.append(f"{INDENT}return None")
    body.append(f"return {_callee_name(target)}({_kwargs_for(target)})")
    return _indent(body)
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def render_try_call_with_fallback(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Render a ``try``/``except Exception`` body around two calls.

    The first out-edge is the primary call and the second is the fallback
    used in the ``except`` branch, so out-edge order is significant.

    Raises:
        ValueError: if the node does not have exactly two out-edges.
    """
    if len(out_edges) != 2:
        raise ValueError(
            f"try_call_with_fallback on {node.qualified_name} requires "
            f"exactly 2 out-edges (primary, fallback)"
        )
    first, second = out_edges
    attempt = f"{INDENT}return {_callee_name(first)}({_kwargs_for(first)})"
    recovery = f"{INDENT}return {_callee_name(second)}({_kwargs_for(second)})"
    return _indent(["try:", attempt, "except Exception:", recovery])
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def render_leaf_constant(node: Node, out_edges: list[Edge], _g: Graph) -> str:
    """Render ``return <repr(value)>`` for ``body_template_args["value"]``.

    Raises:
        ValueError: if the node has out-edges, or if ``value`` is missing
            from ``body_template_args``.
    """
    if out_edges:
        raise ValueError(
            f"leaf_constant on {node.qualified_name} must have 0 out-edges"
        )
    template_args = node.body_template_args
    if "value" in template_args:
        literal = template_args["value"]
        return _indent([f"return {literal!r}"])
    raise ValueError(
        f"leaf_constant on {node.qualified_name} requires args.value"
    )
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
# ---- registry --------------------------------------------------------
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
_RENDERERS: dict[str, object] = {
|
| 143 |
+
"passthrough_call": render_passthrough_call,
|
| 144 |
+
"sequential_calls": render_sequential_calls,
|
| 145 |
+
"validate_with_regex": render_validate_with_regex,
|
| 146 |
+
"early_return_guard": render_early_return_guard,
|
| 147 |
+
"try_call_with_fallback": render_try_call_with_fallback,
|
| 148 |
+
"leaf_constant": render_leaf_constant,
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
def render_body(node: Node, out_edges: list[Edge], graph: Graph) -> str:
    """Render the body for ``node`` based on its attached body template.

    A node without a template gets a ``raise NotImplementedError``
    placeholder so the file still parses.

    Raises:
        ValueError: if the template name has no registered renderer.
    """
    template = node.body_template
    if template is None:
        return _indent(['raise NotImplementedError("body not attached")'])
    renderer = _RENDERERS.get(template)
    if renderer is not None:
        return renderer(node, out_edges, graph)  # type: ignore[operator]
    raise ValueError(
        f"no codegen for template {node.body_template!r} on {node.qualified_name}"
    )
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def template_imports(template: str | None) -> set[str]:
    """Stdlib imports a template needs, beyond cross-module function imports.

    Only ``validate_with_regex`` needs one (``re``); every other value,
    including ``None``, yields an empty set. A fresh set is returned each call.
    """
    required: set[str] = set()
    if template == "validate_with_regex":
        required.add("re")
    return required
|
graphforge/materializer/materialize.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Materialize a :class:`Graph` into a dict of ``{filename: source}``.
|
| 2 |
+
|
| 3 |
+
Determinism guarantees:
|
| 4 |
+
|
| 5 |
+
* One file per module, named ``<module>.py``.
|
| 6 |
+
* Within a file, functions emitted in :attr:`Node.decl_order`.
|
| 7 |
+
* Imports sorted: stdlib first (alpha), then ``from <module> import <name>``
|
| 8 |
+
(alpha by module, alpha by name).
|
| 9 |
+
* Pattern constants emitted only if used, in alpha order.
|
| 10 |
+
* Out-edges of a node iterated in insertion order, which matters for
|
| 11 |
+
``sequential_calls`` and ``try_call_with_fallback`` semantics.
|
| 12 |
+
|
| 13 |
+
The orchestrator is a pure function: same graph in, same source out.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
from collections import defaultdict
|
| 19 |
+
from typing import Iterable
|
| 20 |
+
|
| 21 |
+
from graphforge.graph.schema import Edge, Graph, Node
|
| 22 |
+
from graphforge.materializer import codegen, patterns
|
| 23 |
+
|
| 24 |
+
HEADER = '"""Auto-generated by graphforge.materializer. Do not edit by hand."""\n'
|
| 25 |
+
FUTURE = "from __future__ import annotations\n"
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# ---- helpers ---------------------------------------------------------
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _out_edges_in_order(graph: Graph, qualified: str) -> list[Edge]:
|
| 32 |
+
"""Out-edges of ``qualified`` in insertion order."""
|
| 33 |
+
return [e for e in graph.edges if e.caller == qualified]
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def _nodes_by_module(graph: Graph) -> dict[str, list[Node]]:
|
| 37 |
+
"""Map module-name -> nodes in decl_order."""
|
| 38 |
+
by_mod: dict[str, list[Node]] = defaultdict(list)
|
| 39 |
+
for n in graph.nodes:
|
| 40 |
+
by_mod[n.module].append(n)
|
| 41 |
+
for ns in by_mod.values():
|
| 42 |
+
ns.sort(key=lambda n: (n.decl_order, n.name))
|
| 43 |
+
return by_mod
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def _cross_module_imports(graph: Graph, module: str) -> list[tuple[str, str]]:
|
| 47 |
+
"""``[(callee_module, callee_name), ...]`` needed by ``module``."""
|
| 48 |
+
pairs: set[tuple[str, str]] = set()
|
| 49 |
+
for e in graph.edges:
|
| 50 |
+
caller_mod = e.caller.split(".", 1)[0]
|
| 51 |
+
if caller_mod != module:
|
| 52 |
+
continue
|
| 53 |
+
callee_mod, callee_name = e.callee.split(".", 1)
|
| 54 |
+
if callee_mod != module:
|
| 55 |
+
pairs.add((callee_mod, callee_name))
|
| 56 |
+
return sorted(pairs)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _stdlib_imports_for(nodes: Iterable[Node]) -> list[str]:
|
| 60 |
+
"""Stdlib imports the templates in this module require."""
|
| 61 |
+
needed: set[str] = set()
|
| 62 |
+
for n in nodes:
|
| 63 |
+
needed |= codegen.template_imports(n.body_template)
|
| 64 |
+
return sorted(needed)
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def _patterns_used_by(nodes: Iterable[Node]) -> list[str]:
|
| 68 |
+
"""Named patterns referenced by validate_with_regex nodes in this module."""
|
| 69 |
+
used: set[str] = set()
|
| 70 |
+
for n in nodes:
|
| 71 |
+
if n.body_template == "validate_with_regex":
|
| 72 |
+
name = str(n.body_template_args.get("pattern", ""))
|
| 73 |
+
if patterns.get_pattern(name) is not None:
|
| 74 |
+
used.add(name)
|
| 75 |
+
return sorted(used)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# ---- core ------------------------------------------------------------
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def materialize(graph: Graph) -> dict[str, str]:
    """Project ``graph`` to a ``{filename: source}`` map.

    One ``<module>.py`` entry is produced for every module in
    ``graph.modules``. A module without nodes is still rendered (with an
    empty node list) so that downstream import-resolution sees it.
    """
    grouped = _nodes_by_module(graph)
    return {
        f"{mod.name}.py": _render_module(graph, mod.name, grouped.get(mod.name, []))
        for mod in graph.modules
    }
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _render_module(graph: Graph, module_name: str, nodes: list[Node]) -> str:
    """Render the complete source text of one module.

    Layout, in order: header docstring, ``__future__`` import, stdlib
    imports, cross-module ``from ... import ...`` lines, pattern constants
    used by the module, then one ``def`` per node separated by blank lines.

    Args:
        graph: Graph the module belongs to; supplies edges for imports and
            function bodies.
        module_name: Name of the module being rendered.
        nodes: The module's nodes, already in emission order.

    Returns:
        The module source, ending in exactly one newline.
    """
    # Compute each import list once; the cross-module scan walks every edge.
    stdlib_imports = _stdlib_imports_for(nodes)
    cross_imports = _cross_module_imports(graph, module_name)

    parts: list[str] = [HEADER, FUTURE, "\n"]
    parts.extend(f"import {imp}\n" for imp in stdlib_imports)
    parts.extend(
        f"from {callee_mod} import {callee_name}\n"
        for callee_mod, callee_name in cross_imports
    )
    if any(stdlib_imports) or cross_imports:
        parts.append("\n")

    # Pattern constants used in this module. We emit a plain string literal
    # (not a raw-string-prefixed one) because ``repr()`` already produces a
    # valid Python string literal — wrapping it in ``r"..."`` would double
    # the backslashes and break regex metacharacters like ``\s`` and ``\d``.
    used_patterns = _patterns_used_by(nodes)
    for name in used_patterns:
        regex = patterns.get_pattern(name)
        constant = patterns.constant_name(name)
        parts.append(f"{constant} = {regex!r}\n")
    if used_patterns:
        parts.append("\n")

    # Functions, with one extra newline between consecutive definitions.
    rendered: list[str] = []
    for node in nodes:
        out_edges = _out_edges_in_order(graph, node.qualified_name)
        body = codegen.render_body(node, out_edges, graph)
        rendered.append(f"def {node.name}{node.signature}:\n{body}\n")
    parts.append("\n".join(rendered))

    # Ensure exactly one trailing newline.
    return "".join(parts).rstrip("\n") + "\n"
|
graphforge/materializer/patterns.py
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Named regex patterns for ``validate_with_regex`` template.
|
| 2 |
+
|
| 3 |
+
Patterns are referenced by name in the graph (e.g. ``args={"pattern": "EMAIL"}``)
|
| 4 |
+
and resolved here at materialization time. The registry keeps task definitions
|
| 5 |
+
domain-agnostic — a task constraint can name a pattern without leaking the
|
| 6 |
+
regex itself into the graph schema.
|
| 7 |
+
|
| 8 |
+
Add new patterns sparingly; every name here becomes part of the constraint
|
| 9 |
+
vocabulary that tasks can use.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
# name -> regex string
|
| 15 |
+
_PATTERNS: dict[str, str] = {
|
| 16 |
+
"EMAIL": r"[^@\s]+@[^@\s]+\.[^@\s]+",
|
| 17 |
+
"HEXCOLOR": r"#[0-9a-fA-F]{6}",
|
| 18 |
+
"PHONE": r"\+?\d{10,15}",
|
| 19 |
+
"ALPHANUM": r"[A-Za-z0-9]+",
|
| 20 |
+
"URL": r"https?://[^\s]+",
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def known_patterns() -> list[str]:
    """Return the registered pattern names in alphabetical order."""
    names = list(_PATTERNS)
    names.sort()
    return names
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_pattern(name: str) -> str | None:
    """Return the regex registered under ``name``, or ``None`` if unknown."""
    try:
        return _PATTERNS[name]
    except KeyError:
        return None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def constant_name(name: str) -> str:
    """Return the module-level constant name emitted for pattern ``name``."""
    return "_PATTERN_{}".format(name)
|
graphforge/parser/__init__.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Round-trip parser: Python source -> Graph.
|
| 2 |
+
|
| 3 |
+
Responsibilities (PROPOSAL.md §3.4):
|
| 4 |
+
|
| 5 |
+
* Walk an AST per module file.
|
| 6 |
+
* Recover function declarations as :class:`Node` objects.
|
| 7 |
+
* Recover ``from x import y`` lines as cross-module edges (best-effort).
|
| 8 |
+
* Recognize body templates by structural pattern matching against the
|
| 9 |
+
template library, and recover ``body_template`` + ``body_template_args``.
|
| 10 |
+
* Produce a :class:`Graph` identical (per ``structural_hash``) to the one
|
| 11 |
+
that produced the source via :mod:`graphforge.materializer`.
|
| 12 |
+
|
| 13 |
+
The round-trip parser is unit-tested against every body template + every
|
| 14 |
+
constraint pattern. If it fails to round-trip, the materializer emits a
|
| 15 |
+
warning and the graph is treated as canonical.
|
| 16 |
+
|
| 17 |
+
Public surface (TODO):
|
| 18 |
+
|
| 19 |
+
parse_program(files: dict[str, str]) -> Graph
|
| 20 |
+
parse_directory(path: Path) -> Graph
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
from __future__ import annotations
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def parse_program(files: dict[str, str]) -> object:  # pragma: no cover — TODO
    """Placeholder for the source -> Graph round-trip parser.

    Always raises ``NotImplementedError``; ``files`` is not inspected.
    """
    message = "round-trip parser TODO — see PROPOSAL.md §3.4"
    raise NotImplementedError(message)
|
graphforge/repo_parser.py
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Parse a Python repository (directory tree) into a KnowledgeGraph.
|
| 2 |
+
|
| 3 |
+
Usage
|
| 4 |
+
-----
|
| 5 |
+
from graphforge.repo_parser import parse_repo
|
| 6 |
+
kg = parse_repo("/path/to/my_package")
|
| 7 |
+
|
| 8 |
+
What it extracts
|
| 9 |
+
----------------
|
| 10 |
+
Nodes : repo, package, module, class, function, method
|
| 11 |
+
Edges : contains, calls (same-file), imports, inherits
|
| 12 |
+
|
| 13 |
+
Cross-file call resolution is best-effort: if function A in file X calls
|
| 14 |
+
function B, an edge is added only when B is defined in X or in a file parsed after X.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import ast
|
| 20 |
+
import os
|
| 21 |
+
from pathlib import Path
|
| 22 |
+
from typing import Any
|
| 23 |
+
|
| 24 |
+
from graphforge.knowledge_graph import KGEdge, KGNode, KnowledgeGraph
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ── helpers ───────────────────────────────────────────────────────────────────
|
| 28 |
+
|
| 29 |
+
def _node_id(node_type: str, file_path: str, *names: str) -> str:
|
| 30 |
+
parts = [node_type, file_path] + list(names)
|
| 31 |
+
return ":".join(p for p in parts if p)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _sig(node: ast.FunctionDef | ast.AsyncFunctionDef) -> str:
|
| 35 |
+
args = []
|
| 36 |
+
for arg in node.args.args:
|
| 37 |
+
ann = f": {ast.unparse(arg.annotation)}" if arg.annotation else ""
|
| 38 |
+
args.append(f"{arg.arg}{ann}")
|
| 39 |
+
ret = f" -> {ast.unparse(node.returns)}" if node.returns else ""
|
| 40 |
+
return f"({', '.join(args)}){ret}"
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _source_slice(source_lines: list[str], start: int, end: int) -> str:
|
| 44 |
+
"""1-indexed, inclusive."""
|
| 45 |
+
return "\n".join(source_lines[start - 1 : end])
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _direct_calls(func_node: ast.FunctionDef | ast.AsyncFunctionDef) -> set[str]:
|
| 49 |
+
"""Collect names of directly called functions (Name-style calls only)."""
|
| 50 |
+
calls: set[str] = set()
|
| 51 |
+
for node in ast.walk(func_node):
|
| 52 |
+
if isinstance(node, ast.Call) and isinstance(node.func, ast.Name):
|
| 53 |
+
calls.add(node.func.id)
|
| 54 |
+
return calls
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# ── single-file parser ────────────────────────────────────────────────────────
|
| 58 |
+
|
| 59 |
+
def _parse_file(
|
| 60 |
+
file_path: str, # relative to repo root
|
| 61 |
+
abs_path: str,
|
| 62 |
+
kg: KnowledgeGraph,
|
| 63 |
+
parent_id: str,
|
| 64 |
+
) -> None:
|
| 65 |
+
try:
|
| 66 |
+
source = Path(abs_path).read_text(encoding="utf-8", errors="replace")
|
| 67 |
+
except Exception:
|
| 68 |
+
return
|
| 69 |
+
|
| 70 |
+
try:
|
| 71 |
+
tree = ast.parse(source, filename=abs_path)
|
| 72 |
+
except SyntaxError:
|
| 73 |
+
return
|
| 74 |
+
|
| 75 |
+
lines = source.splitlines()
|
| 76 |
+
mod_id = _node_id("module", file_path)
|
| 77 |
+
|
| 78 |
+
# Module node
|
| 79 |
+
mod_doc = ast.get_docstring(tree) or ""
|
| 80 |
+
kg.add_node(KGNode(
|
| 81 |
+
node_id=mod_id,
|
| 82 |
+
node_type="module",
|
| 83 |
+
name=Path(file_path).stem,
|
| 84 |
+
file_path=file_path,
|
| 85 |
+
line_start=1,
|
| 86 |
+
line_end=len(lines),
|
| 87 |
+
source=source,
|
| 88 |
+
docstring=mod_doc,
|
| 89 |
+
))
|
| 90 |
+
kg.add_edge(KGEdge("contains", parent_id, mod_id))
|
| 91 |
+
|
| 92 |
+
# Import edges (resolve module names)
|
| 93 |
+
for node in ast.walk(tree):
|
| 94 |
+
if isinstance(node, ast.Import):
|
| 95 |
+
for alias in node.names:
|
| 96 |
+
imp_id = _node_id("module", alias.name.replace(".", "/") + ".py")
|
| 97 |
+
kg.add_edge(KGEdge("imports", mod_id, imp_id))
|
| 98 |
+
elif isinstance(node, ast.ImportFrom) and node.module:
|
| 99 |
+
imp_id = _node_id("module", node.module.replace(".", "/") + ".py")
|
| 100 |
+
kg.add_edge(KGEdge("imports", mod_id, imp_id))
|
| 101 |
+
|
| 102 |
+
# Top-level classes and functions
|
| 103 |
+
func_name_to_id: dict[str, str] = {} # for call resolution within file
|
| 104 |
+
|
| 105 |
+
for stmt in tree.body:
|
| 106 |
+
if isinstance(stmt, ast.ClassDef):
|
| 107 |
+
_parse_class(stmt, file_path, lines, kg, mod_id, func_name_to_id)
|
| 108 |
+
elif isinstance(stmt, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
| 109 |
+
_parse_function(stmt, file_path, lines, kg, mod_id, func_name_to_id)
|
| 110 |
+
|
| 111 |
+
# Same-file call edges
|
| 112 |
+
_resolve_calls(func_name_to_id, kg)
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def _parse_class(
    cls_node: ast.ClassDef,
    file_path: str,
    lines: list[str],
    kg: KnowledgeGraph,
    parent_id: str,
    func_name_to_id: dict[str, str],
) -> None:
    """Add a class node, its inheritance edges and its methods to ``kg``.

    Only bases written as a bare name produce an ``inherits`` edge, and the
    base id is built with this file's path. Functions defined directly in the
    class body are handed to ``_parse_method``.
    """
    cls_id = _node_id("class", file_path, cls_node.name)
    first_line, last_line = cls_node.lineno, cls_node.end_lineno
    kg.add_node(KGNode(
        node_id=cls_id,
        node_type="class",
        name=cls_node.name,
        file_path=file_path,
        line_start=first_line,
        line_end=last_line,
        source=_source_slice(lines, first_line, last_line),
        docstring=ast.get_docstring(cls_node) or "",
    ))
    kg.add_edge(KGEdge("contains", parent_id, cls_id))

    # Inheritance edges
    for base in cls_node.bases:
        if not isinstance(base, ast.Name):
            continue
        kg.add_edge(KGEdge("inherits", cls_id, _node_id("class", file_path, base.id)))

    # Methods
    for member in cls_node.body:
        if isinstance(member, (ast.FunctionDef, ast.AsyncFunctionDef)):
            _parse_method(
                member, file_path, lines, kg, cls_id, cls_node.name, func_name_to_id
            )
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def _parse_function(
    fn: ast.FunctionDef | ast.AsyncFunctionDef,
    file_path: str,
    lines: list[str],
    kg: KnowledgeGraph,
    parent_id: str,
    func_name_to_id: dict[str, str],
) -> None:
    """Add a top-level function node to ``kg`` and register its name.

    The node's metadata carries the rendered signature and the names the
    function calls directly. ``func_name_to_id`` is updated so later call
    resolution can find this function by its bare name.
    """
    fn_id = _node_id("function", file_path, fn.name)
    doc = ast.get_docstring(fn) or ""
    kg.add_node(KGNode(
        node_id=fn_id,
        node_type="function",
        name=fn.name,
        file_path=file_path,
        line_start=fn.lineno,
        line_end=fn.end_lineno,
        source=_source_slice(lines, fn.lineno, fn.end_lineno),
        docstring=doc,
        # Sorted: set iteration order varies between interpreter runs, which
        # would make the metadata and the derived edge order unstable.
        metadata={"signature": _sig(fn), "calls": sorted(_direct_calls(fn))},
    ))
    kg.add_edge(KGEdge("contains", parent_id, fn_id))
    func_name_to_id[fn.name] = fn_id
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def _parse_method(
    fn: ast.FunctionDef | ast.AsyncFunctionDef,
    file_path: str,
    lines: list[str],
    kg: KnowledgeGraph,
    parent_id: str,
    class_name: str,
    func_name_to_id: dict[str, str],
) -> None:
    """Add a method node to ``kg`` and register it under its bare name.

    The node id includes ``class_name``; the metadata carries the rendered
    signature and the names the method calls directly.
    """
    method_id = _node_id("method", file_path, class_name, fn.name)
    doc = ast.get_docstring(fn) or ""
    kg.add_node(KGNode(
        node_id=method_id,
        node_type="method",
        name=fn.name,
        file_path=file_path,
        line_start=fn.lineno,
        line_end=fn.end_lineno,
        source=_source_slice(lines, fn.lineno, fn.end_lineno),
        docstring=doc,
        # Sorted: set iteration order varies between interpreter runs, which
        # would make the metadata and the derived edge order unstable.
        metadata={"signature": _sig(fn), "calls": sorted(_direct_calls(fn))},
    ))
    kg.add_edge(KGEdge("contains", parent_id, method_id))
    # Register under the unqualified name too for call resolution. This
    # replaces any earlier entry with the same name in this file.
    func_name_to_id[fn.name] = method_id
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def _resolve_calls(func_name_to_id: dict[str, str], kg: KnowledgeGraph) -> None:
|
| 202 |
+
"""Add calls edges based on direct-call names collected during parse."""
|
| 203 |
+
for fn_id, node in [(nid, n) for nid, n in kg._nodes.items()
|
| 204 |
+
if n.node_type in ("function", "method")]:
|
| 205 |
+
calls: list[str] = node.metadata.get("calls", [])
|
| 206 |
+
for callee_name in calls:
|
| 207 |
+
if callee_name in func_name_to_id:
|
| 208 |
+
callee_id = func_name_to_id[callee_name]
|
| 209 |
+
if callee_id != fn_id:
|
| 210 |
+
kg.add_edge(KGEdge("calls", fn_id, callee_id))
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
# ── repo walker ───────────────────────────────────────────────────────────────
|
| 214 |
+
|
| 215 |
+
def parse_repo(repo_path: str, exclude_dirs: set[str] | None = None) -> KnowledgeGraph:
    """Walk repo_path recursively and return a KnowledgeGraph.

    Directories and files are visited in sorted order, so the same tree
    yields nodes and edges in the same order on every run.

    Parameters
    ----------
    repo_path : str
        Absolute or relative path to the root of the repo.
    exclude_dirs : set[str], optional
        Directory names to skip (e.g. {"__pycache__", ".git", "tests"}).
        When omitted, a built-in set of cache, VCS, virtualenv and build
        directory names is used.

    Returns
    -------
    KnowledgeGraph
        Graph with a repo node, one package node per visited subdirectory
        and whatever ``_parse_file`` adds for each ``.py`` file.
    """
    if exclude_dirs is None:
        exclude_dirs = {"__pycache__", ".git", ".venv", "venv", "env",
                        "node_modules", ".mypy_cache", ".pytest_cache", "dist", "build"}

    abs_root = str(Path(repo_path).resolve())
    kg = KnowledgeGraph(repo_path=repo_path)

    # Root repo node
    repo_name = Path(abs_root).name
    repo_id = _node_id("repo", "", repo_name)
    kg.add_node(KGNode(
        node_id=repo_id,
        node_type="repo",
        name=repo_name,
        file_path="",
    ))

    # Walk directory tree
    for dirpath, dirnames, filenames in os.walk(abs_root):
        # Prune excluded dirs in-place (modifies os.walk traversal) and sort
        # the survivors: os.walk otherwise yields them in filesystem order.
        dirnames[:] = sorted(d for d in dirnames if d not in exclude_dirs)

        rel_dir = os.path.relpath(dirpath, abs_root)
        if rel_dir == ".":
            rel_dir = ""

        parent_id = repo_id
        if rel_dir:
            pkg_id = _node_id("package", rel_dir)
            if pkg_id not in kg._nodes:
                kg.add_node(KGNode(
                    node_id=pkg_id,
                    node_type="package",
                    name=Path(rel_dir).name,
                    file_path=rel_dir,
                ))
                # Every package hangs directly off the repo node, nested or not.
                kg.add_edge(KGEdge("contains", repo_id, pkg_id))
            parent_id = pkg_id

        for fname in sorted(filenames):
            if not fname.endswith(".py"):
                continue
            rel_file = os.path.join(rel_dir, fname) if rel_dir else fname
            abs_file = os.path.join(dirpath, fname)
            _parse_file(rel_file, abs_file, kg, parent_id)

    return kg
|
graphforge/repo_registry.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Registry of training repos with their clone URLs and source paths.
|
| 2 |
+
|
| 3 |
+
Add a new repo by appending to REGISTRY. The pipeline will clone it,
|
| 4 |
+
parse it, and auto-generate tasks from its doctests.
|
| 5 |
+
|
| 6 |
+
Each entry:
|
| 7 |
+
name short identifier used in task_ids
|
| 8 |
+
url git clone URL (depth-1 clone)
|
| 9 |
+
src_hint subdirectory containing the Python package
|
| 10 |
+
(tried as: <clone>/<hint>, <clone>/src/<hint>, <clone>)
|
| 11 |
+
n_tasks max tasks to pull from this repo
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
from dataclasses import dataclass, field
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@dataclass
class RepoSpec:
    """Description of one training repository in the registry."""

    # Short identifier used in task_ids.
    name: str
    # Git clone URL.
    url: str
    # Subdirectory expected to contain the Python package.
    src_hint: str
    # Maximum number of tasks to pull from this repo.
    n_tasks: int = 6
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# Default registry. Table columns: name, clone URL, src_hint, n_tasks.
REGISTRY: list[RepoSpec] = [
    RepoSpec(name=_name, url=_url, src_hint=_hint, n_tasks=_count)
    for _name, _url, _hint, _count in (
        # string / text
        ("humanize", "https://github.com/jmoiron/humanize.git", "src/humanize", 6),
        ("wcwidth", "https://github.com/jquast/wcwidth.git", "wcwidth", 6),
        ("inflect", "https://github.com/jaraco/inflect.git", "inflect", 4),
        # iteration / functional
        ("boltons", "https://github.com/mahmoud/boltons.git", "boltons", 10),
        (
            "more-itertools",
            "https://github.com/more-itertools/more-itertools.git",
            "more_itertools",
            8,
        ),
        ("toolz", "https://github.com/pytoolz/toolz.git", "toolz", 6),
        # data transformation / ETL
        ("petl", "https://github.com/petl-developers/petl.git", "src/petl", 8),
        ("pydash", "https://github.com/dgilland/pydash.git", "src/pydash", 8),
    )
]
|
| 84 |
+
|
| 85 |
+
# Repos that were evaluated and produced 0 tasks (no literal-eval-able doctests):
|
| 86 |
+
# num2words, parse, dateutil — omitted from REGISTRY
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def _find_src(clone_dir: str, hint: str) -> str:
|
| 90 |
+
for candidate in [
|
| 91 |
+
f"{clone_dir}/{hint}",
|
| 92 |
+
f"{clone_dir}/src/{hint}",
|
| 93 |
+
clone_dir,
|
| 94 |
+
]:
|
| 95 |
+
if Path(candidate).is_dir():
|
| 96 |
+
return candidate
|
| 97 |
+
return clone_dir
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def load_all_tasks(
    clone_root: str = "/tmp/train_repos",
    registry: list[RepoSpec] | None = None,
    verbose: bool = True,
) -> list:
    """Clone every repo in the registry and return all AutoTask objects.

    A repo whose clone directory already exists is not cloned again. A repo
    whose task generation raises is skipped (and reported when ``verbose``);
    a failing ``git clone`` is not caught and propagates to the caller.

    Args:
        clone_root: Directory under which repos are cloned.
        registry:   Use a custom registry; defaults to REGISTRY when ``None``.
                    An explicitly passed empty list yields no tasks.
        verbose:    Print progress.

    Returns:
        Flat list of AutoTask objects from all repos.

    Raises:
        subprocess.CalledProcessError: If ``git clone`` exits non-zero.
    """
    import subprocess
    from pathlib import Path
    from graphforge.task_generator import generate_tasks

    # ``registry or REGISTRY`` would silently replace an empty custom registry
    # with the full default one, so test for None explicitly.
    specs = REGISTRY if registry is None else registry
    all_tasks = []
    Path(clone_root).mkdir(parents=True, exist_ok=True)

    for spec in specs:
        clone_dir = str(Path(clone_root) / spec.name)
        if not Path(clone_dir).exists():
            if verbose:
                print(f"Cloning {spec.name} ...")
            subprocess.check_call(
                ["git", "clone", "--depth", "1", "-q", spec.url, clone_dir]
            )

        src = _find_src(clone_dir, spec.src_hint)
        try:
            kg, tasks = generate_tasks(src, n_tasks=spec.n_tasks)
            all_tasks.extend(tasks)
            if verbose:
                print(f"  {spec.name}: {len(tasks)} tasks "
                      f"(DAG {len(kg._nodes)} nodes)")
        except Exception as exc:
            # Best-effort: one unparsable repo must not stop the others.
            if verbose:
                print(f"  {spec.name}: SKIPPED — {exc}")

    if verbose:
        print(f"\nTotal auto-tasks: {len(all_tasks)}")
    return all_tasks
|
graphforge/reward/__init__.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Reward engine — see :mod:`graphforge.reward.engine`.
|
| 2 |
+
|
| 3 |
+
Per-turn (dense, small) and terminal (sparse, large) reward computation
|
| 4 |
+
following PROPOSAL.md §5.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from graphforge.reward.engine import (
|
| 8 |
+
ActionOutcome,
|
| 9 |
+
ALL_BEHAVIORAL_BONUS,
|
| 10 |
+
ALL_STRUCTURAL_BONUS,
|
| 11 |
+
ALPHA_TOKEN_COST,
|
| 12 |
+
BEHAVIORAL_PER_PASS,
|
| 13 |
+
DUPLICATE_ACTION,
|
| 14 |
+
MATERIALIZE_FAIL_PENALTY,
|
| 15 |
+
MUTATION_FAIL,
|
| 16 |
+
PER_TURN_COST,
|
| 17 |
+
SCHEMA_REJECTION,
|
| 18 |
+
STRUCTURAL_PER_SAT,
|
| 19 |
+
TYPE_CHECK_BONUS,
|
| 20 |
+
TOKEN_EFFICIENCY_MAX,
|
| 21 |
+
TerminalReward,
|
| 22 |
+
TurnReward,
|
| 23 |
+
score_terminal,
|
| 24 |
+
score_turn,
|
| 25 |
+
)
|
| 26 |
+
|
| 27 |
+
__all__ = [
|
| 28 |
+
"ALPHA_TOKEN_COST",
|
| 29 |
+
"ALL_BEHAVIORAL_BONUS",
|
| 30 |
+
"ALL_STRUCTURAL_BONUS",
|
| 31 |
+
"ActionOutcome",
|
| 32 |
+
"BEHAVIORAL_PER_PASS",
|
| 33 |
+
"DUPLICATE_ACTION",
|
| 34 |
+
"MATERIALIZE_FAIL_PENALTY",
|
| 35 |
+
"MUTATION_FAIL",
|
| 36 |
+
"PER_TURN_COST",
|
| 37 |
+
"SCHEMA_REJECTION",
|
| 38 |
+
"STRUCTURAL_PER_SAT",
|
| 39 |
+
"TOKEN_EFFICIENCY_MAX",
|
| 40 |
+
"TYPE_CHECK_BONUS",
|
| 41 |
+
"TerminalReward",
|
| 42 |
+
"TurnReward",
|
| 43 |
+
"score_terminal",
|
| 44 |
+
"score_turn",
|
| 45 |
+
]
|
graphforge/reward/engine.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Reward engine — per-turn (dense, small) and terminal (sparse, large).
|
| 2 |
+
|
| 3 |
+
Implementation follows PROPOSAL.md §5 verbatim. The two halves are pure
|
| 4 |
+
functions over lightweight envelopes so the server can call them without
|
| 5 |
+
threading state through the reward module.
|
| 6 |
+
|
| 7 |
+
Decisions worth flagging:
|
| 8 |
+
|
| 9 |
+
* ``All-behavioral-passing`` bonus is awarded only when there is at least
|
| 10 |
+
one behavioral test. The gate for the token-efficiency bonus, however,
|
| 11 |
+
treats zero behavioral tests as vacuously satisfied (so a tier-0 task
|
| 12 |
+
with no behavioral tests can still earn token-efficiency reward).
|
| 13 |
+
* ``type_checks_ok`` is tri-state: ``True`` / ``False`` / ``None``. ``None``
|
| 14 |
+
means the type-check gate didn't run (e.g. mypy isn't wired yet); the
|
| 15 |
+
+3 bonus is suppressed in that case.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
from dataclasses import dataclass, field
|
| 21 |
+
from enum import Enum
|
| 22 |
+
|
| 23 |
+
# Coefficients (PROPOSAL.md §5.1). Override at call time if you want.
|
| 24 |
+
ALPHA_TOKEN_COST: float = 0.0008
|
| 25 |
+
PER_TURN_COST: float = -0.1
|
| 26 |
+
MUTATION_FAIL: float = -2.0
|
| 27 |
+
SCHEMA_REJECTION: float = -2.0
|
| 28 |
+
DUPLICATE_ACTION: float = -1.0
|
| 29 |
+
|
| 30 |
+
# Terminal magnitudes (§5.2)
|
| 31 |
+
STRUCTURAL_PER_SAT: float = 1.0
|
| 32 |
+
BEHAVIORAL_PER_PASS: float = 3.0
|
| 33 |
+
ALL_STRUCTURAL_BONUS: float = 5.0
|
| 34 |
+
ALL_BEHAVIORAL_BONUS: float = 5.0
|
| 35 |
+
TYPE_CHECK_BONUS: float = 3.0
|
| 36 |
+
MATERIALIZE_FAIL_PENALTY: float = -8.0
|
| 37 |
+
TOKEN_EFFICIENCY_MAX: float = 5.0
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# ---- per-turn -------------------------------------------------------
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class ActionOutcome(str, Enum):
    """Coarse result category of one agent action, consumed by ``score_turn``.

    * ``SUCCESS``: a mutation or info action returned ``ok=True``.
    * ``FAILURE``: the handler raised :class:`ActionError` (rollback path).
    * ``MALFORMED``: the pydantic schema rejected the action at parse time.
    """

    SUCCESS = "success"
    FAILURE = "failure"
    MALFORMED = "malformed"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
@dataclass(frozen=True)
class TurnReward:
    """Immutable breakdown of the reward for a single turn."""

    base: float        # outcome-dependent component
    duplicate: float   # 0 or DUPLICATE_ACTION
    per_turn: float    # PER_TURN_COST
    token_cost: float  # alpha * tokens_returned, negated

    @property
    def total(self) -> float:
        """Sum of the four components, added in field order."""
        running = self.base + self.duplicate
        running = running + self.per_turn
        return running + self.token_cost

    def to_dict(self) -> dict[str, float]:
        """Return the four components plus ``total`` as a plain dict."""
        breakdown = {
            key: getattr(self, key)
            for key in ("base", "duplicate", "per_turn", "token_cost")
        }
        breakdown["total"] = self.total
        return breakdown
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def score_turn(
    *,
    outcome: ActionOutcome,
    is_duplicate: bool,
    tokens_returned: int,
    alpha: float = ALPHA_TOKEN_COST,
    per_turn_cost: float = PER_TURN_COST,
) -> TurnReward:
    """Build the reward breakdown for one turn.

    Args:
        outcome: Classification of what happened to the action.
        is_duplicate: Whether the action repeats an earlier one; adds
            ``DUPLICATE_ACTION`` when true.
        tokens_returned: Tokens returned this turn; negative counts are
            treated as zero.
        alpha: Cost per returned token.
        per_turn_cost: Flat amount added to every turn.

    Returns:
        A :class:`TurnReward` holding the individual components.
    """
    # Identity checks on purpose: anything that is not exactly SUCCESS or
    # FAILURE is scored as a schema rejection.
    if outcome is ActionOutcome.FAILURE:
        base = MUTATION_FAIL
    elif outcome is ActionOutcome.SUCCESS:
        base = 0.0
    else:  # MALFORMED
        base = SCHEMA_REJECTION

    duplicate_penalty = DUPLICATE_ACTION if is_duplicate else 0.0
    billable_tokens = max(0, tokens_returned)
    return TurnReward(
        base=base,
        duplicate=duplicate_penalty,
        per_turn=per_turn_cost,
        token_cost=-alpha * billable_tokens,
    )
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
# ---- terminal -------------------------------------------------------
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
@dataclass(frozen=True)
class TerminalReward:
    """Immutable breakdown of the end-of-episode reward."""

    structural: float  # +1 per structural constraint satisfied
    behavioral: float  # +3 per behavioral test passing
    bonus_all_structural: float
    bonus_all_behavioral: float
    bonus_type_checks: float
    penalty_materialize: float  # 0 or MATERIALIZE_FAIL_PENALTY
    efficiency: float  # gated by all-structural AND all-behavioral

    # Raw inputs the score was computed from; not part of ``total``.
    components: dict[str, object] = field(default_factory=dict)

    @property
    def total(self) -> float:
        """Return the sum of the seven numeric components."""
        running = self.structural
        # Accumulate left to right so the float result equals the chained
        # ``a + b + ...`` form.
        for part in (
            self.behavioral,
            self.bonus_all_structural,
            self.bonus_all_behavioral,
            self.bonus_type_checks,
            self.penalty_materialize,
            self.efficiency,
        ):
            running = running + part
        return running

    def to_dict(self) -> dict[str, object]:
        """Return all components, ``total`` and ``components`` as a dict."""
        return dict(
            structural=self.structural,
            behavioral=self.behavioral,
            bonus_all_structural=self.bonus_all_structural,
            bonus_all_behavioral=self.bonus_all_behavioral,
            bonus_type_checks=self.bonus_type_checks,
            penalty_materialize=self.penalty_materialize,
            efficiency=self.efficiency,
            total=self.total,
            components=self.components,
        )
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def score_terminal(
    *,
    n_structural_satisfied: int,
    n_structural_total: int,
    n_behavioral_passing: int,
    n_behavioral_total: int,
    materialization_ok: bool,
    type_checks_ok: bool | None,
    tokens_used: int,
    budget: int,
) -> TerminalReward:
    """Compute the end-of-episode reward breakdown.

    Args:
        n_structural_satisfied: Structural constraints satisfied.
        n_structural_total: Structural constraints in the task.
        n_behavioral_passing: Behavioral tests passing.
        n_behavioral_total: Behavioral tests in the task.
        materialization_ok: Whether materialization succeeded; a failure
            adds ``MATERIALIZE_FAIL_PENALTY``.
        type_checks_ok: ``True`` earns ``TYPE_CHECK_BONUS``; ``False`` and
            ``None`` earn nothing.
        tokens_used: Tokens consumed during the episode.
        budget: Token budget for the episode; must be positive.

    Returns:
        A :class:`TerminalReward` whose ``components`` echo the inputs.

    Raises:
        ValueError: If any count is negative or ``budget`` is not positive.
    """
    if n_structural_satisfied < 0 or n_structural_total < 0:
        raise ValueError("structural counts must be non-negative")
    if n_behavioral_passing < 0 or n_behavioral_total < 0:
        raise ValueError("behavioral counts must be non-negative")
    if budget <= 0:
        raise ValueError("budget must be positive")

    structural = STRUCTURAL_PER_SAT * n_structural_satisfied
    behavioral = BEHAVIORAL_PER_PASS * n_behavioral_passing

    # The "all satisfied" bonuses require at least one item to exist.
    all_structural = (
        n_structural_total > 0 and n_structural_satisfied == n_structural_total
    )
    all_behavioral_present_and_passing = (
        n_behavioral_total > 0 and n_behavioral_passing == n_behavioral_total
    )
    bonus_all_structural = ALL_STRUCTURAL_BONUS if all_structural else 0.0
    bonus_all_behavioral = (
        ALL_BEHAVIORAL_BONUS if all_behavioral_present_and_passing else 0.0
    )

    # ``None`` (type checks not run) is deliberately treated like a failure.
    bonus_type_checks = TYPE_CHECK_BONUS if type_checks_ok is True else 0.0

    penalty_materialize = 0.0 if materialization_ok else MATERIALIZE_FAIL_PENALTY

    # Efficiency bonus is gated on all-structural AND all-behavioral satisfied.
    # When n_behavioral_total == 0 the behavioral half is vacuously satisfied
    # for the gate's purposes (otherwise tier-0 tasks could never earn it).
    behavioral_gate_ok = (
        n_behavioral_total == 0
        or n_behavioral_passing == n_behavioral_total
    )
    efficiency = 0.0
    if all_structural and behavioral_gate_ok:
        # Clamp to [0, 1]: overspending earns nothing, and a (bogus) negative
        # ``tokens_used`` can no longer push the bonus past
        # TOKEN_EFFICIENCY_MAX -- mirroring the max(0, ...) guard that
        # score_turn applies to its token count.
        saved_fraction = (budget - tokens_used) / budget
        ratio = min(1.0, max(0.0, saved_fraction))
        efficiency = TOKEN_EFFICIENCY_MAX * ratio

    return TerminalReward(
        structural=structural,
        behavioral=behavioral,
        bonus_all_structural=bonus_all_structural,
        bonus_all_behavioral=bonus_all_behavioral,
        bonus_type_checks=bonus_type_checks,
        penalty_materialize=penalty_materialize,
        efficiency=efficiency,
        components={
            "n_structural_satisfied": n_structural_satisfied,
            "n_structural_total": n_structural_total,
            "n_behavioral_passing": n_behavioral_passing,
            "n_behavioral_total": n_behavioral_total,
            "materialization_ok": materialization_ok,
            "type_checks_ok": type_checks_ok,
            "tokens_used": tokens_used,
            "budget": budget,
        },
    )
|
graphforge/sample_repos/humanize/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Humanize — convert numbers, file sizes, and times to human-readable strings."""
|
| 2 |
+
from graphforge.sample_repos.humanize.filesize import naturalsize
|
| 3 |
+
from graphforge.sample_repos.humanize.number import (
|
| 4 |
+
apnumber,
|
| 5 |
+
clamp,
|
| 6 |
+
fractional,
|
| 7 |
+
intcomma,
|
| 8 |
+
intword,
|
| 9 |
+
ordinal,
|
| 10 |
+
scientific,
|
| 11 |
+
)
|
| 12 |
+
from graphforge.sample_repos.humanize.time import (
|
| 13 |
+
naturaldate,
|
| 14 |
+
naturalday,
|
| 15 |
+
naturaldelta,
|
| 16 |
+
naturaltime,
|
| 17 |
+
precisedelta,
|
| 18 |
+
)
|
graphforge/sample_repos/humanize/filesize.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Bits and bytes related humanization."""
|
| 2 |
+
|
| 3 |
+
# Unit labels per naming scheme, smallest unit first. The "gnu" entry is a
# string that is iterated one character per unit.
suffixes = {
    "decimal": ("kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB"),
    "binary": ("KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"),
    "gnu": "KMGTPEZY",
}


def naturalsize(value, binary=False, gnu=False, format="%.1f"):
    """Render a byte count as a short human-readable size string.

    Decimal (power-of-1000) suffixes are the default. ``binary`` switches to
    power-of-1024 suffixes (KiB, MiB, ...); ``gnu`` uses single-letter
    power-of-1024 suffixes with no space and takes precedence over ``binary``.
    ``format`` is the %-style format applied to the scaled number.

    Examples:
        >>> naturalsize(3000000)
        '3.0 MB'
        >>> naturalsize(300, False, True)
        '300B'
        >>> naturalsize(3000, True)
        '2.9 KiB'
    """
    if gnu:
        scheme = "gnu"
    elif binary:
        scheme = "binary"
    else:
        scheme = "decimal"
    labels = suffixes[scheme]
    base = 1024 if (gnu or binary) else 1000

    size = float(value)
    magnitude = abs(size)

    # Below one "kilo" unit the count is printed as a whole number of bytes.
    if magnitude < base:
        if gnu:
            return "%dB" % size
        if magnitude == 1:
            return "%d Byte" % size
        return "%d Bytes" % size

    template = format + ("%s" if gnu else " %s")
    for exponent, label in enumerate(labels, start=2):
        ceiling = base ** exponent
        if magnitude < ceiling:
            break
    # When no ceiling was large enough the loop ends on the biggest unit,
    # which is then used for values beyond it as well.
    return template % ((base * size / ceiling), label)
|
graphforge/sample_repos/humanize/number.py
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Humanizing functions for numbers."""
|
| 2 |
+
|
| 3 |
+
import math
|
| 4 |
+
import re
|
| 5 |
+
from fractions import Fraction
|
| 6 |
+
|
| 7 |
+
# Thresholds used by intword(); powers[i] pairs with human_powers[i].
powers = [
    10**exponent
    for exponent in (3, 6, 9, 12, 15, 18, 21, 24, 27, 30, 33, 100)
]
human_powers = (
    "thousand",
    "million",
    "billion",
    "trillion",
    "quadrillion",
    "quintillion",
    "sextillion",
    "septillion",
    "octillion",
    "nonillion",
    "decillion",
    "googol",
)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def ordinal(value):
    """Return ``value`` as an English ordinal string such as '1st' or '22nd'.

    The input is coerced with ``int()``; anything that cannot be coerced is
    returned unchanged.

    Examples:
        >>> ordinal(1)
        '1st'
        >>> ordinal(12)
        '12th'
        >>> ordinal(103)
        '103rd'
    """
    try:
        number = int(value)
    except (TypeError, ValueError):
        return value

    # 11, 12 and 13 (and 111, 212, ...) take "th" despite their last digit.
    if 11 <= number % 100 <= 13:
        ending = "th"
    else:
        ending = {1: "st", 2: "nd", 3: "rd"}.get(number % 10, "th")
    return f"{number}{ending}"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def intcomma(value, ndigits=None):
    """Return ``value`` as a string with a comma every three integer digits.

    Args:
        value: A number, or a string holding one (existing commas allowed).
        ndigits: Number of decimals to render. ``None`` keeps ``str(value)``
            as is; ``0`` is honoured and renders no decimals.

    Returns:
        The comma-grouped string, or ``value`` unchanged when it is not
        numeric.

    Examples:
        >>> intcomma(1000000)
        '1,000,000'
        >>> intcomma(1234567.25)
        '1,234,567.25'
        >>> intcomma(1234.5678, 2)
        '1,234.57'
    """
    try:
        if isinstance(value, str):
            float(value.replace(",", ""))
        else:
            float(value)
    except (TypeError, ValueError):
        return value

    if ndigits is not None:
        number = value
        if isinstance(value, str):
            # The "f" format code rejects strings, so parse first; prefer int
            # so long integers keep full precision.
            cleaned = value.replace(",", "")
            try:
                number = int(cleaned)
            except ValueError:
                number = float(cleaned)
        text = "{0:.{1}f}".format(number, ndigits)
    else:
        text = str(value)

    # Each pass inserts one comma, working from the right of the leading
    # digit run; stop once a pass changes nothing.
    while True:
        grouped = re.sub(r"^(-?\d+)(\d{3})", r"\g<1>,\g<2>", text)
        if grouped == text:
            return grouped
        text = grouped
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def intword(value, format="%.1f"):
    """Convert a large integer to a friendly text representation.

    Values below one thousand, and values of a googol or more, are returned
    as plain digit strings. The magnitude word is never pluralised
    ("1.2 billion", not "1.2 billions").

    Args:
        value: Anything ``int()`` accepts; other input is returned unchanged.
        format: %-style format applied to the scaled number.

    Examples:
        >>> intword(1000000)
        '1.0 million'
        >>> intword(1200000000)
        '1.2 billion'
    """
    try:
        value = int(value)
    except (TypeError, ValueError):
        return value

    if value < powers[0]:
        return str(value)

    for ordinal_idx, power in enumerate(powers[1:], 1):
        if value < power:
            lower = powers[ordinal_idx - 1]
            chopped = value / float(lower)
            label = human_powers[ordinal_idx - 1]
            # If formatting rounds the number up to a full step (e.g.
            # "1000.0 thousand"), express it in the next unit instead. The
            # step is derived from the table because the last one
            # (decillion -> googol) is not a factor of 1000.
            if float(format % chopped) == float(power // lower):
                chopped = value / float(power)
                label = human_powers[ordinal_idx]
            return (format + " %s") % (chopped, label)
    return str(value)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def apnumber(value):
    """Spell out the integers 0 through 9 as AP-style words.

    The input is coerced with ``int()``. Numbers outside 0..9 come back as
    their digit string; input that cannot be coerced is returned unchanged.

    Examples:
        >>> apnumber(5)
        'five'
        >>> apnumber(10)
        '10'
    """
    try:
        number = int(value)
    except (TypeError, ValueError):
        return value

    if number < 0 or number >= 10:
        return str(number)

    names = (
        "zero", "one", "two", "three", "four",
        "five", "six", "seven", "eight", "nine",
    )
    return names[number]
|
| 113 |
+
|
| 114 |
+
|
| 115 |
+
def fractional(value):
    """Express a number as a whole part plus a simple fraction.

    The fractional part is approximated with a denominator of at most 1000.
    Input that ``float()`` rejects is returned unchanged.

    Examples:
        >>> fractional(0.3)
        '3/10'
        >>> fractional(1.3)
        '1 3/10'
        >>> fractional(1)
        '1'
    """
    try:
        number = float(value)
    except (TypeError, ValueError):
        return value

    whole = int(number)
    remainder = Fraction(number - whole).limit_denominator(1000)
    numerator = remainder.numerator
    denominator = remainder.denominator

    if not whole:
        # No integer part: show only the fraction (zero renders as "0/1").
        return f"{numerator:.0f}/{denominator:.0f}"
    if not numerator and denominator == 1:
        return f"{whole:.0f}"
    return f"{whole:.0f} {numerator:.0f}/{denominator:.0f}"
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def scientific(value, precision=2):
    """Return a number in scientific notation with a superscript exponent.

    Args:
        value: A number, or a string holding one.
        precision: Digits after the decimal point of the mantissa.

    Returns:
        A string such as ``'5.00 x 10²'``. The sign of a negative value stays
        on the mantissa. Input that cannot be formatted is returned
        unchanged; values with no exponent form (nan, inf) come back as their
        plain formatted text.

    Examples:
        >>> scientific(500)
        '5.00 x 10²'
        >>> scientific(0.3)
        '3.00 x 10⁻¹'
        >>> scientific(-500)
        '-5.00 x 10²'
    """
    exponents = {
        "0": "⁰", "1": "¹", "2": "²", "3": "³", "4": "⁴",
        "5": "⁵", "6": "⁶", "7": "⁷", "8": "⁸", "9": "⁹",
        "+": "⁺", "-": "⁻",
    }
    try:
        # Strings are parsed; other values are formatted directly. The sign
        # is left to the formatter so it ends up on the mantissa instead of
        # being moved onto the exponent.
        number = float(value) if isinstance(value, str) else value
        fmt = "{:.%se}" % str(int(precision))
        formatted = fmt.format(number)
    except (ValueError, TypeError):
        return value

    mantissa, marker, exponent = formatted.partition("e")
    if not marker:
        # nan / inf format without an exponent part.
        return formatted

    # "e+02" -> "2", "e-01" -> "-1": drop a leading "+0" and the zero in "-0".
    exponent = exponent.replace("-0", "-").replace("+0", "")
    return mantissa + " x 10" + "".join(exponents[char] for char in exponent)
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
def clamp(value, format="{:}", floor=None, ceil=None, floor_token="<", ceil_token=">"):
    """Format ``value``, replacing it with a bound it falls outside of.

    A value below ``floor`` is rendered as the floor prefixed with
    ``floor_token``; a value above ``ceil`` as the ceiling prefixed with
    ``ceil_token``. ``format`` is either a ``str.format`` template or a
    callable that returns a string. ``None`` is passed through as ``None``.

    Raises:
        ValueError: If ``format`` is neither a string nor callable.

    Examples:
        >>> clamp(123.456)
        '123.456'
        >>> clamp(0.001, floor=0.01)
        '<0.01'
        >>> clamp(999, ceil=100)
        '>100'
    """
    if value is None:
        return None

    prefix = ""
    if floor is not None and value < floor:
        value, prefix = floor, floor_token
    elif ceil is not None and value > ceil:
        value, prefix = ceil, ceil_token

    if isinstance(format, str):
        rendered = format.format(value)
    elif callable(format):
        rendered = format(value)
    else:
        raise ValueError("format must be a string or callable")
    return prefix + rendered
|
graphforge/sample_repos/humanize/time.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Time humanizing functions."""
|
| 2 |
+
|
| 3 |
+
import datetime as dt
|
| 4 |
+
import math
|
| 5 |
+
from enum import Enum
|
| 6 |
+
from functools import total_ordering
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@total_ordering
class Unit(Enum):
    """Time units, ordered from smallest to largest by their value."""

    MICROSECONDS = 0
    MILLISECONDS = 1
    SECONDS = 2
    MINUTES = 3
    HOURS = 4
    DAYS = 5
    MONTHS = 6
    YEARS = 7

    def __lt__(self, other):
        """Order two units by value; defer when ``other`` is not a Unit."""
        if self.__class__ is not other.__class__:
            return NotImplemented
        return self.value < other.value
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def _now():
    """Return the current local time as a naive ``datetime``."""
    current = dt.datetime.now()
    return current
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _abs_timedelta(delta):
    """Return the non-negative magnitude of a ``timedelta``.

    Uses ``abs()`` directly instead of round-tripping through the current
    time, so no clock read is needed and large negative deltas cannot
    overflow the datetime range.
    """
    return abs(delta)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _date_and_delta(value, *, now=None):
    """Turn ``value`` into a ``(date, absolute_delta)`` pair relative to ``now``.

    ``value`` may be a datetime, a timedelta, or something ``int()`` accepts
    as a number of seconds. If it is none of those, ``(None, value)`` is
    returned. ``now`` falls back to the current time when not supplied.
    """
    now = now or _now()

    if isinstance(value, dt.datetime):
        return value, _abs_timedelta(now - value)

    if isinstance(value, dt.timedelta):
        date = now - value
        return date, _abs_timedelta(value)

    try:
        value = int(value)
        delta = dt.timedelta(seconds=value)
        date = now - delta
    except (ValueError, TypeError):
        return None, value
    return date, _abs_timedelta(delta)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def naturaldelta(value, months=True, minimum_unit="seconds") -> str:
    """Return a natural representation of a timedelta or number of seconds.

    Does not include tense (use naturaltime for past/future); the sign of
    the delta is ignored.

    Args:
        value: A ``timedelta`` or anything ``int()`` accepts as seconds.
            Other input is returned unchanged.
        months: When false, spans of a month or more within a year are
            reported in days instead of months.
        minimum_unit: One of "seconds", "milliseconds" or "microseconds".
            The name is validated; durations under one second are reported
            as "a moment" whichever of the three is given.

    Raises:
        KeyError: If ``minimum_unit`` does not name a ``Unit``.
        ValueError: If ``minimum_unit`` names a unit larger than seconds.

    Examples:
        >>> import datetime as dt
        >>> naturaldelta(dt.timedelta(seconds=90))
        'a minute'
        >>> naturaldelta(dt.timedelta(hours=2))
        '2 hours'
        >>> naturaldelta(dt.timedelta(days=365))
        'a year'
        >>> naturaldelta(dt.timedelta(days=400))
        '1 year, 1 month'
        >>> naturaldelta(dt.timedelta(seconds=-30))
        '30 seconds'
    """
    unit = Unit[minimum_unit.upper()]
    if unit not in (Unit.SECONDS, Unit.MILLISECONDS, Unit.MICROSECONDS):
        raise ValueError(f"Minimum unit '{minimum_unit}' not supported")

    if isinstance(value, dt.timedelta):
        delta = value
    else:
        try:
            value = int(value)
            delta = dt.timedelta(seconds=value)
        except (ValueError, TypeError):
            return value

    # A negative timedelta is stored as negative days plus positive seconds
    # (-30s == -1 day + 86370s), so take the magnitude of the whole delta
    # rather than of each field separately.
    delta = abs(delta)
    seconds = delta.seconds
    years, days = divmod(delta.days, 365)
    months_count = int(days // 30.5)

    if not years and days < 1:
        if seconds == 0:
            return "a moment"
        if seconds == 1:
            return "a second"
        if seconds < 60:
            return f"{seconds} seconds"
        if seconds < 120:
            return "a minute"
        if seconds < 3600:
            return f"{seconds // 60} minutes"
        if seconds < 7200:
            return "an hour"
        return f"{seconds // 3600} hours"

    if years == 0:
        if days == 1:
            return "a day"
        if not months or not months_count:
            return f"{days} days"
        if months_count == 1:
            return "a month"
        return f"{months_count} months"

    if years == 1:
        if not months_count and not days:
            return "a year"
        if not months_count:
            return f"1 year, {days} days" if days > 1 else "1 year, a day"
        if not months:
            # Honour months=False here too, as the under-a-year branch does.
            return f"1 year, {days} days"
        if months_count == 1:
            return "1 year, 1 month"
        return f"1 year, {months_count} months"

    return f"{years} years"
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def naturaltime(value, future=False, months=True, minimum_unit="seconds", when=None) -> str:
    """Describe a point in time relative to now, e.g. '30 seconds ago'.

    ``value`` may be a datetime, a timedelta, or a number of seconds. For
    datetimes and timedeltas the tense is worked out from the value itself
    and ``future`` is ignored; for plain numbers ``future`` selects
    "from now" over "ago". ``when`` replaces the current time as the
    reference point. Input that cannot be interpreted is returned unchanged.

    Examples:
        >>> import datetime as dt
        >>> naturaltime(dt.timedelta(seconds=30))
        '30 seconds ago'
        >>> naturaltime(3600, future=True)
        'an hour from now'
    """
    reference = when or _now()
    date, delta = _date_and_delta(value, now=reference)
    if date is None:
        return value

    # Only trust the caller's ``future`` flag for plain numbers.
    if isinstance(value, (dt.datetime, dt.timedelta)):
        future = date > reference

    phrase = naturaldelta(delta, months, minimum_unit)
    if phrase == "a moment":
        return "now"

    template = "%s from now" if future else "%s ago"
    return template % phrase
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def naturalday(value, format="%b %d") -> str:
    """Return 'today', 'tomorrow' or 'yesterday', else the formatted date.

    ``value`` needs ``year``, ``month`` and ``day`` attributes; anything that
    cannot be turned into a date is returned unchanged. ``format`` is the
    ``strftime`` pattern used for all other days.

    Examples:
        >>> import datetime as dt
        >>> naturalday(dt.date.today())
        'today'
    """
    try:
        day = dt.date(value.year, value.month, value.day)
    except (AttributeError, OverflowError, ValueError):
        return value

    offset = (day - dt.date.today()).days
    nearby = {0: "today", 1: "tomorrow", -1: "yesterday"}
    if offset in nearby:
        return nearby[offset]
    return day.strftime(format)
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def naturaldate(value) -> str:
    """Like naturalday, but include the year for dates about 5+ months away.

    Anything that cannot be turned into a date is returned unchanged.
    """
    try:
        day = dt.date(value.year, value.month, value.day)
    except (AttributeError, OverflowError, ValueError):
        return value

    distance = _abs_timedelta(day - dt.date.today())
    # Five twelfths of a 365-day year, in days.
    if distance.days < 5 * 365 / 12:
        return naturalday(day)
    return naturalday(day, "%b %d %Y")
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def precisedelta(value, minimum_unit="seconds", suppress=(), format="%0.2f") -> str:
    """Return a precise, human-readable representation of a timedelta.

    The duration is split into years (365 days), months (30.5 days), days,
    hours, minutes and seconds. Zero parts are left out and the rest are
    joined with " and ". Input that cannot be interpreted is returned
    unchanged.

    Args:
        value: A datetime, timedelta, or number of seconds.
        minimum_unit: Name of a ``Unit``.
        suppress: Iterable of ``Unit`` names.
        format: Accepted for interface compatibility.

    Raises:
        KeyError: If ``minimum_unit`` or an entry of ``suppress`` does not
            name a ``Unit``.

    Examples:
        >>> import datetime as dt
        >>> precisedelta(dt.timedelta(seconds=3633, days=2))
        '2 days and 1 hour and 33 seconds'
        >>> precisedelta(dt.timedelta(days=30))
        '30 days'
    """
    date, delta = _date_and_delta(value)
    if date is None:
        return value

    # NOTE(review): these lookups only validate the unit names; neither
    # result, nor ``format``, influences the output yet -- confirm whether
    # that is intended.
    suppress_units = {Unit[s.upper()] for s in suppress}
    min_unit = Unit[minimum_unit.upper()]

    years, days = divmod(delta.days, 365)
    months_count = int(days // 30.5)
    # Remove exactly the days the counted months account for. The previous
    # ``days % 30`` disagreed with the 30.5-day month above and silently
    # dropped days (a 30-day delta came out as "0 seconds").
    days -= int(months_count * 30.5)

    hours, secs = divmod(delta.seconds, 3600)
    minutes, secs = divmod(secs, 60)

    breakdown = [
        (years, "year", "years"),
        (months_count, "month", "months"),
        (days, "day", "days"),
        (hours, "hour", "hours"),
        (minutes, "minute", "minutes"),
        (secs, "second", "seconds"),
    ]
    parts = [
        f"{count} {singular if count == 1 else plural}"
        for count, singular, plural in breakdown
        if count > 0
    ]

    if not parts:
        return "0 seconds"
    return " and ".join(parts)
|
graphforge/sample_repos/task_manager/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""Task Manager — a small synthetic package used as the training repo."""
|
graphforge/sample_repos/task_manager/api.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""High-level API layer that wires models, storage, and validators together."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from graphforge.sample_repos.task_manager.models import Task
|
| 6 |
+
from graphforge.sample_repos.task_manager.storage import TaskStore
|
| 7 |
+
from graphforge.sample_repos.task_manager.validators import validate_priority, validate_tags, validate_title
|
| 8 |
+
|
| 9 |
+
# Module-level store shared by the API functions in this module; reset_store()
# rebinds it to a fresh, empty TaskStore.
_store = TaskStore()
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def create_task(
    title: str,
    priority: str = "medium",
    tags: list[str] | None = None,
) -> Task:
    """Create and persist a new task.

    Args:
        title: Task title; checked with ``validate_title``.
        priority: Priority label; checked with ``validate_priority``.
        tags: Optional list of tags; ``None`` (or any falsy value) means no
            tags. Checked with ``validate_tags``.

    Returns:
        The newly created task, already added to the module-level store.

    Raises:
        ValueError: If title, tags or priority are invalid.
    """
    if not validate_title(title):
        raise ValueError(f"Invalid title: {title!r}")
    resolved_tags = tags or []
    if not validate_tags(resolved_tags):
        raise ValueError(f"Invalid tags: {resolved_tags!r}")
    # Checked last so the existing title/tags errors keep their precedence.
    if not validate_priority(priority):
        raise ValueError(f"Invalid priority: {priority!r}")

    task = Task(title=title, priority=priority, tags=resolved_tags)
    _store.add(task)
    return task
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def get_all_tasks() -> list[Task]:
    """List every task currently held by the module-level store."""
    tasks = _store.all()
    return tasks
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def complete_task(title: str) -> bool:
    """Mark the first task with this title as done.

    Returns:
        True when a matching task was found and completed, False otherwise.
    """
    found = _store.find_by_title(title)
    if not found:
        return False
    found.complete()
    return True
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def reset_store() -> None:
    """Swap the module-level store for an empty one (used between tests)."""
    global _store
    fresh_store = TaskStore()
    _store = fresh_store
|
graphforge/sample_repos/task_manager/models.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Domain models for the task manager."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from datetime import date
|
| 6 |
+
from typing import Optional
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
class Task:
    """One to-do item tracked by the task manager."""

    def __init__(
        self,
        title: str,
        priority: str,
        tags: list[str],
        due_date: Optional[date] = None,
    ) -> None:
        """Store the given fields; a new task always starts out not done."""
        self.title = title
        self.priority = priority  # expected: "low" | "medium" | "high"
        self.tags = tags
        self.due_date = due_date
        self.done = False

    def complete(self) -> None:
        """Flag the task as finished."""
        self.done = True

    def to_dict(self) -> dict:
        """Return the task as a plain dict; ``due_date`` becomes a string or None."""
        due = str(self.due_date) if self.due_date else None
        return dict(
            title=self.title,
            priority=self.priority,
            tags=self.tags,
            done=self.done,
            due_date=due,
        )
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
class User:
    """A user who owns tasks."""

    def __init__(self, username: str, email: str) -> None:
        """Remember the user's name and e-mail address."""
        self.username = username
        self.email = email

    def display(self) -> str:
        """Return the user formatted as ``username <email>``."""
        return "{} <{}>".format(self.username, self.email)
|
graphforge/sample_repos/task_manager/storage.py
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""In-memory task storage."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
from graphforge.sample_repos.task_manager.models import Task
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TaskStore:
    """Keeps Task objects in an in-memory list, in insertion order."""

    def __init__(self) -> None:
        """Start with no tasks."""
        self._tasks: list[Task] = []

    def add(self, task: Task) -> None:
        """Put ``task`` at the end of the store."""
        self._tasks.append(task)

    def all(self) -> list[Task]:
        """Return a new list holding every stored task."""
        return self._tasks.copy()

    def find_by_title(self, title: str) -> Optional[Task]:
        """Return the first task with exactly this title, or None."""
        matches = (task for task in self._tasks if task.title == title)
        return next(matches, None)

    def find_done(self) -> list[Task]:
        """Return the tasks whose ``done`` flag is set."""
        return [task for task in self._tasks if task.done]

    def find_pending(self) -> list[Task]:
        """Return the tasks whose ``done`` flag is not set."""
        return [task for task in self._tasks if not task.done]
|
graphforge/sample_repos/task_manager/validators.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Input validation functions for the task manager."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
# The priority labels that validate_priority() accepts.
VALID_PRIORITIES = {"low", "medium", "high"}
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def validate_title(title: str) -> bool:
    """Tell whether ``title`` is a string of 1 to 200 characters."""
    if not isinstance(title, str):
        return False
    return 1 <= len(title) <= 200
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def validate_tags(tags: object) -> bool:
    """Tell whether ``tags`` is a list containing only strings."""
    if not isinstance(tags, list):
        return False
    for tag in tags:
        if not isinstance(tag, str):
            return False
    return True
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def validate_email(email: str) -> bool:
    """Loose email check: a string with an ``@`` and a ``.`` after the last ``@``.

    This is intentionally a shape check only; nothing else about the
    address is verified. Non-string input yields False.
    """
    if not isinstance(email, str):
        return False
    # rpartition splits on the LAST "@"; the separator is "" when no "@" exists.
    _, at_sign, domain = email.rpartition("@")
    return bool(at_sign) and "." in domain
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def validate_priority(priority: str) -> bool:
    """Return True if priority is one of 'low', 'medium', or 'high'.

    Like the other validators in this module, any non-string input is
    rejected with False instead of raising.
    """
    # Guard first: an unhashable value (e.g. a list or dict taken from parsed
    # JSON) would otherwise raise TypeError on the set-membership test.
    return isinstance(priority, str) and priority in VALID_PRIORITIES
|
graphforge/server/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI OpenEnv server.
|
| 2 |
+
|
| 3 |
+
Endpoints (PROPOSAL.md §6.1):
|
| 4 |
+
|
| 5 |
+
POST /reset -> create a fresh episode, return initial observation
|
| 6 |
+
POST /step -> apply an Action, return (observation, reward, done, info)
|
| 7 |
+
GET /state -> snapshot the current episode state for debugging
|
| 8 |
+
POST /close -> tear down the episode
|
| 9 |
+
|
| 10 |
+
The server is a thin shell: it owns episode state (graph, task spec,
|
| 11 |
+
action history, token counter, turn counter, materialization cache) and
|
| 12 |
+
delegates the work to the dispatcher, reward engine, and validators.
|
| 13 |
+
|
| 14 |
+
The training-side OpenEnv client calls this over HTTP at localhost:8000.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from graphforge.server.app import app
|
| 18 |
+
|
| 19 |
+
__all__ = ["app"]
|
graphforge/server/app.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""FastAPI application — the OpenEnv server.
|
| 2 |
+
|
| 3 |
+
Endpoints (PROPOSAL.md §6.1):
|
| 4 |
+
|
| 5 |
+
POST /reset { task_id?: str | None, seed?: int }
|
| 6 |
+
-> { episode_id, observation }
|
| 7 |
+
POST /step { episode_id, action: Action }
|
| 8 |
+
-> { observation, reward, done, info }
|
| 9 |
+
GET /state?episode_id=...
|
| 10 |
+
-> { ... full snapshot ... }
|
| 11 |
+
POST /close { episode_id }
|
| 12 |
+
-> { closed: bool }
|
| 13 |
+
|
| 14 |
+
The handlers are thin: routing, request validation, episode lookup. The
|
| 15 |
+
actual per-step orchestration lives in :mod:`graphforge.server.runner`.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
from typing import Any, Optional
|
| 21 |
+
|
| 22 |
+
from fastapi import FastAPI, HTTPException
|
| 23 |
+
from pydantic import BaseModel
|
| 24 |
+
|
| 25 |
+
from graphforge.actions.schema import Action
|
| 26 |
+
from graphforge.server.episode import GLOBAL_STORE, Episode, EpisodeStore
|
| 27 |
+
from graphforge.server.runner import step as runner_step
|
| 28 |
+
from graphforge.tasks import default_task, get_task
|
| 29 |
+
|
| 30 |
+
# FastAPI application instance. The endpoint functions below register
# themselves on it through the @app.post / @app.get decorators, and
# /healthz reports the ``version`` configured here.
app = FastAPI(
    title="GraphForge OpenEnv server",
    version="0.0.1",
    description="See graphforge.server for the wire shape.",
)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# ---- request / response models --------------------------------------
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
class ResetRequest(BaseModel):
    # Request body for POST /reset.
    # task_id: id passed to get_task(); None makes /reset use default_task().
    task_id: Optional[str] = None
    seed: Optional[int] = None  # reserved for variant generation, unused for tier-0
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class StepRequest(BaseModel):
    # Request body for POST /step.
    # episode_id: id returned by /reset; unknown ids produce a 404.
    episode_id: str
    # ``Action`` is itself an Annotated discriminated union; no need to
    # re-declare the discriminator on this field.
    action: Action
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class CloseRequest(BaseModel):
    # Request body for POST /close.
    # episode_id: id of the episode to drop from the store.
    episode_id: str
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# ---- store wiring (overridable for tests) ---------------------------
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _store() -> EpisodeStore:
    """Return the episode store used by the endpoints (the process-wide ``GLOBAL_STORE``).

    Handlers and helpers call this accessor rather than naming
    ``GLOBAL_STORE`` themselves, which gives a single place to substitute
    another store.
    """
    return GLOBAL_STORE
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
# ---- helpers --------------------------------------------------------
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def _require_episode(episode_id: str) -> Episode:
    """Fetch the episode registered under ``episode_id``.

    Raises:
        HTTPException: status 404 when the store has no such episode.
    """
    episode = _store().get(episode_id)
    if episode is not None:
        return episode
    raise HTTPException(status_code=404, detail=f"unknown episode_id: {episode_id!r}")
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def _initial_observation(ep: Episode) -> dict[str, Any]:
|
| 74 |
+
return {
|
| 75 |
+
"episode_id": ep.id,
|
| 76 |
+
"task": ep.task.visible_payload(),
|
| 77 |
+
"turns_total": 0,
|
| 78 |
+
"tokens_used_total": 0,
|
| 79 |
+
"budget": ep.task.budget,
|
| 80 |
+
"episode_cap": ep.task.episode_cap,
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ---- endpoints ------------------------------------------------------
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
@app.post("/reset")
def reset(req: ResetRequest) -> dict:
    """Start a new episode and return its id together with the initial observation.

    With no ``task_id`` the default task is used. An unknown ``task_id``
    is answered with HTTP 404. ``req.seed`` is accepted but not read here.
    """
    requested_id = req.task_id
    if requested_id is None:
        task = default_task()
    else:
        task = get_task(requested_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"unknown task_id: {req.task_id!r}")

    episode = Episode.new(task=task)
    _store().put(episode)
    return {
        "episode_id": episode.id,
        "observation": _initial_observation(episode),
    }
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
@app.post("/step")
def step(req: StepRequest) -> dict:
    """Apply ``req.action`` to the addressed episode via the runner.

    The episode lookup raises HTTP 404 for an unknown ``episode_id``; the
    runner's result dict is returned unchanged.
    """
    return runner_step(_require_episode(req.episode_id), req.action)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@app.get("/state")
def state(episode_id: str) -> dict:
    """Return the state snapshot of the episode named by the query parameter.

    An unknown ``episode_id`` is answered with HTTP 404.
    """
    return _require_episode(episode_id).state_snapshot()
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
@app.post("/close")
def close(req: CloseRequest) -> dict:
    """Drop the episode from the store.

    ``closed`` in the response is whatever the store's ``drop`` reports:
    for ``EpisodeStore``, True when an episode was removed and False when
    the id was not present.
    """
    return {"closed": _store().drop(req.episode_id)}
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
@app.get("/healthz")
def healthz() -> dict:
    """Health endpoint: a fixed ``ok`` status plus the application's version string."""
    report = {"status": "ok"}
    report["version"] = app.version
    return report
|
graphforge/server/episode.py
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Episode state — one per active OpenEnv session.
|
| 2 |
+
|
| 3 |
+
The server holds episodes in an in-memory dict keyed by ``episode_id``.
|
| 4 |
+
Episodes are entirely self-contained: they own a :class:`Graph`, a
|
| 5 |
+
:class:`Task`, and the running history. There is no leakage between
|
| 6 |
+
episodes (PROPOSAL.md §6.2 — "episode isolation").
|
| 7 |
+
|
| 8 |
+
Token accounting is a server-side concern. We use a coarse character-based
|
| 9 |
+
estimate (``len(json) // 4``) until a real tokenizer is wired in. The
|
| 10 |
+
estimate is consistent across baseline and trained runs because both go
|
| 11 |
+
through the same envelope.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import json
|
| 17 |
+
import uuid
|
| 18 |
+
from dataclasses import dataclass, field
|
| 19 |
+
from typing import Any
|
| 20 |
+
|
| 21 |
+
from graphforge.actions.dispatcher import ActionResult
|
| 22 |
+
from graphforge.graph.schema import Graph
|
| 23 |
+
from graphforge.reward.engine import ActionOutcome, TurnReward
|
| 24 |
+
from graphforge.tasks.schema import Task
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ---- token estimation -----------------------------------------------
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def estimate_tokens(payload: Any) -> int:
    """Approximate the token count of ``payload`` as a quarter of its text length.

    The payload is rendered with ``json.dumps`` (values JSON cannot encode
    are passed through ``str``); if that still fails, ``str(payload)`` is
    used instead. The result is the character count floor-divided by 4,
    following the rough "4 characters per token" rule of thumb. It is a
    stand-in until a real tokenizer is wired in: all that matters is that
    bigger payloads never score lower than smaller ones.
    """
    try:
        rendered = json.dumps(payload, default=str)
    except Exception:
        # Best-effort fallback for payloads json cannot handle at all.
        rendered = str(payload)
    # A length is never negative, so the floor division cannot go below zero.
    n_chars = len(rendered)
    return n_chars // 4
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
# ---- history records ------------------------------------------------
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
@dataclass
class TurnRecord:
    """One entry of an episode's history: the action taken and what came of it."""

    turn: int  # Episode.turns at the moment of recording (the first turn is 0)
    action_kind: str  # the action's ``kind`` discriminator
    action_args: dict[str, Any]  # the action's dumped fields, ``kind`` excluded
    outcome: str  # ActionOutcome value
    ok: bool  # copied from ActionResult.ok
    reward: float  # copied from TurnReward.total
    payload: dict[str, Any] = field(default_factory=dict)  # ActionResult.payload
    is_duplicate: bool = False  # True when the same (kind, args) was already in the history
    tokens_returned: int = 0  # token estimate for ``payload``
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# ---- episode --------------------------------------------------------
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
@dataclass
class Episode:
    """State of one session: its task, its graph, the turn log and running totals."""

    id: str
    task: Task
    graph: Graph = field(default_factory=Graph.empty)
    history: list[TurnRecord] = field(default_factory=list)
    tokens_used: int = 0
    turns: int = 0
    terminated: bool = False
    terminal_reward: float | None = None
    terminal_payload: dict[str, Any] | None = None

    @classmethod
    def new(cls, task: Task) -> "Episode":
        """Create an episode for ``task`` under a freshly generated UUID4 id."""
        episode_id = str(uuid.uuid4())
        return cls(id=episode_id, task=task)

    # ----- duplicate detection ---------------------------------------

    def is_duplicate(self, kind: str, args: dict[str, Any]) -> bool:
        """Report whether the history already holds an action with equal kind and args."""
        return any(
            past.action_kind == kind and past.action_args == args
            for past in self.history
        )

    # ----- bookkeeping -----------------------------------------------

    def record_turn(
        self,
        kind: str,
        args: dict[str, Any],
        result: ActionResult,
        outcome: ActionOutcome,
        turn_reward: TurnReward,
        is_duplicate: bool,
        tokens_returned: int,
    ) -> TurnRecord:
        """Append a TurnRecord for this action and advance the counters.

        The record is stamped with the turn index *before* the increment.
        Afterwards ``turns`` grows by one and ``tokens_used`` grows by
        ``tokens_returned``. The new record is returned.
        """
        turn_index = self.turns
        record = TurnRecord(
            turn=turn_index,
            action_kind=kind,
            action_args=args,
            outcome=outcome.value,
            ok=result.ok,
            reward=turn_reward.total,
            payload=result.payload,
            is_duplicate=is_duplicate,
            tokens_returned=tokens_returned,
        )
        self.history.append(record)
        self.turns = turn_index + 1
        self.tokens_used += tokens_returned
        return record

    # ----- snapshot --------------------------------------------------

    def state_snapshot(self) -> dict[str, Any]:
        """Return a plain-dict view of the episode for the ``/state`` endpoint.

        History entries are reduced to turn, action kind, ok flag and
        reward; graph modules, nodes and edges are dumped via ``model_dump``.
        """
        task = self.task
        snapshot: dict[str, Any] = {
            "episode_id": self.id,
            "task": task.visible_payload(),
            "turns": self.turns,
            "tokens_used": self.tokens_used,
            "budget": task.budget,
            "episode_cap": task.episode_cap,
            "terminated": self.terminated,
        }
        graph = self.graph
        snapshot["graph"] = {
            "modules": [module.model_dump() for module in graph.modules],
            "nodes": [node.model_dump() for node in graph.nodes],
            "edges": [edge.model_dump() for edge in graph.edges],
        }
        summaries = []
        for record in self.history:
            summaries.append(
                {
                    "turn": record.turn,
                    "action_kind": record.action_kind,
                    "ok": record.ok,
                    "reward": record.reward,
                }
            )
        snapshot["history"] = summaries
        snapshot["terminal_reward"] = self.terminal_reward
        return snapshot
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# ---- in-memory store ------------------------------------------------
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
class EpisodeStore:
    """Dict-backed episode registry; a small seam so a TTL cache can replace it later."""

    def __init__(self) -> None:
        self._eps: dict[str, Episode] = {}

    def put(self, ep: Episode) -> None:
        """Register ``ep`` under its own id, replacing any episode stored there."""
        key = ep.id
        self._eps[key] = ep

    def get(self, episode_id: str) -> Episode | None:
        """Return the episode stored under ``episode_id``, or None when absent."""
        try:
            return self._eps[episode_id]
        except KeyError:
            return None

    def drop(self, episode_id: str) -> bool:
        """Remove the episode if present; True when something was removed."""
        removed = self._eps.pop(episode_id, None)
        return removed is not None

    def __len__(self) -> int:
        """Number of episodes currently held."""
        return len(self._eps)
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
# Singleton store. The server module holds onto this for the lifetime of
|
| 170 |
+
# the process. Tests can construct their own EpisodeStore for isolation.
|
| 171 |
+
GLOBAL_STORE = EpisodeStore()
|
graphforge/server/runner.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Episode runner — the per-step orchestration the server endpoints use.
|
| 2 |
+
|
| 3 |
+
Pulls together dispatcher, reward engine, constraint checker, and episode
|
| 4 |
+
state. Kept separate from the FastAPI app so it can be unit-tested without
|
| 5 |
+
spinning up an HTTP server.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from typing import Any
|
| 11 |
+
|
| 12 |
+
from graphforge.actions import dispatch
|
| 13 |
+
from graphforge.actions.schema import Action, Submit
|
| 14 |
+
from graphforge.constraints import evaluate_all
|
| 15 |
+
from graphforge.materializer import materialize
|
| 16 |
+
from graphforge.reward.engine import (
|
| 17 |
+
ActionOutcome,
|
| 18 |
+
TurnReward,
|
| 19 |
+
score_terminal,
|
| 20 |
+
score_turn,
|
| 21 |
+
)
|
| 22 |
+
from graphforge.server.episode import (
|
| 23 |
+
Episode,
|
| 24 |
+
TurnRecord,
|
| 25 |
+
estimate_tokens,
|
| 26 |
+
)
|
| 27 |
+
from graphforge.validator import full_check
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def _classify_outcome(action: Action, ok: bool) -> ActionOutcome:
    """Map the handler's ``ok`` flag onto SUCCESS or FAILURE.

    Actions that fail schema validation never get here (FastAPI's pydantic
    validation rejects them earlier), so only handler-time success or
    failure needs classifying. ``action`` is currently not consulted.
    """
    if ok:
        return ActionOutcome.SUCCESS
    return ActionOutcome.FAILURE
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _render_observation(ep: Episode, turn_record: TurnRecord) -> dict[str, Any]:
|
| 38 |
+
return {
|
| 39 |
+
"turn": turn_record.turn,
|
| 40 |
+
"ok": turn_record.ok,
|
| 41 |
+
"outcome": turn_record.outcome,
|
| 42 |
+
"payload": turn_record.payload,
|
| 43 |
+
"reward": turn_record.reward,
|
| 44 |
+
"is_duplicate": turn_record.is_duplicate,
|
| 45 |
+
"tokens_returned": turn_record.tokens_returned,
|
| 46 |
+
"tokens_used_total": ep.tokens_used,
|
| 47 |
+
"turns_total": ep.turns,
|
| 48 |
+
"budget_remaining": max(0, ep.task.budget - ep.tokens_used),
|
| 49 |
+
"episode_cap_remaining": max(0, ep.task.episode_cap - ep.turns),
|
| 50 |
+
}
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
def step(ep: Episode, action: Action) -> dict[str, Any]:
    """Run one action against ``ep`` and report the result in ``/step`` shape.

    The action is dispatched on the episode's graph, scored, and appended
    to the history. The episode then ends when the action is a ``Submit``
    or when the turn count has reached the task's cap; in either case the
    terminal score is computed, stored on the episode, and its total is
    added to this step's reward. Stepping an already-terminated episode
    changes nothing and returns an error marker in ``info``.

    Returns:
        ``{observation, reward, done, info}``.
    """
    if ep.terminated:
        return {
            "observation": {},
            "reward": 0.0,
            "done": True,
            "info": {"error": "episode_already_terminated"},
        }

    args = action.model_dump(exclude={"kind"})
    kind = action.kind  # type: ignore[attr-defined]
    # Duplicate check happens before this turn is written to the history.
    is_duplicate = ep.is_duplicate(kind, args)

    result = dispatch(ep.graph, action)
    tokens_returned = estimate_tokens(result.payload)
    outcome = _classify_outcome(action, result.ok)
    turn_reward = score_turn(
        outcome=outcome,
        is_duplicate=is_duplicate,
        tokens_returned=tokens_returned,
    )
    rec = ep.record_turn(
        kind=kind,
        args=args,
        result=result,
        outcome=outcome,
        turn_reward=turn_reward,
        is_duplicate=is_duplicate,
        tokens_returned=tokens_returned,
    )

    info: dict[str, Any] = {}
    submitted = isinstance(action, Submit)
    # The cap only matters when the agent did not submit on this very turn.
    cap_reached = not submitted and ep.turns >= ep.task.episode_cap
    done = submitted or cap_reached

    terminal_bonus = 0.0
    if done:
        terminal = _score_terminal(ep)
        ep.terminated = True
        ep.terminal_reward = terminal["total"]
        ep.terminal_payload = terminal
        info["terminal"] = terminal
        if cap_reached:
            info["reason"] = "episode_cap_reached"
        terminal_bonus = terminal.get("total", 0.0)

    return {
        "observation": _render_observation(ep, rec),
        "reward": rec.reward + terminal_bonus,
        "done": done,
        "info": info,
    }
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
def _score_terminal(ep: Episode) -> dict[str, Any]:
    """Score the finished episode and return the reward as a serializable dict.

    Evaluates every task constraint against the graph, splits the result
    into structural and behavioral families, and checks whether the graph
    materializes to files that pass ``full_check``. Any exception during
    materialization or checking counts as a failed gate. The reward's dict
    form is returned with the constraint report added under
    ``"satisfaction"``.
    """
    satisfaction = evaluate_all(ep.graph, ep.task.all_constraints)
    structural, behavioral = satisfaction.split_by_family()

    # materialization gate: try to materialize + parse-check.
    try:
        materialization_ok = full_check(materialize(ep.graph)).ok
    except Exception:
        materialization_ok = False

    terminal_reward = score_terminal(
        n_structural_satisfied=len(structural.satisfied),
        n_structural_total=structural.total,
        n_behavioral_passing=len(behavioral.satisfied),
        n_behavioral_total=behavioral.total,
        materialization_ok=materialization_ok,
        type_checks_ok=None,  # mypy not wired yet
        tokens_used=ep.tokens_used,
        budget=ep.task.budget,
    )
    payload = terminal_reward.to_dict()
    payload["satisfaction"] = satisfaction.to_dict()
    return payload
|
graphforge/task_generator.py
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Auto-generate training tasks from any Python repository.
|
| 2 |
+
|
| 3 |
+
Pipeline
|
| 4 |
+
--------
|
| 5 |
+
1. Parse the repo with AST → KnowledgeGraph
|
| 6 |
+
2. Find public functions that have doctest examples (>>> in docstring)
|
| 7 |
+
3. Extract those examples as runnable assertions
|
| 8 |
+
4. Replace the function body with `raise NotImplementedError` — the agent
|
| 9 |
+
must re-implement it from the docstring alone
|
| 10 |
+
5. Return RepoTask objects ready for GRPO training — no hand-writing needed
|
| 11 |
+
|
| 12 |
+
Usage
|
| 13 |
+
-----
|
| 14 |
+
from graphforge.task_generator import generate_tasks
|
| 15 |
+
tasks = generate_tasks("/tmp/humanize/src/humanize", n_tasks=6)
|
| 16 |
+
for t in tasks:
|
| 17 |
+
print(t.task_id, "→", t.description[:60])
|
| 18 |
+
"""
|
| 19 |
+
|
| 20 |
+
from __future__ import annotations
|
| 21 |
+
|
| 22 |
+
import ast
|
| 23 |
+
import doctest
|
| 24 |
+
import textwrap
|
| 25 |
+
from dataclasses import dataclass, field
|
| 26 |
+
from pathlib import Path
|
| 27 |
+
from typing import Any
|
| 28 |
+
|
| 29 |
+
from graphforge.knowledge_graph import KGNode, KnowledgeGraph
|
| 30 |
+
from graphforge.repo_parser import parse_repo
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ── Task dataclass (mirrors env.tasks.RepoTask but lives here to avoid circular import) ──
|
| 34 |
+
|
| 35 |
+
@dataclass
class AutoTask:
    """An auto-generated "re-implement this function" task produced by generate_tasks()."""

    task_id: str  # generate_tasks() builds this as "auto.<repo_name>.<function name>"
    repo_name: str  # name of the repo source directory (last path component)
    repo_path: str  # absolute path to the repo source directory
    description: str  # instruction line followed by the function's docstring
    test_code: str  # uses short import: from <repo_name>.<module> import <func>
    stubbed_node_id: str  # the node whose body was replaced
    original_source: str  # saved so env can restore on reset
    max_turns: int = 12  # max turns per episode
    difficulty: int = 0  # generate_tasks() derives this from the candidate score, clamped to 0..2
    hints: list[str] = field(default_factory=list)  # free-text hints for the agent
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# ── Doctest extraction ────────────────────────────────────────────────────────
|
| 50 |
+
|
| 51 |
+
def _extract_all_examples(docstring: str) -> list[tuple[str, str]]:
|
| 52 |
+
"""Return ALL doctest lines as (source, want) — want is '' for setup lines."""
|
| 53 |
+
if not docstring:
|
| 54 |
+
return []
|
| 55 |
+
parser = doctest.DocTestParser()
|
| 56 |
+
try:
|
| 57 |
+
examples = parser.get_examples(docstring, name="<doc>")
|
| 58 |
+
return [(ex.source.strip(), ex.want.strip()) for ex in examples]
|
| 59 |
+
except Exception:
|
| 60 |
+
return []
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _to_assertion(expr: str, expected: str) -> str | None:
|
| 64 |
+
"""Convert one doctest example to a Python assertion.
|
| 65 |
+
|
| 66 |
+
- True/False expected → assert (expr) is True/False
|
| 67 |
+
- Traceback expected → skip
|
| 68 |
+
- Non-literal → skip
|
| 69 |
+
"""
|
| 70 |
+
if not expected or expected.startswith("Traceback"):
|
| 71 |
+
return None
|
| 72 |
+
if expected in ("True", "False"):
|
| 73 |
+
return f"assert ({expr}) is {expected}, f'got {{repr({expr})}}'"
|
| 74 |
+
try:
|
| 75 |
+
ast.literal_eval(expected)
|
| 76 |
+
except (ValueError, SyntaxError):
|
| 77 |
+
return None
|
| 78 |
+
return f"assert {expr} == {expected}, f'got {{repr({expr})}}'"
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
def _build_test_code(func_name: str, module_stem: str, repo_name: str,
                     all_examples: list[tuple[str, str]]) -> str | None:
    """Assemble a test script from doctest examples: import, setup lines, assertions.

    Examples without expected output are copied verbatim as setup. The
    rest are converted to assertions, keeping only those whose text
    mentions ``func_name``. Returns None when fewer than two assertions
    survive.
    """
    setup_lines = [source for source, want in all_examples if not want]

    assertion_lines: list[str] = []
    for source, want in all_examples:
        if not want:
            continue
        assertion = _to_assertion(source, want)
        # Keep only assertions that mention the target function by name.
        if assertion and func_name in assertion:
            assertion_lines.append(assertion)

    if len(assertion_lines) < 2:
        return None

    import_line = f"from {repo_name}.{module_stem} import {func_name}"
    return "\n".join([import_line, *setup_lines, *assertion_lines])
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
# ── Function stubbing ─────────────────────────────────────────────────────────
|
| 103 |
+
|
| 104 |
+
def _stub_function(source: str) -> str:
|
| 105 |
+
"""Replace a function body with `raise NotImplementedError`, keeping signature + docstring."""
|
| 106 |
+
dedented = textwrap.dedent(source)
|
| 107 |
+
try:
|
| 108 |
+
tree = ast.parse(dedented)
|
| 109 |
+
except SyntaxError:
|
| 110 |
+
return source
|
| 111 |
+
|
| 112 |
+
lines = dedented.splitlines()
|
| 113 |
+
for node in ast.walk(tree):
|
| 114 |
+
if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
| 115 |
+
continue
|
| 116 |
+
|
| 117 |
+
body = node.body
|
| 118 |
+
indent = " " * (node.col_offset // 4 + 1)
|
| 119 |
+
|
| 120 |
+
# Keep signature lines (everything up to and including the colon)
|
| 121 |
+
sig_end = body[0].lineno - 1 # 0-indexed line where body starts
|
| 122 |
+
|
| 123 |
+
# Keep docstring if present
|
| 124 |
+
if body and isinstance(body[0], ast.Expr) and isinstance(body[0].value, ast.Constant):
|
| 125 |
+
keep_until = body[0].end_lineno # inclusive, 1-indexed
|
| 126 |
+
else:
|
| 127 |
+
keep_until = sig_end
|
| 128 |
+
|
| 129 |
+
kept = "\n".join(lines[:keep_until])
|
| 130 |
+
stub = kept.rstrip() + f"\n{indent}raise NotImplementedError\n"
|
| 131 |
+
return stub
|
| 132 |
+
|
| 133 |
+
return source
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# ── Candidate selection ───────────────────────────────────────────────────────
|
| 137 |
+
|
| 138 |
+
def _score_candidate(node: KGNode, examples: list) -> int:
|
| 139 |
+
"""Higher = better training signal. Prefer more examples and longer docstrings."""
|
| 140 |
+
return len(examples) * 3 + min(len(node.docstring or ""), 200) // 20
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def _find_candidates(kg: KnowledgeGraph, repo_name: str) -> list[tuple[KGNode, str, int]]:
    """Collect ``(node, test_code, score)`` for every function usable as a task.

    A function qualifies when its name has no leading underscore, it has a
    docstring, source text and a file path, its docstring contains doctest
    examples, and those examples yield usable test code. The result is
    ordered from highest score to lowest.
    """
    scored = []
    for node in kg.all_nodes("function"):
        is_private = node.name.startswith("_")
        if is_private or not (node.docstring and node.source):
            continue
        if not node.file_path:
            continue
        module_stem = Path(node.file_path).stem
        if not module_stem:
            continue

        examples = _extract_all_examples(node.docstring)
        if not examples:
            continue

        test_code = _build_test_code(node.name, module_stem, repo_name, examples)
        if not test_code:
            continue

        scored.append((node, test_code, _score_candidate(node, examples)))

    return sorted(scored, key=lambda entry: entry[2], reverse=True)
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
# ── Main entry point ──────────────────────────────────────────────────────────
|
| 171 |
+
|
| 172 |
+
def generate_tasks(
    repo_source_dir: str,
    n_tasks: int = 4,
    max_turns: int = 12,
) -> tuple[KnowledgeGraph, list[AutoTask]]:
    """Parse a Python repo directory and auto-generate training tasks.

    Args:
        repo_source_dir: Path to the Python package source directory.
            e.g. '/tmp/humanize/src/humanize'
        n_tasks: How many tasks to generate (picks highest-scoring candidates).
        max_turns: Max turns per episode.

    Returns:
        (kg, tasks) — the Knowledge Graph and the list of AutoTask objects.

    Raises:
        ValueError: If no function in the repo yields a usable candidate.
    """
    repo_source_dir = str(Path(repo_source_dir).resolve())
    repo_name = Path(repo_source_dir).name
    kg = parse_repo(repo_source_dir)

    candidates = _find_candidates(kg, repo_name)
    if not candidates:
        raise ValueError(
            f"No suitable candidates found in {repo_source_dir}. "
            "Make sure functions have doctest examples (>>> in docstring)."
        )

    tasks: list[AutoTask] = []
    for node, test_code, score in candidates[:n_tasks]:
        # The description is assembled directly instead of running
        # textwrap.dedent over a template: an interpolated multi-line
        # docstring has its own indentation, which defeats dedent's
        # common-margin detection and leaves stray leading spaces.
        doc = node.docstring.strip() if node.docstring else "No docstring available."
        desc = f"Implement the function `{node.name}` in `{node.file_path}`.\n\n{doc}".strip()

        tasks.append(
            AutoTask(
                task_id=f"auto.{repo_name}.{node.name}",
                repo_name=repo_name,
                repo_path=repo_source_dir,
                description=desc,
                test_code=test_code,
                stubbed_node_id=node.node_id,
                original_source=node.source,
                max_turns=max_turns,
                # Bucket the candidate score into difficulty 0, 1 or 2.
                difficulty=min(2, max(0, score // 8)),
                hints=[
                    f"Look at {node.file_path} to understand the module style.",
                    f"The function signature is: {node.name}{node.metadata.get('signature', '(...)')}",
                ],
            )
        )

    return kg, tasks
|
graphforge/tasks/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Task bank and variant generator.
|
| 2 |
+
|
| 3 |
+
Tier-0 ships one hand-written task. Tier-1+ tasks and parametric variant
|
| 4 |
+
generation are TODO. See PROPOSAL.md §2.1, §2.3 for the full design.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from graphforge.tasks.bank import default_task, get_task, list_tasks
|
| 8 |
+
from graphforge.tasks.schema import Task
|
| 9 |
+
|
| 10 |
+
__all__ = ["Task", "default_task", "get_task", "list_tasks"]
|
graphforge/tasks/bank.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Tier-0 task bank.
|
| 2 |
+
|
| 3 |
+
A single hand-written task that exercises every implemented subsystem
|
| 4 |
+
end-to-end: build a one-module ``validators`` package with an ``is_email``
|
| 5 |
+
function attached to ``validate_with_regex(EMAIL)``. Tier-1+ tasks land in
|
| 6 |
+
follow-up modules.
|
| 7 |
+
|
| 8 |
+
Variant generation (PROPOSAL.md §2.3 — ~50 concrete variants per template
|
| 9 |
+
× domain vocabulary) is also TODO; for now we hand-author tasks until the
|
| 10 |
+
env's reward-signal shape is validated end-to-end.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
from graphforge.constraints.schema import (
|
| 16 |
+
AcyclicImports,
|
| 17 |
+
Materializes,
|
| 18 |
+
ModuleCount,
|
| 19 |
+
ModuleResponsibility,
|
| 20 |
+
ModuleSizeMax,
|
| 21 |
+
NodeAbsent,
|
| 22 |
+
NodeExists,
|
| 23 |
+
)
|
| 24 |
+
from graphforge.tasks.schema import Task
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# The single hand-written tier-0 task: build a one-module ``validators``
# package that exposes ``is_email``. It is registered in ``_TASKS`` below and
# is also what ``default_task()`` returns.
TIER_0_EMAIL_VALIDATOR = Task(
    id="t0.email_validator",
    tier=0,
    # Natural-language brief; this is the text the agent has to interpret.
    description=(
        "Build a tiny single-module package called 'validators'. It should "
        "expose a function `is_email(s: str) -> bool` that returns True for "
        "well-formed email addresses and False otherwise. Use the "
        "`validate_with_regex` body template with the EMAIL pattern. The "
        "module must materialize cleanly to runnable Python."
    ),
    # Constraints listed here are included in ``Task.visible_payload()``,
    # i.e. the agent is told about them up front.
    visible_constraints=[
        ModuleCount(n=1),
        ModuleResponsibility(module="validators", responsibility="validation"),
        NodeExists(name="is_email", module="validators"),
        Materializes(),
    ],
    # Constraints listed here are NOT part of ``Task.visible_payload()``.
    hidden_constraints=[
        # The visible constraints already pin most of this; the hidden set
        # adds shape constraints the agent must infer from the description.
        ModuleSizeMax(module="validators", n=1),
        NodeAbsent(name="main", module="validators"),
        AcyclicImports(),
    ],
    behavioral_test_names=[],  # tier-0 has no behavioral tests
    # NOTE(review): the units of ``budget`` and ``episode_cap`` are not defined
    # in this module -- confirm against the episode/reward code before tuning.
    budget=4000,
    episode_cap=20,
)
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
# Registry of every known task, keyed by its ``id``. Insertion order is the
# order in which ``list_tasks()`` reports them.
_TASKS: dict[str, Task] = {
    registered.id: registered for registered in (TIER_0_EMAIL_VALIDATOR,)
}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def list_tasks() -> list[Task]:
    """Return a new list holding every registered task, in registry order."""
    return [*_TASKS.values()]
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def get_task(task_id: str) -> Task | None:
    """Look up a registered task by id, yielding ``None`` for unknown ids."""
    try:
        return _TASKS[task_id]
    except KeyError:
        return None
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def default_task() -> Task:
    """Return the fallback task that `/reset` uses when no ``task_id`` is given."""
    fallback = TIER_0_EMAIL_VALIDATOR
    return fallback
|
graphforge/tasks/schema.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Task data model.
|
| 2 |
+
|
| 3 |
+
A *task* is the agent-facing unit of work. The visible portion is what the
|
| 4 |
+
agent sees at reset — natural-language description plus the visible subset
|
| 5 |
+
of constraints. The hidden portion drives reward but is invisible to the
|
| 6 |
+
policy, forcing the agent to interpret the description rather than mechanically
|
| 7 |
+
satisfying a fully-revealed checklist (PROPOSAL.md §2.1).
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
from pydantic import BaseModel, ConfigDict, Field
|
| 13 |
+
|
| 14 |
+
from graphforge.constraints.schema import Constraint
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class Task(BaseModel):
    # Immutable description of one unit of work handed to the agent.
    # Unknown keyword arguments are rejected at construction time.
    model_config = ConfigDict(frozen=True, extra="forbid")

    # Identity and difficulty tier (0 through 3 inclusive).
    id: str = Field(..., min_length=1)
    tier: int = Field(..., ge=0, le=3)
    description: str = Field(..., min_length=1)

    # Constraints are split by whether ``visible_payload`` reveals them.
    visible_constraints: list[Constraint] = Field(default_factory=list)
    hidden_constraints: list[Constraint] = Field(default_factory=list)

    # Behavioral test names are visible to the agent at reset; bodies live in
    # the test runner (TODO) and are hidden. Empty for tier-0.
    behavioral_test_names: list[str] = Field(default_factory=list)

    # Both limits must be strictly positive.
    budget: int = Field(..., gt=0)
    episode_cap: int = Field(..., gt=0)

    @property
    def all_constraints(self) -> list[Constraint]:
        """Visible constraints followed by hidden ones, as a new list."""
        return [*self.visible_constraints, *self.hidden_constraints]

    def visible_payload(self) -> dict[str, object]:
        """Build the agent-facing view of this task (hidden constraints omitted)."""
        payload: dict[str, object] = {
            "id": self.id,
            "tier": self.tier,
            "description": self.description,
        }
        # Constraints are dumped to plain data; the names list is copied so the
        # caller cannot mutate this task's own list through the payload.
        payload["visible_constraints"] = [
            constraint.model_dump() for constraint in self.visible_constraints
        ]
        payload["behavioral_test_names"] = [*self.behavioral_test_names]
        payload["budget"] = self.budget
        payload["episode_cap"] = self.episode_cap
        return payload
|