shreyas-joshi committed on
Commit
920d80b
·
1 Parent(s): 944123e

Refactor inference and training modules; add Dockerfile and server setup

Browse files

- Consolidated inference logic into a new `inference.py` file.
- Created `inference_training.py` for training-related functionalities.
- Added a Dockerfile to streamline environment setup and dependencies.
- Introduced `openenv.yaml` for task and model configurations.
- Updated `pyproject.toml` to include necessary dependencies and project metadata.
- Implemented server initialization in `server/app.py` to load the subproject dynamically.
- Added `__init__.py` to the server directory for package recognition.
- Established a lock file for dependency management.

Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app/code-review-env
4
+ COPY code-review-env/requirements.txt /app/code-review-env/requirements.txt
5
+ RUN pip install --no-cache-dir -r requirements.txt
6
+ COPY code-review-env /app/code-review-env
7
+
8
+ ENV GRAPHREVIEW_SOURCE_ROOT=/app/code-review-env/sample_project
9
+ RUN python -m db.seed sample_project/ --force
10
+
11
+ CMD ["uvicorn", "server.app:app", "--app-dir", "/app/code-review-env", "--host", "0.0.0.0", "--port", "7860"]
code-review-env/inference.py CHANGED
@@ -1,393 +1,311 @@
1
  from __future__ import annotations
2
 
3
- import argparse
4
- from datetime import UTC, datetime
5
  import json
6
  import os
7
- from pathlib import Path
8
- import uuid
 
 
 
9
 
10
  from openai import OpenAI
11
 
12
- from db.seed import seed_project
13
- from db.store import Store
14
- from env.runtime_config import load_runtime_config
15
- from parser.semantic_checks import detect_semantic_issues
16
- from training.run_manager import TrainingRunManager
17
- from training.weights import WeightSafetyManager
18
 
19
 
20
  # Submission-required runtime variables.
21
- HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HFTOKEN")
 
 
22
  LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
23
 
24
- # Hosted fallback: if HF_TOKEN exists and endpoint/model are not explicitly provided,
25
- # use Hugging Face Router with a stable instruct model.
26
- if HF_TOKEN and not os.getenv("API_BASE_URL") and not os.getenv("GRAPHREVIEW_LLM_BASE_URL"):
27
- API_BASE_URL = "https://router.huggingface.co/v1"
28
- else:
29
- API_BASE_URL = os.getenv("API_BASE_URL", os.getenv("GRAPHREVIEW_LLM_BASE_URL", "http://localhost:11434/v1"))
30
-
31
- if HF_TOKEN and not os.getenv("MODEL_NAME"):
32
- MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
33
- else:
34
- MODEL_NAME = os.getenv("MODEL_NAME", "gemma4:e4b")
35
-
36
- # Keep current behavior for local Ollama while supporting hosted providers via HF_TOKEN.
37
- API_KEY = HF_TOKEN or os.getenv("GRAPHREVIEW_LLM_API_KEY", "ollama")
38
-
39
-
40
- def _build_parser() -> argparse.ArgumentParser:
41
- parser = argparse.ArgumentParser(description="GraphReview deterministic inference/training harness")
42
- parser.add_argument("target", help="Path to target Python project")
43
- parser.add_argument("--db-path", default=None, help="Optional DB path")
44
- parser.add_argument("--force-seed", action="store_true", help="Force re-seed")
45
- parser.add_argument(
46
- "--register-weights",
47
- action="store_true",
48
- help="Register model weights and write verification manifest",
49
- )
50
- parser.add_argument(
51
- "--deterministic-output",
52
- default="outputs/training/deterministic_findings.jsonl",
53
- help="Path to write normalized deterministic findings",
54
- )
55
- parser.add_argument("--baseline-precision", type=float, default=None, help="Optional precision floor baseline")
56
- parser.add_argument("--baseline-recall", type=float, default=None, help="Optional recall floor baseline")
57
- parser.add_argument(
58
- "--regression-tolerance",
59
- type=float,
60
- default=0.01,
61
- help="Allowed drop from baseline precision/recall",
62
- )
63
- return parser
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
 
 
65
 
66
- def _finding_key(analyzer: str, module_id: str, rule_id: str, line: int) -> str:
67
- return f"{analyzer}:{module_id}:{rule_id}:{line}"
68
 
 
 
69
 
70
- def _target_key(module_id: str, line: int) -> str:
71
- return f"{module_id}:{line}"
72
 
 
 
 
73
 
74
- def _safe_float(raw: str | None, default: float) -> float:
75
- if raw is None:
76
- return default
77
- try:
78
- return float(raw)
79
- except ValueError:
80
- return default
81
 
 
 
82
 
83
- def _build_agent_prompt(module_id: str, code: str, ast_summary: str) -> str:
84
- return (
85
- "You are reviewing one Python module in a dependency-aware code review environment. "
86
- "Do not rely on prior analyzer findings because they are hidden from you. "
87
- "Find concrete, actionable issues only, with line numbers and confidence.\n\n"
88
- "Your objectives are:\n"
89
- "1) Identify real bug, security, or dependency-risk issues in the provided code.\n"
90
- "2) Prefer deterministic evidence over speculative style feedback.\n"
91
- "3) If you suspect cascade risk, explain likely upstream/downstream impact in rationale.\n"
92
- "4) Return strictly valid JSON matching this schema: "
93
- "{\"findings\": [{\"line\": int, \"category\": \"bug|security|dependency\", \"rule_hint\": str, \"message\": str, \"confidence\": float}]}.\n\n"
94
- f"Module: {module_id}\n"
95
- f"AST Summary: {ast_summary}\n"
96
- "Code:\n"
97
- f"{code}\n"
98
  )
99
 
100
 
101
- def _extract_agent_findings(store: Store, config) -> set[str]:
102
- model = MODEL_NAME
103
- base_url = API_BASE_URL
104
- api_key = API_KEY
105
- enabled = os.getenv("GRAPHREVIEW_AGENT_INFERENCE_ENABLED", "true").strip().lower() == "true"
 
106
 
107
- findings: set[str] = set()
108
- node_snapshot = store.get_full_graph().nodes
109
- use_llm = enabled and base_url and model
110
- client = OpenAI(api_key=api_key, base_url=base_url, timeout=12.0) if use_llm else None
111
 
112
- llm_enabled = client is not None
113
- if llm_enabled:
114
- try:
115
- models = client.models.list()
116
- available = {item.id for item in models.data if getattr(item, "id", None)}
117
- if model not in available:
118
- print(
119
- f"[STEP] agent_llm_disabled reason=model-not-found model={model} "
120
- f"available_count={len(available)}"
121
- )
122
- llm_enabled = False
123
- except Exception as exc:
124
- print(f"[STEP] agent_llm_disabled reason=model-list-failed error={type(exc).__name__}")
125
- llm_enabled = False
126
-
127
- for node in node_snapshot:
128
- node_row = store.get_node(node.module_id)
129
- if node_row is None:
130
- continue
131
-
132
- module_id = node_row.module_id
133
- code = node_row.raw_code
134
- ast_summary = node_row.ast_summary
135
- collected = False
136
-
137
- if llm_enabled and client is not None:
138
- prompt = _build_agent_prompt(module_id=module_id, code=code, ast_summary=ast_summary)
139
- try:
140
- resp = client.chat.completions.create(
141
- model=model,
142
- temperature=0.0,
143
- response_format={"type": "json_object"},
144
- messages=[
145
- {
146
- "role": "system",
147
- "content": "Return only JSON. Do not include markdown. Keep claims concrete and line-specific.",
148
- },
149
- {"role": "user", "content": prompt},
150
- ],
151
- )
152
- text = (resp.choices[0].message.content or "{}").strip()
153
- payload = json.loads(text)
154
- rows = payload.get("findings", []) if isinstance(payload, dict) else []
155
- if isinstance(rows, list):
156
- for item in rows:
157
- if not isinstance(item, dict):
158
- continue
159
- confidence = _safe_float(str(item.get("confidence", "0.0")), 0.0)
160
- if confidence < 0.45:
161
- continue
162
- line = max(1, int(item.get("line", 1)))
163
- category = str(item.get("category", "bug")).lower()
164
- analyzer = "agent-security" if category == "security" else "agent-logic"
165
- rule_hint = str(item.get("rule_hint") or "agent")[:80]
166
- findings.add(_finding_key(analyzer, module_id, rule_hint, line))
167
- collected = True
168
- except Exception as exc:
169
- print(
170
- f"[STEP] agent_llm_disabled reason=completion-failed error={type(exc).__name__} "
171
- f"module={module_id}"
172
- )
173
- llm_enabled = False
174
- collected = False
175
 
176
- if collected:
177
- continue
178
 
179
- # Deterministic fallback so training bootstrap still works offline.
180
- for issue in detect_semantic_issues(code):
181
- findings.add(_finding_key("agent-heuristic", module_id, issue.stage, max(issue.line, 1)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
- return findings
184
 
 
 
 
 
 
 
 
185
 
186
- def main() -> None:
187
- args = _build_parser().parse_args()
188
- config = load_runtime_config()
189
 
190
- target = Path(args.target).resolve()
191
- print(f"[START] target={target} model={MODEL_NAME} mode=deterministic-ground-truth")
 
 
 
 
192
 
193
- weight_manager = WeightSafetyManager(Path(config.llm_weight_manifest_dir))
194
- verified_weight_path: str | None = None
195
- if args.register_weights:
196
- try:
197
- manifest = weight_manager.register_existing(
198
- model_name=MODEL_NAME,
199
- weight_path=Path(config.llm_model_agent_path),
200
- )
201
- print(
202
- "[STEP] weights_registered "
203
- + json.dumps(
204
- {
205
- "model": manifest.model_name,
206
- "sha256": manifest.sha256,
207
- "size_bytes": manifest.size_bytes,
208
- },
209
- sort_keys=True,
210
- )
211
- )
212
- except FileNotFoundError:
213
- print(
214
- f"[STEP] weights_register_skipped reason=missing-local-weights model={MODEL_NAME} "
215
- f"path={config.llm_model_agent_path}"
216
- )
217
 
 
 
 
 
 
 
 
 
218
  try:
219
- verified_weight_path = str(weight_manager.load_verified(MODEL_NAME))
220
- except FileNotFoundError:
221
  try:
222
- manifest = weight_manager.register_existing(
223
- model_name=MODEL_NAME,
224
- weight_path=Path(config.llm_model_agent_path),
225
- )
226
- print(
227
- "[STEP] weights_registered "
228
- + json.dumps(
229
- {
230
- "model": manifest.model_name,
231
- "sha256": manifest.sha256,
232
- "size_bytes": manifest.size_bytes,
233
- },
234
- sort_keys=True,
235
- )
236
- )
237
- verified_weight_path = str(weight_manager.load_verified(MODEL_NAME))
238
- except FileNotFoundError:
239
- print(
240
- f"[STEP] weights_unavailable reason=missing-local-weights model={MODEL_NAME} "
241
- f"path={config.llm_model_agent_path}"
 
 
 
 
 
 
 
 
 
 
 
242
  )
 
 
243
 
244
- if verified_weight_path is not None:
245
- print(f"[STEP] weights_verified path={verified_weight_path}")
246
- else:
247
- print("[STEP] weights_verified path=unavailable mode=api-only")
248
-
249
- seed_result = seed_project(target_dir=target, db_path=args.db_path, force=args.force_seed)
250
- print(f"[STEP] seeded {json.dumps(seed_result, sort_keys=True)}")
251
-
252
- store = Store(source_root=str(target), db_path=args.db_path)
253
- deterministic_findings = store.get_analyzer_findings()
254
- deterministic_keys = {
255
- _finding_key(item.analyzer, item.module_id, item.rule_id, item.line)
256
- for item in deterministic_findings
257
- }
258
- deterministic_targets = {
259
- _target_key(item.module_id, item.line)
260
- for item in deterministic_findings
261
- }
262
-
263
- agent_keys = _extract_agent_findings(store=store, config=config)
264
- agent_targets: set[str] = set()
265
- for item in agent_keys:
266
- parts = item.split(":")
267
- if len(parts) < 4:
268
- continue
269
- module_id = parts[1]
270
  try:
271
- line = int(parts[-1])
272
- except ValueError:
273
- continue
274
- agent_targets.add(_target_key(module_id, line))
275
-
276
- manager = TrainingRunManager()
277
- comparison = manager.compare(deterministic_findings=deterministic_targets, agent_findings=agent_targets)
278
-
279
- records: list[dict[str, object]] = []
280
- for finding in deterministic_findings:
281
- records.append(
282
- manager.build_preference_record(
283
- prompt=(
284
- "Review the module and detect concrete bugs, security issues, and "
285
- "dependency-attributed cascade problems without relying on prior findings."
286
- ),
287
- agent_output="",
288
- deterministic_targets=[
289
- _finding_key(
290
- finding.analyzer,
291
- finding.module_id,
292
- finding.rule_id,
293
- finding.line,
294
- )
295
- ],
296
- reward=0.0,
297
- )
298
- )
299
 
300
- output_path = Path(args.deterministic_output)
301
- manager.save_records(output_path, records)
302
-
303
- baseline_precision = args.baseline_precision
304
- baseline_recall = args.baseline_recall
305
- prior_runs = store.list_training_runs(limit=100)
306
- if baseline_precision is None and prior_runs:
307
- baseline_precision = max(item.precision for item in prior_runs)
308
- if baseline_recall is None and prior_runs:
309
- baseline_recall = max(item.recall for item in prior_runs)
310
-
311
- passed_non_regression = True
312
- if baseline_precision is not None and baseline_recall is not None:
313
- manager.assert_non_regression(
314
- baseline_precision=baseline_precision,
315
- baseline_recall=baseline_recall,
316
- current_precision=comparison.precision,
317
- current_recall=comparison.recall,
318
- tolerance=args.regression_tolerance,
319
- )
320
- print(
321
- "[STEP] non_regression_guard "
322
- + json.dumps(
323
- {
324
- "baseline_precision": baseline_precision,
325
- "baseline_recall": baseline_recall,
326
- "tolerance": args.regression_tolerance,
327
- },
328
- sort_keys=True,
329
- )
330
- )
331
- print(
332
- "[STEP] training_dataset "
333
- + json.dumps(
334
- {
335
- "output": str(output_path),
336
- "records": len(records),
337
- "precision": comparison.precision,
338
- "recall": comparison.recall,
339
- "false_negatives": comparison.false_negatives,
340
- },
341
- sort_keys=True,
342
- )
343
- )
344
 
345
- run_id = f"tr-{datetime.now(UTC).strftime('%Y%m%d%H%M%S')}-{uuid.uuid4().hex[:8]}"
346
- run_config = {
347
- "target": str(target),
348
- "model": MODEL_NAME,
349
- "model_path": config.llm_model_agent_path,
350
- "agent_inference_enabled": os.getenv("GRAPHREVIEW_AGENT_INFERENCE_ENABLED", "true"),
351
- "regression_tolerance": args.regression_tolerance,
352
- "baseline_precision": baseline_precision,
353
- "baseline_recall": baseline_recall,
354
- }
355
- sha256 = "unavailable"
356
- if verified_weight_path is not None:
357
- sha256 = weight_manager.checksum(Path(verified_weight_path))
358
- store.create_training_run(
359
- run_id=run_id,
360
- model_name=MODEL_NAME,
361
- model_sha256=sha256,
362
- deterministic_findings=len(deterministic_keys),
363
- agent_findings=len(agent_keys),
364
- true_positives=comparison.true_positives,
365
- false_positives=comparison.false_positives,
366
- false_negatives=comparison.false_negatives,
367
- precision=comparison.precision,
368
- recall=comparison.recall,
369
- passed_non_regression=passed_non_regression,
370
- output_path=str(output_path),
371
- run_config_json=json.dumps(run_config, sort_keys=True),
372
- )
373
- print(f"[STEP] training_run_id={run_id}")
374
 
375
- print(
376
- "[END] "
377
- + json.dumps(
378
- {
379
- "ok": True,
380
- "deterministic_findings": len(deterministic_findings),
381
- "agent_findings": len(agent_keys),
382
- "model_weight": verified_weight_path or "unavailable",
383
- "model": MODEL_NAME,
384
- "precision": comparison.precision,
385
- "recall": comparison.recall,
386
- "run_id": run_id,
387
- },
388
- sort_keys=True,
389
- )
390
- )
391
 
392
 
393
  if __name__ == "__main__":
 
1
  from __future__ import annotations
2
 
 
 
3
  import json
4
  import os
5
+ import sys
6
+ from dataclasses import dataclass
7
+ from typing import Any
8
+ from urllib import error as urlerror
9
+ from urllib import request as urlrequest
10
 
11
  from openai import OpenAI
12
 
13
+ from inference_training import main as training_main
 
 
 
 
 
14
 
15
 
16
  # Submission-required runtime variables.
17
+ API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
18
+ MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
19
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
20
  LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
21
 
22
+ # GraphReview defaults.
23
+ BENCHMARK = os.getenv("GRAPHREVIEW_BENCHMARK", "graphreview")
24
+ ENV_BASE_URL = os.getenv("GRAPHREVIEW_BASE_URL", "http://127.0.0.1:7860")
25
+ TASKS = [
26
+ item.strip()
27
+ for item in os.getenv("GRAPHREVIEW_TASKS", "style_review,logic_review,cascade_review").split(",")
28
+ if item.strip()
29
+ ]
30
+ MAX_STEPS = int(os.getenv("GRAPHREVIEW_MAX_EPISODE_STEPS", "24"))
31
+ TEMPERATURE = float(os.getenv("GRAPHREVIEW_INFER_TEMPERATURE", "0.2"))
32
+ MAX_TOKENS = int(os.getenv("GRAPHREVIEW_INFER_MAX_TOKENS", "180"))
33
+ SUCCESS_SCORE_THRESHOLD = float(os.getenv("GRAPHREVIEW_SUCCESS_THRESHOLD", "0.5"))
34
+
35
+
36
+ @dataclass
37
+ class ReviewAction:
38
+ action_type: str
39
+ target_line: int | None = None
40
+ content: str | None = None
41
+ attributed_to: str | None = None
42
+ context_request: str | None = None
43
+
44
+
45
+ @dataclass
46
+ class GraphReviewObservation:
47
+ module_id: str
48
+ code: str
49
+ task_description: str
50
+ available_actions: list[str]
51
+
52
+
53
+ @dataclass
54
+ class GraphReviewStepResult:
55
+ observation: GraphReviewObservation
56
+ reward: float
57
+ done: bool
58
+
59
+
60
class GraphReviewClient:
    """Small JSON-over-HTTP client for the environment's ``/reset`` and ``/step`` endpoints."""

    def __init__(self, base_url: str) -> None:
        """Store the server base URL without a trailing slash."""
        self.base_url = base_url.rstrip("/")

    def _step_payload(self, action: ReviewAction) -> dict[str, object]:
        """Build the ``/step`` request body, omitting unset or empty optional fields."""
        payload: dict[str, object] = {"action_type": action.action_type}
        if action.target_line is not None:
            payload["target_line"] = action.target_line
        if action.content:
            payload["content"] = action.content
        if action.attributed_to:
            payload["attributed_to"] = action.attributed_to
        if action.context_request:
            payload["context_request"] = action.context_request
        return {"action": payload}

    def _request_json(self, path: str, payload: dict[str, object]) -> dict[str, Any]:
        """POST ``payload`` as JSON to ``base_url + path`` and decode the JSON reply.

        Returns:
            The decoded response, or an empty dict when the response body is empty.

        Raises:
            RuntimeError: On an HTTP error status, a connection failure, or a timeout.
        """
        body = json.dumps(payload).encode("utf-8")
        req = urlrequest.Request(
            f"{self.base_url}{path}",
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        try:
            with urlrequest.urlopen(req, timeout=30) as resp:
                raw = resp.read().decode("utf-8")
                return json.loads(raw) if raw else {}
        except urlerror.HTTPError as exc:
            detail = exc.read().decode("utf-8", errors="ignore")
            raise RuntimeError(f"HTTP {exc.code}: {detail}") from exc
        except urlerror.URLError as exc:
            raise RuntimeError(f"Connection error: {exc.reason}") from exc
        except TimeoutError as exc:
            # A timeout while reading the body is not wrapped in URLError;
            # surface it the same way as the other transport failures.
            raise RuntimeError(f"Connection error: {exc}") from exc

    def _parse_result(self, payload: dict[str, Any]) -> GraphReviewStepResult:
        """Convert a raw server response into a ``GraphReviewStepResult``.

        Missing or ``null`` fields fall back to neutral defaults instead of raising.
        """
        obs = payload.get("observation") or {}
        if not isinstance(obs, dict):
            # A non-object observation carries no usable fields.
            obs = {}
        return GraphReviewStepResult(
            observation=GraphReviewObservation(
                module_id=str(obs.get("module_id", "unknown")),
                code=str(obs.get("code", "")),
                task_description=str(obs.get("task_description", "")),
                # ``or []`` guards against an explicit JSON null.
                available_actions=list(obs.get("available_actions") or []),
            ),
            reward=float(payload.get("reward", 0.0) or 0.0),
            done=bool(payload.get("done", False)),
        )

    def reset(self, task_id: str) -> GraphReviewStepResult:
        """Start an episode for ``task_id`` via ``/reset`` and return the parsed result."""
        return self._parse_result(self._request_json("/reset", {"task_id": task_id}))

    def step(self, action: ReviewAction) -> GraphReviewStepResult:
        """Send ``action`` to ``/step`` and return the parsed result."""
        return self._parse_result(self._request_json("/step", self._step_payload(action)))

    def close(self) -> None:
        """No-op; each request opens and closes its own connection."""
        return None
115
 
 
 
116
 
117
+ def _is_training_mode(argv: list[str]) -> bool:
118
+ # Keep backward compatibility for existing training endpoints that pass a target path.
119
+ return any(not arg.startswith("-") for arg in argv[1:])
120
 
 
 
 
 
 
 
 
121
 
122
+ def log_start(task: str, env: str, model: str) -> None:
123
+ print(f"[START] task={task} env={env} model={model}", flush=True)
124
 
125
+
126
+ def log_step(step: int, action: str, reward: float, done: bool, error: str | None) -> None:
127
+ action_one_line = action.replace("\n", " ").replace("\r", " ").strip()
128
+ error_val = error if error else "null"
129
+ print(
130
+ f"[STEP] step={step} action={action_one_line} reward={reward:.2f} "
131
+ f"done={str(done).lower()} error={error_val}",
132
+ flush=True,
 
 
 
 
 
 
 
133
  )
134
 
135
 
136
+ def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
137
+ rewards_str = ",".join(f"{r:.2f}" for r in rewards)
138
+ print(
139
+ f"[END] success={str(success).lower()} steps={steps} score={score:.2f} rewards={rewards_str}",
140
+ flush=True,
141
+ )
142
 
 
 
 
 
143
 
144
+ def _build_prompt(observation: GraphReviewObservation, step: int) -> str:
145
+ code = observation.code[:2200]
146
+ actions = ", ".join(observation.available_actions) if observation.available_actions else "APPROVE"
147
+ return (
148
+ "You are reviewing Python code in GraphReview. Return only compact JSON with keys: "
149
+ "action_type, target_line (optional int), content (optional string), "
150
+ "attributed_to (optional string), context_request (optional string).\n"
151
+ f"Step: {step}\n"
152
+ f"Module: {observation.module_id}\n"
153
+ f"Task: {observation.task_description}\n"
154
+ f"Available actions: {actions}\n"
155
+ "Prefer concrete bug/security/dependency findings over style comments.\n"
156
+ "If uncertain, use REQUEST_CONTEXT or ADD_COMMENT instead of hallucinating.\n"
157
+ f"Code:\n{code}"
158
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
 
 
160
 
161
+ def _fallback_action(observation: GraphReviewObservation, step: int) -> ReviewAction:
162
+ if "REQUEST_CONTEXT" in observation.available_actions and step <= 2:
163
+ return ReviewAction(action_type="REQUEST_CONTEXT", context_request="upstream dependency module")
164
+ if "ADD_COMMENT" in observation.available_actions:
165
+ return ReviewAction(
166
+ action_type="ADD_COMMENT",
167
+ target_line=1,
168
+ content="Potential issue requires confirmation from dependency context.",
169
+ )
170
+ if "REQUEST_CHANGES" in observation.available_actions:
171
+ return ReviewAction(action_type="REQUEST_CHANGES")
172
+ return ReviewAction(action_type="APPROVE")
173
+
174
+
175
+ def _action_to_log_string(action: ReviewAction) -> str:
176
+ parts = [f"action_type={action.action_type}"]
177
+ if action.target_line is not None:
178
+ parts.append(f"target_line={action.target_line}")
179
+ if action.content:
180
+ parts.append(f"content={action.content[:90]}")
181
+ if action.attributed_to:
182
+ parts.append(f"attributed_to={action.attributed_to}")
183
+ if action.context_request:
184
+ parts.append(f"context_request={action.context_request}")
185
+ return ";".join(parts)
186
+
187
+
188
def _propose_action(client: OpenAI, observation: GraphReviewObservation, step: int) -> ReviewAction:
    """Ask the model for the next review action and validate its JSON reply.

    Falls back to ``_fallback_action`` when the reply is not a JSON object, has
    no ``action_type``, or names an action the observation does not allow.

    Raises:
        Exception: Errors from the completion call or from ``json.loads`` on a
            non-JSON reply propagate to the caller.
    """
    prompt = _build_prompt(observation=observation, step=step)
    completion = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {"role": "system", "content": "Return valid JSON only. No markdown."},
            {"role": "user", "content": prompt},
        ],
        temperature=TEMPERATURE,
        max_tokens=MAX_TOKENS,
        stream=False,
    )
    text = (completion.choices[0].message.content or "{}").strip()
    payload = json.loads(text)
    if not isinstance(payload, dict):
        return _fallback_action(observation=observation, step=step)

    def _optional_text(key: str) -> str | None:
        # A JSON null must stay None; str(None) would yield the literal "None".
        raw = payload.get(key)
        if raw is None:
            return None
        return str(raw).strip() or None

    action_type = (_optional_text("action_type") or "").upper()
    if not action_type:
        return _fallback_action(observation=observation, step=step)

    if observation.available_actions and action_type not in observation.available_actions:
        return _fallback_action(observation=observation, step=step)

    target_line_raw = payload.get("target_line")
    target_line = None
    # bool is an int subclass; exclude it so JSON true is not taken as line 1.
    if (
        isinstance(target_line_raw, int)
        and not isinstance(target_line_raw, bool)
        and target_line_raw > 0
    ):
        target_line = target_line_raw

    return ReviewAction(
        action_type=action_type,
        target_line=target_line,
        content=_optional_text("content"),
        attributed_to=_optional_text("attributed_to"),
        context_request=_optional_text("context_request"),
    )
224
 
 
225
 
226
+ def _normalize_score(rewards: list[float]) -> float:
227
+ if not rewards:
228
+ return 0.0
229
+ avg = sum(rewards) / float(len(rewards))
230
+ # Reward scales vary by grader, so use bounded transform to keep score in [0, 1].
231
+ score = 1.0 / (1.0 + (2.718281828 ** (-avg)))
232
+ return max(0.0, min(1.0, score))
233
 
 
 
 
234
 
235
+ def _build_env() -> GraphReviewClient:
236
+ if LOCAL_IMAGE_NAME:
237
+ # LOCAL_IMAGE_NAME is accepted for contract compatibility;
238
+ # this runner connects to the serving endpoint configured in GRAPHREVIEW_BASE_URL.
239
+ pass
240
+ return GraphReviewClient(base_url=ENV_BASE_URL)
241
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
def _run_single_task(task_name: str, model_client: OpenAI) -> None:
    """Run one episode of ``task_name`` and emit its start, step and end records.

    The episode is reset, then stepped for at most ``MAX_STEPS`` iterations.
    Each action comes from the model, or from ``_fallback_action`` when the
    proposal fails. The ``[END]`` record is always emitted, even when the reset
    or a step fails.

    Args:
        task_name: Task identifier sent to the environment's reset endpoint.
        model_client: OpenAI-compatible client used to propose actions.
    """
    env = _build_env()
    rewards: list[float] = []
    steps_taken = 0
    score = 0.0
    success = False

    log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)
    try:
        try:
            result = env.reset(task_id=task_name)
        except Exception as exc:
            # Report on stderr so stdout keeps only the records written by the
            # log_* helpers; the finally block still emits [END].
            print(f"reset failed for task {task_name}: {exc}", file=sys.stderr, flush=True)
            return

        for step in range(1, MAX_STEPS + 1):
            if result.done:
                break

            try:
                action = _propose_action(model_client, result.observation, step)
            except Exception as exc:
                # Best effort: a failed or unparsable model reply must not end
                # the episode, but the cause is worth surfacing.
                print(
                    f"model proposal failed at step {step}: {type(exc).__name__}: {exc}",
                    file=sys.stderr,
                    flush=True,
                )
                action = _fallback_action(result.observation, step)

            error: str | None = None
            try:
                result = env.step(action)
                reward = float(result.reward or 0.0)
                done = bool(result.done)
            except Exception as exc:
                # Keep the previous observation and record the failure in the step log.
                reward = 0.0
                done = False
                error = str(exc)

            rewards.append(reward)
            steps_taken = step
            log_step(
                step=step,
                action=_action_to_log_string(action),
                reward=reward,
                done=done,
                error=error,
            )
            if done:
                break

        score = _normalize_score(rewards)
        success = score >= SUCCESS_SCORE_THRESHOLD
    finally:
        try:
            env.close()
        finally:
            log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
+ def _run_submission_mode() -> None:
298
+ api_key = HF_TOKEN or ""
299
+ model_client = OpenAI(base_url=API_BASE_URL, api_key=api_key)
300
+ for task in TASKS:
301
+ _run_single_task(task_name=task, model_client=model_client)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
303
+
304
+ def main() -> None:
305
+ if _is_training_mode(sys.argv):
306
+ training_main()
307
+ return
308
+ _run_submission_mode()
 
 
 
 
 
 
 
 
 
 
309
 
310
 
311
  if __name__ == "__main__":
code-review-env/inference_training.py ADDED
@@ -0,0 +1,394 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from datetime import UTC, datetime
5
+ import json
6
+ import os
7
+ from pathlib import Path
8
+ import uuid
9
+
10
+ from openai import OpenAI
11
+
12
+ from db.seed import seed_project
13
+ from db.store import Store
14
+ from env.runtime_config import load_runtime_config
15
+ from parser.semantic_checks import detect_semantic_issues
16
+ from training.run_manager import TrainingRunManager
17
+ from training.weights import WeightSafetyManager
18
+
19
+
20
+ # Submission-required runtime variables.
21
+ HF_TOKEN = os.getenv("HF_TOKEN") or os.getenv("HFTOKEN")
22
+ LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
23
+
24
+ # Hosted fallback: if HF_TOKEN exists and endpoint/model are not explicitly provided,
25
+ # use Hugging Face Router with a stable instruct model.
26
+ if HF_TOKEN and not os.getenv("API_BASE_URL") and not os.getenv("GRAPHREVIEW_LLM_BASE_URL"):
27
+ API_BASE_URL = "https://router.huggingface.co/v1"
28
+ else:
29
+ API_BASE_URL = os.getenv("API_BASE_URL", os.getenv("GRAPHREVIEW_LLM_BASE_URL", "http://localhost:11434/v1"))
30
+
31
+ if HF_TOKEN and not os.getenv("MODEL_NAME"):
32
+ MODEL_NAME = "meta-llama/Meta-Llama-3.1-8B-Instruct"
33
+ else:
34
+ MODEL_NAME = os.getenv("MODEL_NAME", "gemma4:e4b")
35
+
36
+ # Keep current behavior for local Ollama while supporting hosted providers via HF_TOKEN.
37
+ API_KEY = HF_TOKEN or os.getenv("GRAPHREVIEW_LLM_API_KEY", "ollama")
38
+
39
+
40
+ def _build_parser() -> argparse.ArgumentParser:
41
+ parser = argparse.ArgumentParser(description="GraphReview deterministic inference/training harness")
42
+ parser.add_argument("target", help="Path to target Python project")
43
+ parser.add_argument("--db-path", default=None, help="Optional DB path")
44
+ parser.add_argument("--force-seed", action="store_true", help="Force re-seed")
45
+ parser.add_argument(
46
+ "--register-weights",
47
+ action="store_true",
48
+ help="Register model weights and write verification manifest",
49
+ )
50
+ parser.add_argument(
51
+ "--deterministic-output",
52
+ default="outputs/training/deterministic_findings.jsonl",
53
+ help="Path to write normalized deterministic findings",
54
+ )
55
+ parser.add_argument("--baseline-precision", type=float, default=None, help="Optional precision floor baseline")
56
+ parser.add_argument("--baseline-recall", type=float, default=None, help="Optional recall floor baseline")
57
+ parser.add_argument(
58
+ "--regression-tolerance",
59
+ type=float,
60
+ default=0.01,
61
+ help="Allowed drop from baseline precision/recall",
62
+ )
63
+ return parser
64
+
65
+
66
+ def _finding_key(analyzer: str, module_id: str, rule_id: str, line: int) -> str:
67
+ return f"{analyzer}:{module_id}:{rule_id}:{line}"
68
+
69
+
70
+ def _target_key(module_id: str, line: int) -> str:
71
+ return f"{module_id}:{line}"
72
+
73
+
74
+ def _safe_float(raw: str | None, default: float) -> float:
75
+ if raw is None:
76
+ return default
77
+ try:
78
+ return float(raw)
79
+ except ValueError:
80
+ return default
81
+
82
+
83
+ def _build_agent_prompt(module_id: str, code: str, ast_summary: str) -> str:
84
+ return (
85
+ "You are reviewing one Python module in a dependency-aware code review environment. "
86
+ "Do not rely on prior analyzer findings because they are hidden from you. "
87
+ "Find concrete, actionable issues only, with line numbers and confidence.\n\n"
88
+ "Your objectives are:\n"
89
+ "1) Identify real bug, security, or dependency-risk issues in the provided code.\n"
90
+ "2) Prefer deterministic evidence over speculative style feedback.\n"
91
+ "3) If you suspect cascade risk, explain likely upstream/downstream impact in rationale.\n"
92
+ "4) Return strictly valid JSON matching this schema: "
93
+ "{\"findings\": [{\"line\": int, \"category\": \"bug|security|dependency\", \"rule_hint\": str, \"message\": str, \"confidence\": float}]}.\n\n"
94
+ f"Module: {module_id}\n"
95
+ f"AST Summary: {ast_summary}\n"
96
+ "Code:\n"
97
+ f"{code}\n"
98
+ )
99
+
100
+
101
def _agent_keys_from_payload(payload: object, module_id: str) -> set[str]:
    """Turn one decoded model response into finding keys, skipping malformed rows."""
    rows = payload.get("findings", []) if isinstance(payload, dict) else []
    if not isinstance(rows, list):
        return set()

    keys: set[str] = set()
    for item in rows:
        if not isinstance(item, dict):
            continue
        confidence = _safe_float(str(item.get("confidence", "0.0")), 0.0)
        # Written as "not >=" so that a NaN confidence is rejected as well.
        if not confidence >= 0.45:
            continue
        try:
            line = max(1, int(item.get("line", 1)))
        except (TypeError, ValueError, OverflowError):
            # A row without a usable line number cannot be matched to a
            # target; drop it instead of aborting the whole response.
            continue
        category = str(item.get("category", "bug")).lower()
        analyzer = "agent-security" if category == "security" else "agent-logic"
        rule_hint = str(item.get("rule_hint") or "agent")[:80]
        keys.add(_finding_key(analyzer, module_id, rule_hint, line))
    return keys


def _extract_agent_findings(store: Store, config) -> set[str]:
    """Collect agent finding keys for every module in the store's graph.

    For each module the configured chat model is asked for findings; when the
    model is disabled, unavailable, or fails, the module falls back to
    ``detect_semantic_issues`` so the run still produces data offline.

    Args:
        store: Source of the module graph and per-module code / AST summary.
        config: Runtime configuration. Not read here; kept so the call
            signature stays stable.

    Returns:
        A set of keys built by ``_finding_key``
        (``analyzer:module_id:rule:line``).
    """
    model = MODEL_NAME
    base_url = API_BASE_URL
    api_key = API_KEY
    enabled = os.getenv("GRAPHREVIEW_AGENT_INFERENCE_ENABLED", "true").strip().lower() == "true"

    findings: set[str] = set()
    node_snapshot = store.get_full_graph().nodes

    client = None
    if enabled and base_url and model:
        try:
            client = OpenAI(api_key=api_key, base_url=base_url, timeout=12.0)
        except Exception as exc:
            # Client construction fails when no API key is configured; that
            # must not prevent the deterministic fallback below from running.
            print(f"[STEP] agent_llm_disabled reason=client-init-failed error={type(exc).__name__}")

    llm_enabled = client is not None
    if llm_enabled:
        # Probe the endpoint once so a missing model is detected up front
        # rather than through one failed completion per module.
        try:
            models = client.models.list()
            available = {item.id for item in models.data if getattr(item, "id", None)}
            if model not in available:
                print(
                    f"[STEP] agent_llm_disabled reason=model-not-found model={model} "
                    f"available_count={len(available)}"
                )
                llm_enabled = False
        except Exception as exc:
            print(f"[STEP] agent_llm_disabled reason=model-list-failed error={type(exc).__name__}")
            llm_enabled = False

    for node in node_snapshot:
        node_row = store.get_node(node.module_id)
        if node_row is None:
            continue

        module_id = node_row.module_id
        code = node_row.raw_code
        ast_summary = node_row.ast_summary
        collected = False

        if llm_enabled:
            prompt = _build_agent_prompt(module_id=module_id, code=code, ast_summary=ast_summary)
            try:
                resp = client.chat.completions.create(
                    model=model,
                    temperature=0.0,
                    response_format={"type": "json_object"},
                    messages=[
                        {
                            "role": "system",
                            "content": "Return only JSON. Do not include markdown. Keep claims concrete and line-specific.",
                        },
                        {"role": "user", "content": prompt},
                    ],
                )
                text = (resp.choices[0].message.content or "{}").strip()
                payload = json.loads(text)
            except Exception as exc:
                # Best-effort boundary: after a failed request or undecodable
                # reply, stop calling the model for the remaining modules.
                print(
                    f"[STEP] agent_llm_disabled reason=completion-failed error={type(exc).__name__} "
                    f"module={module_id}"
                )
                llm_enabled = False
            else:
                # A successfully parsed reply counts as collected even when it
                # yields no findings, so the heuristic fallback is skipped.
                findings |= _agent_keys_from_payload(payload, module_id)
                collected = True

        if collected:
            continue

        # Deterministic fallback so training bootstrap still works offline.
        for issue in detect_semantic_issues(code):
            findings.add(_finding_key("agent-heuristic", module_id, issue.stage, max(issue.line, 1)))

    return findings
184
+
185
+
186
def _register_model_weights(weight_manager, weight_path: Path) -> None:
    """Register ``weight_path`` under ``MODEL_NAME`` and log the resulting manifest."""
    manifest = weight_manager.register_existing(
        model_name=MODEL_NAME,
        weight_path=weight_path,
    )
    manifest_summary = {
        "model": manifest.model_name,
        "sha256": manifest.sha256,
        "size_bytes": manifest.size_bytes,
    }
    print("[STEP] weights_registered " + json.dumps(manifest_summary, sort_keys=True))


def main() -> None:
    """Run one training-bootstrap pass and record it in the store.

    Steps, in order: optionally register and then verify local model weights,
    seed the target project, compare analyzer findings with agent findings,
    write one preference record per analyzer finding, apply the
    non-regression guard when both baselines are known, and persist the run.
    Progress is reported through ``[START]`` / ``[STEP]`` / ``[END]`` lines.
    """
    cli = _build_parser().parse_args()
    config = load_runtime_config()

    target = Path(cli.target).resolve()
    print(f"[START] target={target} model={MODEL_NAME} mode=deterministic-ground-truth")

    # --- model weights -----------------------------------------------------
    weight_manager = WeightSafetyManager(Path(config.llm_weight_manifest_dir))
    verified_weight_path: str | None = None
    if cli.register_weights:
        try:
            _register_model_weights(weight_manager, Path(config.llm_model_agent_path))
        except FileNotFoundError:
            print(
                f"[STEP] weights_register_skipped reason=missing-local-weights model={MODEL_NAME} "
                f"path={config.llm_model_agent_path}"
            )

    try:
        verified_weight_path = str(weight_manager.load_verified(MODEL_NAME))
    except FileNotFoundError:
        # Nothing verifiable yet: try registering the local weights, then retry.
        try:
            _register_model_weights(weight_manager, Path(config.llm_model_agent_path))
            verified_weight_path = str(weight_manager.load_verified(MODEL_NAME))
        except FileNotFoundError:
            print(
                f"[STEP] weights_unavailable reason=missing-local-weights model={MODEL_NAME} "
                f"path={config.llm_model_agent_path}"
            )

    if verified_weight_path is None:
        print("[STEP] weights_verified path=unavailable mode=api-only")
    else:
        print(f"[STEP] weights_verified path={verified_weight_path}")

    # --- seed and collect findings -------------------------------------------
    seed_result = seed_project(target_dir=target, db_path=cli.db_path, force=cli.force_seed)
    print(f"[STEP] seeded {json.dumps(seed_result, sort_keys=True)}")

    store = Store(source_root=str(target), db_path=cli.db_path)
    deterministic_findings = store.get_analyzer_findings()
    deterministic_keys = {
        _finding_key(found.analyzer, found.module_id, found.rule_id, found.line)
        for found in deterministic_findings
    }
    deterministic_targets = {
        _target_key(found.module_id, found.line) for found in deterministic_findings
    }

    agent_keys = _extract_agent_findings(store=store, config=config)
    agent_targets: set[str] = set()
    for key in agent_keys:
        fields = key.split(":")
        if len(fields) < 4:
            continue
        try:
            line_no = int(fields[-1])
        except ValueError:
            continue
        # Second field is the module id; the last field is the line number.
        agent_targets.add(_target_key(fields[1], line_no))

    manager = TrainingRunManager()
    comparison = manager.compare(deterministic_findings=deterministic_targets, agent_findings=agent_targets)

    # --- preference dataset ------------------------------------------------
    review_prompt = (
        "Review the module and detect concrete bugs, security issues, and "
        "dependency-attributed cascade problems without relying on prior findings."
    )
    records: list[dict[str, object]] = [
        manager.build_preference_record(
            prompt=review_prompt,
            agent_output="",
            deterministic_targets=[
                _finding_key(found.analyzer, found.module_id, found.rule_id, found.line)
            ],
            reward=0.0,
        )
        for found in deterministic_findings
    ]

    output_path = Path(cli.deterministic_output)
    manager.save_records(output_path, records)

    # --- non-regression guard ----------------------------------------------
    baseline_precision = cli.baseline_precision
    baseline_recall = cli.baseline_recall
    prior_runs = store.list_training_runs(limit=100)
    if prior_runs:
        # Without an explicit baseline, use the best value among prior runs.
        if baseline_precision is None:
            baseline_precision = max(run.precision for run in prior_runs)
        if baseline_recall is None:
            baseline_recall = max(run.recall for run in prior_runs)

    passed_non_regression = True
    if baseline_precision is not None and baseline_recall is not None:
        manager.assert_non_regression(
            baseline_precision=baseline_precision,
            baseline_recall=baseline_recall,
            current_precision=comparison.precision,
            current_recall=comparison.recall,
            tolerance=cli.regression_tolerance,
        )
        guard_summary = {
            "baseline_precision": baseline_precision,
            "baseline_recall": baseline_recall,
            "tolerance": cli.regression_tolerance,
        }
        print("[STEP] non_regression_guard " + json.dumps(guard_summary, sort_keys=True))

    dataset_summary = {
        "output": str(output_path),
        "records": len(records),
        "precision": comparison.precision,
        "recall": comparison.recall,
        "false_negatives": comparison.false_negatives,
    }
    print("[STEP] training_dataset " + json.dumps(dataset_summary, sort_keys=True))

    # --- persist the run -----------------------------------------------------
    run_id = f"tr-{datetime.now(UTC).strftime('%Y%m%d%H%M%S')}-{uuid.uuid4().hex[:8]}"
    run_config = {
        "target": str(target),
        "model": MODEL_NAME,
        "model_path": config.llm_model_agent_path,
        "agent_inference_enabled": os.getenv("GRAPHREVIEW_AGENT_INFERENCE_ENABLED", "true"),
        "regression_tolerance": cli.regression_tolerance,
        "baseline_precision": baseline_precision,
        "baseline_recall": baseline_recall,
    }
    if verified_weight_path is None:
        sha256 = "unavailable"
    else:
        sha256 = weight_manager.checksum(Path(verified_weight_path))
    store.create_training_run(
        run_id=run_id,
        model_name=MODEL_NAME,
        model_sha256=sha256,
        deterministic_findings=len(deterministic_keys),
        agent_findings=len(agent_keys),
        true_positives=comparison.true_positives,
        false_positives=comparison.false_positives,
        false_negatives=comparison.false_negatives,
        precision=comparison.precision,
        recall=comparison.recall,
        passed_non_regression=passed_non_regression,
        output_path=str(output_path),
        run_config_json=json.dumps(run_config, sort_keys=True),
    )
    print(f"[STEP] training_run_id={run_id}")

    end_summary = {
        "ok": True,
        "deterministic_findings": len(deterministic_findings),
        "agent_findings": len(agent_keys),
        "model_weight": verified_weight_path or "unavailable",
        "model": MODEL_NAME,
        "precision": comparison.precision,
        "recall": comparison.recall,
        "run_id": run_id,
    }
    print("[END] " + json.dumps(end_summary, sort_keys=True))


if __name__ == "__main__":
    main()
inference.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import runpy
5
+ import sys
6
+ from pathlib import Path
7
+
8
+
9
def main() -> None:
    """Execute ``code-review-env/inference.py`` as though it were launched directly.

    The subproject directory is put at the front of ``sys.path`` (if absent)
    and made the working directory before the script runs under the
    ``__main__`` name.

    Raises:
        FileNotFoundError: If the subproject script does not exist.
    """
    subproject = Path(__file__).resolve().parent / "code-review-env"
    target = subproject / "inference.py"

    if not target.exists():
        raise FileNotFoundError(f"Missing required script: {target}")

    import_root = os.fspath(subproject)
    if import_root not in sys.path:
        sys.path.insert(0, import_root)

    os.chdir(subproject)
    runpy.run_path(os.fspath(target), run_name="__main__")


if __name__ == "__main__":
    main()
openenv.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: graphreview
2
+ version: 0.4.0
3
+ description: Dependency-aware RL code review environment with persistent graph state
4
+ runtime:
5
+ api:
6
+ reset: POST /reset
7
+ step: POST /step
8
+ state: GET /state
9
+ health: GET /health
10
+ debug:
11
+ state: GET /debug/state
12
+ reset_annotations: POST /debug/reset-annotations
13
+ tasks:
14
+ - id: style_review
15
+ level: easy
16
+ module_defaults: [cart]
17
+ grader: easy
18
+ - id: logic_review
19
+ level: medium
20
+ module_defaults: [checkout, auth]
21
+ grader: medium
22
+ - id: cascade_review
23
+ level: hard
24
+ module_defaults: [checkout, auth, config]
25
+ grader: hard
26
+ models:
27
+ action: env.action.ReviewAction
28
+ observation: env.observation.CodeObservation
29
+ state: env.state.GraphState
pyproject.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "nodeaudit-openenv"
7
+ version = "0.1.0"
8
+ description = "Root submission package for NodeAudit OpenEnv GraphReview"
9
+ requires-python = ">=3.11"
10
+ dependencies = [
11
+ "openenv-core>=0.2.3",
12
+ "fastapi>=0.115",
13
+ "uvicorn>=0.30",
14
+ ]
15
+
16
+ [project.scripts]
17
+ server = "server.app:main"
server/__init__.py ADDED
File without changes
server/app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import importlib.util
4
+ from pathlib import Path
5
+ import sys
6
+ from types import ModuleType
7
+ from typing import Any
8
+
9
+
10
+ def _load_subproject_server() -> ModuleType:
11
+ repo_root = Path(__file__).resolve().parents[1]
12
+ subproject_root = repo_root / "code-review-env"
13
+ subproject_root_str = str(subproject_root)
14
+ if subproject_root_str not in sys.path:
15
+ sys.path.insert(0, subproject_root_str)
16
+
17
+ target = repo_root / "code-review-env" / "server" / "app.py"
18
+ if not target.exists():
19
+ raise FileNotFoundError(f"Missing subproject server module: {target}")
20
+
21
+ spec = importlib.util.spec_from_file_location("code_review_env_server_app", target)
22
+ if spec is None or spec.loader is None:
23
+ raise RuntimeError(f"Unable to load module spec for {target}")
24
+
25
+ module = importlib.util.module_from_spec(spec)
26
+ spec.loader.exec_module(module)
27
+ return module
28
+
29
+
30
# Load the wrapped application once at import time so that the
# ``server.app:app`` import string resolves for ASGI servers.
_subserver = _load_subproject_server()
app: Any = _subserver.app


def main() -> Any:
    """Serve ``app`` with uvicorn on 0.0.0.0:7860 and block until shutdown.

    A console-script wrapper passes the entry point's return value to
    ``sys.exit``, so returning the application object would end the process
    with a failure status without ever serving. The server is therefore
    started here and ``None`` is returned once it stops.
    """
    import uvicorn

    # Pass the already-loaded object rather than an import string so the
    # module is not imported a second time when this file runs as a script.
    uvicorn.run(app, host="0.0.0.0", port=7860)
    return None


if __name__ == "__main__":
    main()
uv.lock ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ version = 1
2
+ revision = 3
3
+ requires-python = ">=3.11"
4
+
5
+ [[package]]
6
+ name = "code-review-env"
7
+ version = "0.1.0"
8
+ source = { editable = "." }