Krsnapriya committed on
Commit
1cef78b
·
verified ·
1 Parent(s): aa466c2

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. server/app.py +25 -68
server/app.py CHANGED
@@ -1,6 +1,6 @@
1
  """
2
- FastAPI server wrapping CodeReviewEnv-RX.
3
- Exposes OpenEnv-compliant endpoints: /reset, /step, /state, /grader, /tasks, /baseline, /health
4
  """
5
 
6
  from fastapi import FastAPI, HTTPException, Query
@@ -9,27 +9,23 @@ from contextlib import asynccontextmanager
9
  import sys
10
  import os
11
 
12
- # Add parent directory to path so we can import models/tasks
13
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
14
 
15
- from models import Action, TaskInfo # pyre-ignore
16
- from server.env import CodeReviewEnv # pyre-ignore
17
- from tasks import get_tasks # pyre-ignore
18
 
19
 
20
  @asynccontextmanager
21
  async def lifespan(app: FastAPI):
22
- """Initialize default environment on startup."""
23
  app.state.envs = {}
24
  yield
25
 
26
 
27
  app = FastAPI(
28
- title="CodeReviewEnv-RX",
29
- description="A research-grade, interactive, step-based environment where AI agents "
30
- "navigate multi-file codebases, inspect lines, trace function calls, "
31
- "and flag issues under time and step constraints.",
32
- version="2.0.0",
33
  lifespan=lifespan,
34
  )
35
 
@@ -41,95 +37,56 @@ app.add_middleware(
41
  )
42
 
43
 
44
- def _get_env() -> CodeReviewEnv:
45
- """Get the current environment instance or raise 400."""
46
  env = app.state.envs.get("current")
47
  if env is None:
48
  raise HTTPException(status_code=400, detail="No active environment. Call /reset first.")
49
  return env
50
 
51
 
52
- # ── Endpoints ──────────────────────────────────────────
53
-
54
-
55
  @app.post("/reset")
56
  def reset(
57
- task_level: str = Query("easy", description="Task level: easy, medium, hard, security, concurrency, hard_rx"),
58
- seed: int = Query(42, description="Random seed for deterministic episodes"),
 
59
  ):
60
- """Reset the environment with a specific task and seed. Returns the first observation."""
61
- valid_levels = list(get_tasks().keys())
62
- if task_level not in valid_levels:
63
- raise HTTPException(
64
- status_code=400,
65
- detail=f"Invalid task_level: {task_level}. Must be one of {valid_levels}"
66
- )
67
- env = CodeReviewEnv(task_level=task_level, seed=seed)
68
  app.state.envs["current"] = env
69
- obs = env.reset()
70
  return obs.model_dump()
71
 
72
 
73
  @app.post("/step")
74
  def step(action: Action):
75
- """Submit an action (open_file, inspect, flag, trace). Returns observation + reward."""
76
  env = _get_env()
77
  try:
78
- obs, reward = env.step(action)
79
  except ValueError as e:
80
  raise HTTPException(status_code=400, detail=str(e))
81
  return {
82
  "observation": obs.model_dump(),
83
- "reward": reward.model_dump(),
 
 
84
  }
85
 
86
 
87
  @app.get("/state")
88
  def state():
89
- """Return full internal state of the current environment."""
90
  env = _get_env()
91
- return env.state()
92
 
93
 
94
  @app.get("/grader")
95
  def grader():
96
- """Grade the current episode. Returns deterministic score 0.0–1.0 with breakdown."""
97
  env = _get_env()
98
- result = env.grade_episode()
99
- return result.model_dump()
100
-
101
-
102
- @app.get("/tasks")
103
- def list_tasks():
104
- """List all available tasks with metadata."""
105
- tasks = get_tasks()
106
- task_list = []
107
- for level, task in tasks.items():
108
- task_list.append(
109
- TaskInfo(
110
- name=task.name,
111
- level=task.level,
112
- description=task.description,
113
- num_files=len(task.files),
114
- num_required_issues=len([i for i in task.issues if i.confidence >= 0.5]),
115
- num_optional_issues=len(task.optional_issues),
116
- ).model_dump()
117
- )
118
- return {"tasks": task_list}
119
-
120
-
121
- @app.post("/baseline")
122
- def run_baseline():
123
- """
124
- Run a deterministic heuristic baseline across all tasks.
125
- Returns scores per task level.
126
- """
127
- from baseline import run_heuristic_baseline # pyre-ignore
128
- scores = run_heuristic_baseline()
129
- return {"baseline_scores": scores}
130
 
131
 
132
  @app.get("/health")
133
  def health():
134
- """Health check endpoint."""
135
- return {"status": "ok", "environment": "CodeReviewEnv-RX", "version": "2.0.0"}
 
1
  """
2
+ FastAPI server wrapping DebugOps-RX.
3
+ Exposes OpenEnv-compliant endpoints: /reset, /step, /state, /grader, /health
4
  """
5
 
6
  from fastapi import FastAPI, HTTPException, Query
 
9
  import sys
10
  import os
11
 
12
+ # Add parent directory to path
13
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
14
 
15
+ from models import Action # pyre-ignore
16
+ from server.env import DebugOpsEnv # pyre-ignore
 
17
 
18
 
19
  @asynccontextmanager
20
  async def lifespan(app: FastAPI):
 
21
  app.state.envs = {}
22
  yield
23
 
24
 
25
  app = FastAPI(
26
+ title="DebugOps-RX API",
27
+ description="Realistic eXecution benchmark for debugging agents.",
28
+ version="1.0.0",
 
 
29
  lifespan=lifespan,
30
  )
31
 
 
37
  )
38
 
39
 
40
+ def _get_env() -> DebugOpsEnv:
 
41
  env = app.state.envs.get("current")
42
  if env is None:
43
  raise HTTPException(status_code=400, detail="No active environment. Call /reset first.")
44
  return env
45
 
46
 
 
 
 
47
  @app.post("/reset")
48
  def reset(
49
+ difficulty: str = Query("easy", description="easy, medium, hard, extreme"),
50
+ split: str = Query("test", description="train, test, ood"),
51
+ seed: int = Query(42),
52
  ):
53
+ env = DebugOpsEnv(seed=seed)
 
 
 
 
 
 
 
54
  app.state.envs["current"] = env
55
+ obs = env.reset(difficulty=difficulty, split=split)
56
  return obs.model_dump()
57
 
58
 
59
  @app.post("/step")
60
  def step(action: Action):
 
61
  env = _get_env()
62
  try:
63
+ obs, reward, done, info = env.step(action)
64
  except ValueError as e:
65
  raise HTTPException(status_code=400, detail=str(e))
66
  return {
67
  "observation": obs.model_dump(),
68
+ "reward": reward,
69
+ "done": done,
70
+ "info": info
71
  }
72
 
73
 
74
  @app.get("/state")
75
  def state():
 
76
  env = _get_env()
77
+ return env.state.model_dump()
78
 
79
 
80
  @app.get("/grader")
81
  def grader():
 
82
  env = _get_env()
83
+ score = env.grade(env.trajectory)
84
+ return {
85
+ "score": score.final(),
86
+ "breakdown": score.model_dump()
87
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
 
90
  @app.get("/health")
91
  def health():
92
+ return {"status": "ok", "environment": "DebugOps-RX", "version": "1.0.0"}