Siteshcodes committed on
Commit
4ba1053
·
1 Parent(s): ca5a648

Fix all things

Browse files
Files changed (4) hide show
  1. inference.py +8 -10
  2. model.py +3 -6
  3. server/app.py +3 -19
  4. server/task.py +6 -6
inference.py CHANGED
@@ -19,7 +19,6 @@ from typing import List, Optional
19
  from openai import OpenAI
20
  from model import TriageAction, TriageObservation, BugReport
21
 
22
- # ── config ───────────────────────────────────────────────────────────────
23
 
24
  API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
25
  API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
@@ -42,7 +41,7 @@ print(f"[CONFIG] MODEL_NAME={MODEL_NAME}", flush=True)
42
  print(f"[CONFIG] ENV_BASE_URL={ENV_BASE_URL}", flush=True)
43
  print(f"[CONFIG] API_KEY={'set' if API_KEY else 'MISSING'}", flush=True)
44
 
45
- # ── inlined client ────────────────────────────────────────────────────────
46
 
47
  def _parse_observation(data: dict) -> TriageObservation:
48
  try:
@@ -121,7 +120,7 @@ class BugTriageClient:
121
  self.close()
122
 
123
 
124
- # ── prompt ────────────────────────────────────────────────────────────────
125
 
126
  SYSTEM_PROMPT = textwrap.dedent("""
127
  You are a senior software engineering manager.
@@ -148,7 +147,7 @@ SYSTEM_PROMPT = textwrap.dedent("""
148
  """).strip()
149
 
150
 
151
- # ── logging ───────────────────────────────────────────────────────────────
152
 
153
  def log_start(task: str, env: str, model: str) -> None:
154
  print(f"[START] task={task} env={env} model={model}", flush=True)
@@ -177,7 +176,7 @@ def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> No
177
  )
178
 
179
 
180
- # ── helpers ───────────────────────────────────────────────────────────────
181
 
182
  def format_bug(obs: TriageObservation) -> str:
183
  bug = obs.bug_report
@@ -235,13 +234,12 @@ def call_model(client: OpenAI, bug_text: str) -> TriageAction:
235
  return action
236
 
237
 
238
- # ── main ──────────────────────────────────────────────────────────────────
239
 
240
  def main() -> None:
241
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
242
 
243
- # Run each task separately with its own [START]/[STEP]/[END] block
244
- # so the validator can count 3 distinct tasks with grader scores.
245
  all_scores = []
246
 
247
  with BugTriageClient(base_url=ENV_BASE_URL) as env:
@@ -277,7 +275,7 @@ def main() -> None:
277
  done=True,
278
  )
279
 
280
- # Score for this task
281
  score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
282
  score = min(max(score, 0.01), 0.99)
283
  success = score >= SUCCESS_SCORE_THRESHOLD
@@ -294,7 +292,7 @@ def main() -> None:
294
 
295
  time.sleep(0.5)
296
 
297
- # Summary
298
  avg_score = sum(all_scores) / len(all_scores) if all_scores else 0.0
299
  print(f"[SUMMARY] tasks={len(all_scores)} avg_score={avg_score:.2f} scores={all_scores}", flush=True)
300
 
 
19
  from openai import OpenAI
20
  from model import TriageAction, TriageObservation, BugReport
21
 
 
22
 
23
  API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
24
  API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
 
41
  print(f"[CONFIG] ENV_BASE_URL={ENV_BASE_URL}", flush=True)
42
  print(f"[CONFIG] API_KEY={'set' if API_KEY else 'MISSING'}", flush=True)
43
 
44
+ #inlined client
45
 
46
  def _parse_observation(data: dict) -> TriageObservation:
47
  try:
 
120
  self.close()
121
 
122
 
123
+
124
 
125
  SYSTEM_PROMPT = textwrap.dedent("""
126
  You are a senior software engineering manager.
 
147
  """).strip()
148
 
149
 
150
+
151
 
152
  def log_start(task: str, env: str, model: str) -> None:
153
  print(f"[START] task={task} env={env} model={model}", flush=True)
 
176
  )
177
 
178
 
179
+
180
 
181
  def format_bug(obs: TriageObservation) -> str:
182
  bug = obs.bug_report
 
234
  return action
235
 
236
 
237
+
238
 
239
  def main() -> None:
240
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
241
 
242
+
 
243
  all_scores = []
244
 
245
  with BugTriageClient(base_url=ENV_BASE_URL) as env:
 
275
  done=True,
276
  )
277
 
278
+
279
  score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
280
  score = min(max(score, 0.01), 0.99)
281
  success = score >= SUCCESS_SCORE_THRESHOLD
 
292
 
293
  time.sleep(0.5)
294
 
295
+
296
  avg_score = sum(all_scores) / len(all_scores) if all_scores else 0.0
297
  print(f"[SUMMARY] tasks={len(all_scores)} avg_score={avg_score:.2f} scores={all_scores}", flush=True)
298
 
model.py CHANGED
@@ -5,9 +5,8 @@ from openenv.core.env_server import Action, Observation
5
  from openenv.core.env_server.types import State
6
 
7
 
8
- # ─────────────────────────────────────────────
9
- # BugReport — plain Pydantic model
10
- # ─────────────────────────────────────────────
11
 
12
  class BugReport(BaseModel):
13
  """A single GitHub-style bug report."""
@@ -22,9 +21,7 @@ class BugReport(BaseModel):
22
  arbitrary_types_allowed = True
23
 
24
 
25
- # ─────────────────────────────────────────────
26
- # OpenEnv typed models — ALL pure Pydantic
27
- # ─────────────────────────────────────────────
28
 
29
  class TriageAction(Action):
30
  """What the agent submits as its triage decision."""
 
5
  from openenv.core.env_server.types import State
6
 
7
 
8
+
9
+
 
10
 
11
  class BugReport(BaseModel):
12
  """A single GitHub-style bug report."""
 
21
  arbitrary_types_allowed = True
22
 
23
 
24
+
 
 
25
 
26
  class TriageAction(Action):
27
  """What the agent submits as its triage decision."""
server/app.py CHANGED
@@ -49,21 +49,12 @@ TASKS_META = [
49
  }
50
  ]
51
 
52
- # ─────────────────────────────────────────────
53
- # GLOBAL STATEFUL ENVIRONMENT
54
- # The OpenEnv create_app() creates stateless endpoints that spin up
55
- # a new environment per request. This breaks our reset→step flow
56
- # because step() needs the bug from reset().
57
- # We maintain a shared global instance to fix this.
58
- # ─────────────────────────────────────────────
59
  _global_env = BugTriageEnvironment()
60
 
61
 
62
- # ─────────────────────────────────────────────
63
- # REMOVE the framework's stateless /reset and /step routes,
64
- # then add our own stateful versions.
65
- # ─────────────────────────────────────────────
66
- # Remove existing /reset and /step routes registered by create_app()
67
  routes_to_remove = []
68
  for route in app.routes:
69
  if hasattr(route, "path") and route.path in ("/reset", "/step", "/state"):
@@ -105,9 +96,6 @@ def task_hard():
105
  return TASKS_META[2]
106
 
107
 
108
- # ─────────────────────────────────────────────
109
- # CUSTOM STATEFUL /reset and /step endpoints
110
- # ─────────────────────────────────────────────
111
 
112
  @app.post("/reset")
113
  async def custom_reset(request: Request):
@@ -190,10 +178,6 @@ def custom_state():
190
  return state.dict()
191
 
192
 
193
- # ─────────────────────────────────────────────
194
- # Per-task reset shortcuts (convenience)
195
- # ─────────────────────────────────────────────
196
-
197
  @app.post("/tasks/easy/reset")
198
  def reset_easy():
199
  global _global_env
 
49
  }
50
  ]
51
 
52
+
53
+
 
 
 
 
 
54
  _global_env = BugTriageEnvironment()
55
 
56
 
57
+
 
 
 
 
58
  routes_to_remove = []
59
  for route in app.routes:
60
  if hasattr(route, "path") and route.path in ("/reset", "/step", "/state"):
 
96
  return TASKS_META[2]
97
 
98
 
 
 
 
99
 
100
  @app.post("/reset")
101
  async def custom_reset(request: Request):
 
178
  return state.dict()
179
 
180
 
 
 
 
 
181
  @app.post("/tasks/easy/reset")
182
  def reset_easy():
183
  global _global_env
server/task.py CHANGED
@@ -6,9 +6,9 @@ sys.path.insert(0, "/app")
6
  from typing import Tuple, List
7
  from model import BugReport, TriageAction
8
 
9
- # ─────────────────────────────────────────────
10
  # BUG REPORT DATASET
11
- # ─────────────────────────────────────────────
12
 
13
  TASKS = {
14
  "easy": {
@@ -228,18 +228,18 @@ TASKS = {
228
  }
229
 
230
 
231
- # ─────────────────────────────────────────────
232
  # TASK SAMPLER — picks a random bug each reset
233
- # ─────────────────────────────────────────────
234
 
235
  def sample_bug(task_key: str) -> BugReport:
236
  """Return a random bug from the given task's pool."""
237
  return random.choice(TASKS[task_key]["bugs"])
238
 
239
 
240
- # ─────────────────────────────────────────────
241
  # GRADERS
242
- # ─────────────────────────────────────────────
243
 
244
  PRIORITY_ORDER = {"P0": 0, "P1": 1, "P2": 2, "P3": 3}
245
 
 
6
  from typing import Tuple, List
7
  from model import BugReport, TriageAction
8
 
9
+
10
  # BUG REPORT DATASET
11
+
12
 
13
  TASKS = {
14
  "easy": {
 
228
  }
229
 
230
 
231
+
232
  # TASK SAMPLER — picks a random bug each reset
233
+
234
 
235
  def sample_bug(task_key: str) -> BugReport:
236
  """Return a random bug from the given task's pool."""
237
  return random.choice(TASKS[task_key]["bugs"])
238
 
239
 
240
+
241
  # GRADERS
242
+
243
 
244
  PRIORITY_ORDER = {"P0": 0, "P1": 1, "P2": 2, "P3": 3}
245