Spaces:

Siteshcodes
/

bug-triage-env

Sleeping

App Files Files Community

Siteshcodes committed on Apr 12

Commit

4ba1053

1 Parent(s): ca5a648

Fix all things

Browse files

Files changed (4) hide show

inference.py +8 -10
model.py +3 -6
server/app.py +3 -19
server/task.py +6 -6

inference.py CHANGED Viewed

@@ -19,7 +19,6 @@ from typing import List, Optional
 from openai import OpenAI
 from model import TriageAction, TriageObservation, BugReport
-# ── config ───────────────────────────────────────────────────────────────
 API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
 API_KEY      = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
@@ -42,7 +41,7 @@ print(f"[CONFIG] MODEL_NAME={MODEL_NAME}", flush=True)
 print(f"[CONFIG] ENV_BASE_URL={ENV_BASE_URL}", flush=True)
 print(f"[CONFIG] API_KEY={'set' if API_KEY else 'MISSING'}", flush=True)
-# ── inlined client ────────────────────────────────────────────────────────
 def _parse_observation(data: dict) -> TriageObservation:
     try:
@@ -121,7 +120,7 @@ class BugTriageClient:
         self.close()
-# ── prompt ────────────────────────────────────────────────────────────────
 SYSTEM_PROMPT = textwrap.dedent("""
     You are a senior software engineering manager.
@@ -148,7 +147,7 @@ SYSTEM_PROMPT = textwrap.dedent("""
 """).strip()
-# ── logging ───────────────────────────────────────────────────────────────
 def log_start(task: str, env: str, model: str) -> None:
     print(f"[START] task={task} env={env} model={model}", flush=True)
@@ -177,7 +176,7 @@ def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> No
     )
-# ── helpers ───────────────────────────────────────────────────────────────
 def format_bug(obs: TriageObservation) -> str:
     bug = obs.bug_report
@@ -235,13 +234,12 @@ def call_model(client: OpenAI, bug_text: str) -> TriageAction:
     return action
-# ── main ──────────────────────────────────────────────────────────────────
 def main() -> None:
     client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
-    # Run each task separately with its own [START]/[STEP]/[END] block
-    # so the validator can count 3 distinct tasks with grader scores.
     all_scores = []
     with BugTriageClient(base_url=ENV_BASE_URL) as env:
@@ -277,7 +275,7 @@ def main() -> None:
                     done=True,
                 )
-                # Score for this task
                 score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
                 score = min(max(score, 0.01), 0.99)
                 success = score >= SUCCESS_SCORE_THRESHOLD
@@ -294,7 +292,7 @@ def main() -> None:
             time.sleep(0.5)
-    # Summary
     avg_score = sum(all_scores) / len(all_scores) if all_scores else 0.0
     print(f"[SUMMARY] tasks={len(all_scores)} avg_score={avg_score:.2f} scores={all_scores}", flush=True)

 from openai import OpenAI
 from model import TriageAction, TriageObservation, BugReport
 API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
 API_KEY      = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
 print(f"[CONFIG] ENV_BASE_URL={ENV_BASE_URL}", flush=True)
 print(f"[CONFIG] API_KEY={'set' if API_KEY else 'MISSING'}", flush=True)
+#inlined client
 def _parse_observation(data: dict) -> TriageObservation:
     try:
         self.close()
 SYSTEM_PROMPT = textwrap.dedent("""
     You are a senior software engineering manager.
 """).strip()
 def log_start(task: str, env: str, model: str) -> None:
     print(f"[START] task={task} env={env} model={model}", flush=True)
     )
 def format_bug(obs: TriageObservation) -> str:
     bug = obs.bug_report
     return action
 def main() -> None:
     client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
     all_scores = []
     with BugTriageClient(base_url=ENV_BASE_URL) as env:
                     done=True,
                 )
                 score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
                 score = min(max(score, 0.01), 0.99)
                 success = score >= SUCCESS_SCORE_THRESHOLD
             time.sleep(0.5)
     avg_score = sum(all_scores) / len(all_scores) if all_scores else 0.0
     print(f"[SUMMARY] tasks={len(all_scores)} avg_score={avg_score:.2f} scores={all_scores}", flush=True)

model.py CHANGED Viewed

@@ -5,9 +5,8 @@ from openenv.core.env_server import Action, Observation
 from openenv.core.env_server.types import State
-# ─────────────────────────────────────────────
-# BugReport — plain Pydantic model
-# ─────────────────────────────────────────────
 class BugReport(BaseModel):
     """A single GitHub-style bug report."""
@@ -22,9 +21,7 @@ class BugReport(BaseModel):
         arbitrary_types_allowed = True
-# ─────────────────────────────────────────────
-# OpenEnv typed models — ALL pure Pydantic
-# ─────────────────────────────────────────────
 class TriageAction(Action):
     """What the agent submits as its triage decision."""

 from openenv.core.env_server.types import State
 class BugReport(BaseModel):
     """A single GitHub-style bug report."""
         arbitrary_types_allowed = True
 class TriageAction(Action):
     """What the agent submits as its triage decision."""

server/app.py CHANGED Viewed

@@ -49,21 +49,12 @@ TASKS_META = [
     }
 ]
-# ─────────────────────────────────────────────
-# GLOBAL STATEFUL ENVIRONMENT
-# The OpenEnv create_app() creates stateless endpoints that spin up
-# a new environment per request. This breaks our reset→step flow
-# because step() needs the bug from reset().
-# We maintain a shared global instance to fix this.
-# ─────────────────────────────────────────────
 _global_env = BugTriageEnvironment()
-# ─────────────────────────────────────────────
-# REMOVE the framework's stateless /reset and /step routes,
-# then add our own stateful versions.
-# ─────────────────────────────────────────────
-# Remove existing /reset and /step routes registered by create_app()
 routes_to_remove = []
 for route in app.routes:
     if hasattr(route, "path") and route.path in ("/reset", "/step", "/state"):
@@ -105,9 +96,6 @@ def task_hard():
     return TASKS_META[2]
-# ─────────────────────────────────────────────
-# CUSTOM STATEFUL /reset and /step endpoints
-# ─────────────────────────────────────────────
 @app.post("/reset")
 async def custom_reset(request: Request):
@@ -190,10 +178,6 @@ def custom_state():
         return state.dict()
-# ─────────────────────────────────────────────
-# Per-task reset shortcuts (convenience)
-# ─────────────────────────────────────────────
 @app.post("/tasks/easy/reset")
 def reset_easy():
     global _global_env

     }
 ]
 _global_env = BugTriageEnvironment()
 routes_to_remove = []
 for route in app.routes:
     if hasattr(route, "path") and route.path in ("/reset", "/step", "/state"):
     return TASKS_META[2]
 @app.post("/reset")
 async def custom_reset(request: Request):
         return state.dict()
 @app.post("/tasks/easy/reset")
 def reset_easy():
     global _global_env

server/task.py CHANGED Viewed

@@ -6,9 +6,9 @@ sys.path.insert(0, "/app")
 from typing import Tuple, List
 from model import BugReport, TriageAction
-# ─────────────────────────────────────────────
 # BUG REPORT DATASET
-# ─────────────────────────────────────────────
 TASKS = {
     "easy": {
@@ -228,18 +228,18 @@ TASKS = {
 }
-# ─────────────────────────────────────────────
 # TASK SAMPLER  — picks a random bug each reset
-# ─────────────────────────────────────────────
 def sample_bug(task_key: str) -> BugReport:
     """Pick one bug at random from the pool registered under ``task_key``.

     The pool is looked up as ``TASKS[task_key]["bugs"]``; a ``task_key``
     that is not present in ``TASKS`` raises ``KeyError``.
     """
     # Name the candidate pool first so the selection step reads on its own.
     candidate_bugs = TASKS[task_key]["bugs"]
     return random.choice(candidate_bugs)
-# ─────────────────────────────────────────────
 # GRADERS
-# ─────────────────────────────────────────────
 PRIORITY_ORDER = {"P0": 0, "P1": 1, "P2": 2, "P3": 3}

 from typing import Tuple, List
 from model import BugReport, TriageAction
 # BUG REPORT DATASET
 TASKS = {
     "easy": {
 }
 # TASK SAMPLER  — picks a random bug each reset
 def sample_bug(task_key: str) -> BugReport:
     """Pick one bug at random from the pool registered under ``task_key``.

     The pool is looked up as ``TASKS[task_key]["bugs"]``; a ``task_key``
     that is not present in ``TASKS`` raises ``KeyError``.
     """
     # Name the candidate pool first so the selection step reads on its own.
     candidate_bugs = TASKS[task_key]["bugs"]
     return random.choice(candidate_bugs)
 # GRADERS
 PRIORITY_ORDER = {"P0": 0, "P1": 1, "P2": 2, "P3": 3}