Spaces:
Sleeping
Sleeping
Commit ·
4ba1053
1
Parent(s): ca5a648
Fix all things
Browse files- inference.py +8 -10
- model.py +3 -6
- server/app.py +3 -19
- server/task.py +6 -6
inference.py
CHANGED
|
@@ -19,7 +19,6 @@ from typing import List, Optional
|
|
| 19 |
from openai import OpenAI
|
| 20 |
from model import TriageAction, TriageObservation, BugReport
|
| 21 |
|
| 22 |
-
# ── config ───────────────────────────────────────────────────────────────
|
| 23 |
|
| 24 |
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
|
| 25 |
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
|
|
@@ -42,7 +41,7 @@ print(f"[CONFIG] MODEL_NAME={MODEL_NAME}", flush=True)
|
|
| 42 |
print(f"[CONFIG] ENV_BASE_URL={ENV_BASE_URL}", flush=True)
|
| 43 |
print(f"[CONFIG] API_KEY={'set' if API_KEY else 'MISSING'}", flush=True)
|
| 44 |
|
| 45 |
-
#
|
| 46 |
|
| 47 |
def _parse_observation(data: dict) -> TriageObservation:
|
| 48 |
try:
|
|
@@ -121,7 +120,7 @@ class BugTriageClient:
|
|
| 121 |
self.close()
|
| 122 |
|
| 123 |
|
| 124 |
-
|
| 125 |
|
| 126 |
SYSTEM_PROMPT = textwrap.dedent("""
|
| 127 |
You are a senior software engineering manager.
|
|
@@ -148,7 +147,7 @@ SYSTEM_PROMPT = textwrap.dedent("""
|
|
| 148 |
""").strip()
|
| 149 |
|
| 150 |
|
| 151 |
-
|
| 152 |
|
| 153 |
def log_start(task: str, env: str, model: str) -> None:
|
| 154 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
|
@@ -177,7 +176,7 @@ def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> No
|
|
| 177 |
)
|
| 178 |
|
| 179 |
|
| 180 |
-
|
| 181 |
|
| 182 |
def format_bug(obs: TriageObservation) -> str:
|
| 183 |
bug = obs.bug_report
|
|
@@ -235,13 +234,12 @@ def call_model(client: OpenAI, bug_text: str) -> TriageAction:
|
|
| 235 |
return action
|
| 236 |
|
| 237 |
|
| 238 |
-
|
| 239 |
|
| 240 |
def main() -> None:
|
| 241 |
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 242 |
|
| 243 |
-
|
| 244 |
-
# so the validator can count 3 distinct tasks with grader scores.
|
| 245 |
all_scores = []
|
| 246 |
|
| 247 |
with BugTriageClient(base_url=ENV_BASE_URL) as env:
|
|
@@ -277,7 +275,7 @@ def main() -> None:
|
|
| 277 |
done=True,
|
| 278 |
)
|
| 279 |
|
| 280 |
-
|
| 281 |
score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
|
| 282 |
score = min(max(score, 0.01), 0.99)
|
| 283 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
|
@@ -294,7 +292,7 @@ def main() -> None:
|
|
| 294 |
|
| 295 |
time.sleep(0.5)
|
| 296 |
|
| 297 |
-
|
| 298 |
avg_score = sum(all_scores) / len(all_scores) if all_scores else 0.0
|
| 299 |
print(f"[SUMMARY] tasks={len(all_scores)} avg_score={avg_score:.2f} scores={all_scores}", flush=True)
|
| 300 |
|
|
|
|
| 19 |
from openai import OpenAI
|
| 20 |
from model import TriageAction, TriageObservation, BugReport
|
| 21 |
|
|
|
|
| 22 |
|
| 23 |
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
|
| 24 |
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or os.getenv("OPENAI_API_KEY")
|
|
|
|
| 41 |
print(f"[CONFIG] ENV_BASE_URL={ENV_BASE_URL}", flush=True)
|
| 42 |
print(f"[CONFIG] API_KEY={'set' if API_KEY else 'MISSING'}", flush=True)
|
| 43 |
|
| 44 |
+
#inlined client
|
| 45 |
|
| 46 |
def _parse_observation(data: dict) -> TriageObservation:
|
| 47 |
try:
|
|
|
|
| 120 |
self.close()
|
| 121 |
|
| 122 |
|
| 123 |
+
|
| 124 |
|
| 125 |
SYSTEM_PROMPT = textwrap.dedent("""
|
| 126 |
You are a senior software engineering manager.
|
|
|
|
| 147 |
""").strip()
|
| 148 |
|
| 149 |
|
| 150 |
+
|
| 151 |
|
| 152 |
def log_start(task: str, env: str, model: str) -> None:
|
| 153 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
|
|
|
| 176 |
)
|
| 177 |
|
| 178 |
|
| 179 |
+
|
| 180 |
|
| 181 |
def format_bug(obs: TriageObservation) -> str:
|
| 182 |
bug = obs.bug_report
|
|
|
|
| 234 |
return action
|
| 235 |
|
| 236 |
|
| 237 |
+
|
| 238 |
|
| 239 |
def main() -> None:
|
| 240 |
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 241 |
|
| 242 |
+
|
|
|
|
| 243 |
all_scores = []
|
| 244 |
|
| 245 |
with BugTriageClient(base_url=ENV_BASE_URL) as env:
|
|
|
|
| 275 |
done=True,
|
| 276 |
)
|
| 277 |
|
| 278 |
+
|
| 279 |
score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
|
| 280 |
score = min(max(score, 0.01), 0.99)
|
| 281 |
success = score >= SUCCESS_SCORE_THRESHOLD
|
|
|
|
| 292 |
|
| 293 |
time.sleep(0.5)
|
| 294 |
|
| 295 |
+
|
| 296 |
avg_score = sum(all_scores) / len(all_scores) if all_scores else 0.0
|
| 297 |
print(f"[SUMMARY] tasks={len(all_scores)} avg_score={avg_score:.2f} scores={all_scores}", flush=True)
|
| 298 |
|
model.py
CHANGED
|
@@ -5,9 +5,8 @@ from openenv.core.env_server import Action, Observation
|
|
| 5 |
from openenv.core.env_server.types import State
|
| 6 |
|
| 7 |
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
# ─────────────────────────────────────────────
|
| 11 |
|
| 12 |
class BugReport(BaseModel):
|
| 13 |
"""A single GitHub-style bug report."""
|
|
@@ -22,9 +21,7 @@ class BugReport(BaseModel):
|
|
| 22 |
arbitrary_types_allowed = True
|
| 23 |
|
| 24 |
|
| 25 |
-
|
| 26 |
-
# OpenEnv typed models — ALL pure Pydantic
|
| 27 |
-
# ─────────────────────────────────────────────
|
| 28 |
|
| 29 |
class TriageAction(Action):
|
| 30 |
"""What the agent submits as its triage decision."""
|
|
|
|
| 5 |
from openenv.core.env_server.types import State
|
| 6 |
|
| 7 |
|
| 8 |
+
|
| 9 |
+
|
|
|
|
| 10 |
|
| 11 |
class BugReport(BaseModel):
|
| 12 |
"""A single GitHub-style bug report."""
|
|
|
|
| 21 |
arbitrary_types_allowed = True
|
| 22 |
|
| 23 |
|
| 24 |
+
|
|
|
|
|
|
|
| 25 |
|
| 26 |
class TriageAction(Action):
|
| 27 |
"""What the agent submits as its triage decision."""
|
server/app.py
CHANGED
|
@@ -49,21 +49,12 @@ TASKS_META = [
|
|
| 49 |
}
|
| 50 |
]
|
| 51 |
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
# The OpenEnv create_app() creates stateless endpoints that spin up
|
| 55 |
-
# a new environment per request. This breaks our reset→step flow
|
| 56 |
-
# because step() needs the bug from reset().
|
| 57 |
-
# We maintain a shared global instance to fix this.
|
| 58 |
-
# ─────────────────────────────────────────────
|
| 59 |
_global_env = BugTriageEnvironment()
|
| 60 |
|
| 61 |
|
| 62 |
-
|
| 63 |
-
# REMOVE the framework's stateless /reset and /step routes,
|
| 64 |
-
# then add our own stateful versions.
|
| 65 |
-
# ─────────────────────────────────────────────
|
| 66 |
-
# Remove existing /reset and /step routes registered by create_app()
|
| 67 |
routes_to_remove = []
|
| 68 |
for route in app.routes:
|
| 69 |
if hasattr(route, "path") and route.path in ("/reset", "/step", "/state"):
|
|
@@ -105,9 +96,6 @@ def task_hard():
|
|
| 105 |
return TASKS_META[2]
|
| 106 |
|
| 107 |
|
| 108 |
-
# ─────────────────────────────────────────────
|
| 109 |
-
# CUSTOM STATEFUL /reset and /step endpoints
|
| 110 |
-
# ─────────────────────────────────────────────
|
| 111 |
|
| 112 |
@app.post("/reset")
|
| 113 |
async def custom_reset(request: Request):
|
|
@@ -190,10 +178,6 @@ def custom_state():
|
|
| 190 |
return state.dict()
|
| 191 |
|
| 192 |
|
| 193 |
-
# ─────────────────────────────────────────────
|
| 194 |
-
# Per-task reset shortcuts (convenience)
|
| 195 |
-
# ─────────────────────────────────────────────
|
| 196 |
-
|
| 197 |
@app.post("/tasks/easy/reset")
|
| 198 |
def reset_easy():
|
| 199 |
global _global_env
|
|
|
|
| 49 |
}
|
| 50 |
]
|
| 51 |
|
| 52 |
+
|
| 53 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
_global_env = BugTriageEnvironment()
|
| 55 |
|
| 56 |
|
| 57 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
routes_to_remove = []
|
| 59 |
for route in app.routes:
|
| 60 |
if hasattr(route, "path") and route.path in ("/reset", "/step", "/state"):
|
|
|
|
| 96 |
return TASKS_META[2]
|
| 97 |
|
| 98 |
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
@app.post("/reset")
|
| 101 |
async def custom_reset(request: Request):
|
|
|
|
| 178 |
return state.dict()
|
| 179 |
|
| 180 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
@app.post("/tasks/easy/reset")
|
| 182 |
def reset_easy():
|
| 183 |
global _global_env
|
server/task.py
CHANGED
|
@@ -6,9 +6,9 @@ sys.path.insert(0, "/app")
|
|
| 6 |
from typing import Tuple, List
|
| 7 |
from model import BugReport, TriageAction
|
| 8 |
|
| 9 |
-
|
| 10 |
# BUG REPORT DATASET
|
| 11 |
-
|
| 12 |
|
| 13 |
TASKS = {
|
| 14 |
"easy": {
|
|
@@ -228,18 +228,18 @@ TASKS = {
|
|
| 228 |
}
|
| 229 |
|
| 230 |
|
| 231 |
-
|
| 232 |
# TASK SAMPLER — picks a random bug each reset
|
| 233 |
-
|
| 234 |
|
| 235 |
def sample_bug(task_key: str) -> BugReport:
|
| 236 |
"""Return a random bug from the given task's pool."""
|
| 237 |
return random.choice(TASKS[task_key]["bugs"])
|
| 238 |
|
| 239 |
|
| 240 |
-
|
| 241 |
# GRADERS
|
| 242 |
-
|
| 243 |
|
| 244 |
PRIORITY_ORDER = {"P0": 0, "P1": 1, "P2": 2, "P3": 3}
|
| 245 |
|
|
|
|
| 6 |
from typing import Tuple, List
|
| 7 |
from model import BugReport, TriageAction
|
| 8 |
|
| 9 |
+
|
| 10 |
# BUG REPORT DATASET
|
| 11 |
+
|
| 12 |
|
| 13 |
TASKS = {
|
| 14 |
"easy": {
|
|
|
|
| 228 |
}
|
| 229 |
|
| 230 |
|
| 231 |
+
|
| 232 |
# TASK SAMPLER — picks a random bug each reset
|
| 233 |
+
|
| 234 |
|
| 235 |
def sample_bug(task_key: str) -> BugReport:
|
| 236 |
"""Return a random bug from the given task's pool."""
|
| 237 |
return random.choice(TASKS[task_key]["bugs"])
|
| 238 |
|
| 239 |
|
| 240 |
+
|
| 241 |
# GRADERS
|
| 242 |
+
|
| 243 |
|
| 244 |
PRIORITY_ORDER = {"P0": 0, "P1": 1, "P2": 2, "P3": 3}
|
| 245 |
|