Spaces:
Running
Running
Commit ·
29416b7
1
Parent(s): 407fc42
error fixed
Browse files- inference.py +29 -11
inference.py
CHANGED
|
@@ -8,10 +8,10 @@ Usage:
|
|
| 8 |
python inference.py --url https://Souravdanyal-code-debug-env.hf.space
|
| 9 |
python inference.py --difficulty easy
|
| 10 |
|
| 11 |
-
STDOUT FORMAT (strictly required by evaluator):
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
"""
|
| 16 |
|
| 17 |
import os, sys, json, time, argparse, requests, re
|
|
@@ -41,18 +41,36 @@ SUCCESS_SCORE_THRESHOLD = 0.5
|
|
| 41 |
|
| 42 |
client = OpenAI(api_key=API_KEY or "dummy", base_url=API_BASE_URL)
|
| 43 |
|
| 44 |
-
# ── Logging — STRICT FORMAT ─────────────────────────────────────────────
|
| 45 |
def log_start(task_id: str, env: str, model: str) -> None:
|
| 46 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
|
| 53 |
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
# ── Env client ────────────────────────────────────────────────────────────────
|
| 58 |
def env_reset(url: str, difficulty: str) -> dict:
|
|
|
|
| 8 |
python inference.py --url https://Souravdanyal-code-debug-env.hf.space
|
| 9 |
python inference.py --difficulty easy
|
| 10 |
|
| 11 |
+
STDOUT FORMAT (strictly required by evaluator - JSON):
|
| 12 |
+
{"type": "START", "task": "<id>", "env": "<benchmark>", "model": "<model>"}
|
| 13 |
+
{"type": "STEP", "step": <n>, "action": "<str>", "reward": <0.00>, "done": <bool>, "error": <msg|null>}
|
| 14 |
+
{"type": "END", "success": <bool>, "steps": <n>, "score": <0.000>, "rewards": [<r1>, <r2>, ...]}
|
| 15 |
"""
|
| 16 |
|
| 17 |
import os, sys, json, time, argparse, requests, re
|
|
|
|
| 41 |
|
| 42 |
client = OpenAI(api_key=API_KEY or "dummy", base_url=API_BASE_URL)
|
| 43 |
|
| 44 |
+
# ── Logging — STRICT JSON FORMAT ─────────────────────────────────────────────
|
| 45 |
def log_start(task_id: str, env: str, model: str) -> None:
|
| 46 |
+
log_entry = {
|
| 47 |
+
"type": "START",
|
| 48 |
+
"task": task_id,
|
| 49 |
+
"env": env,
|
| 50 |
+
"model": model
|
| 51 |
+
}
|
| 52 |
+
print(json.dumps(log_entry), flush=True)
|
| 53 |
|
| 54 |
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
|
| 55 |
+
log_entry = {
|
| 56 |
+
"type": "STEP",
|
| 57 |
+
"step": step,
|
| 58 |
+
"action": action,
|
| 59 |
+
"reward": round(reward, 2),
|
| 60 |
+
"done": done,
|
| 61 |
+
"error": error
|
| 62 |
+
}
|
| 63 |
+
print(json.dumps(log_entry), flush=True)
|
| 64 |
|
| 65 |
def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
|
| 66 |
+
log_entry = {
|
| 67 |
+
"type": "END",
|
| 68 |
+
"success": success,
|
| 69 |
+
"steps": steps,
|
| 70 |
+
"score": round(score, 3),
|
| 71 |
+
"rewards": [round(r, 2) for r in rewards]
|
| 72 |
+
}
|
| 73 |
+
print(json.dumps(log_entry), flush=True)
|
| 74 |
|
| 75 |
# ── Env client ────────────────────────────────────────────────────────────────
|
| 76 |
def env_reset(url: str, difficulty: str) -> dict:
|