Spaces:
Sleeping
Sleeping
Commit ·
70022c4
1
Parent(s): de442f8
Add structured output in stdout in inference.py
Browse files
- inference.py +3 -0
inference.py
CHANGED
|
@@ -75,6 +75,7 @@ def step_env(response: str) -> dict:
|
|
| 75 |
def run_task(task_id: str) -> dict:
|
| 76 |
"""Run one full episode of a task and return results."""
|
| 77 |
print(f"\nββ Task: {task_id} βββββββββββββββββββββββββββββββββ")
|
|
|
|
| 78 |
|
| 79 |
reset_data = reset_env(task_id)
|
| 80 |
obs = reset_data["observation"]
|
|
@@ -102,6 +103,7 @@ def run_task(task_id: str) -> dict:
|
|
| 102 |
turns += 1
|
| 103 |
|
| 104 |
print(f" Reward: {reward:.3f} | Breakdown: {result['reward']['breakdown']}")
|
|
|
|
| 105 |
|
| 106 |
if result["done"]:
|
| 107 |
break
|
|
@@ -111,6 +113,7 @@ def run_task(task_id: str) -> dict:
|
|
| 111 |
|
| 112 |
final_score = round(min(total_score / max(turns, 1), 1.0), 3)
|
| 113 |
print(f" ββ Final Score: {final_score} ({'PASS' if final_score >= 0.5 else 'FAIL'})")
|
|
|
|
| 114 |
|
| 115 |
return {
|
| 116 |
"task": task_id,
|
|
|
|
| 75 |
def run_task(task_id: str) -> dict:
|
| 76 |
"""Run one full episode of a task and return results."""
|
| 77 |
print(f"\nββ Task: {task_id} βββββββββββββββββββββββββββββββββ")
|
| 78 |
+
print(f"[START] task={task_id}", flush=True)
|
| 79 |
|
| 80 |
reset_data = reset_env(task_id)
|
| 81 |
obs = reset_data["observation"]
|
|
|
|
| 103 |
turns += 1
|
| 104 |
|
| 105 |
print(f" Reward: {reward:.3f} | Breakdown: {result['reward']['breakdown']}")
|
| 106 |
+
print(f"[STEP] step={turns} reward={reward}", flush=True)
|
| 107 |
|
| 108 |
if result["done"]:
|
| 109 |
break
|
|
|
|
| 113 |
|
| 114 |
final_score = round(min(total_score / max(turns, 1), 1.0), 3)
|
| 115 |
print(f" ββ Final Score: {final_score} ({'PASS' if final_score >= 0.5 else 'FAIL'})")
|
| 116 |
+
print(f"[END] task={task_id} score={final_score} steps={turns}", flush=True)
|
| 117 |
|
| 118 |
return {
|
| 119 |
"task": task_id,
|