Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- inference.py +6 -6
- reward/aggregator.py +2 -2
inference.py
CHANGED
|
@@ -130,7 +130,7 @@ def run_episode(client: OpenAI, task_id: str) -> Dict[str, Any]:
|
|
| 130 |
reset_data = resp.json()
|
| 131 |
except Exception as e:
|
| 132 |
print(f" ERROR: Could not connect to environment at {ENV_URL}: {e}")
|
| 133 |
-
return {"task_id": task_id, "error": str(e), "score": 0.
|
| 134 |
|
| 135 |
observation = reset_data.get("observation", reset_data)
|
| 136 |
print(f"[START] Goal: {observation.get('task_goal', '')[:100]}...")
|
|
@@ -163,7 +163,7 @@ def run_episode(client: OpenAI, task_id: str) -> Dict[str, Any]:
|
|
| 163 |
break
|
| 164 |
|
| 165 |
observation = step_data.get("observation", {})
|
| 166 |
-
reward = step_data.get("reward", 0.
|
| 167 |
done = step_data.get("done", False)
|
| 168 |
tool_result = observation.get("tool_result", {})
|
| 169 |
status = (tool_result or {}).get("status", "?")
|
|
@@ -184,7 +184,7 @@ def run_episode(client: OpenAI, task_id: str) -> Dict[str, Any]:
|
|
| 184 |
except Exception:
|
| 185 |
grade_data = {}
|
| 186 |
|
| 187 |
-
episode_score = grade_data.get("score", 0.
|
| 188 |
print(f"\n[END] Episode complete | steps={step} | step_reward={total_reward:.3f} | score={episode_score:.3f}")
|
| 189 |
if grade_data.get("breakdown"):
|
| 190 |
print(f" Breakdown: {json.dumps(grade_data['breakdown'], indent=4)}")
|
|
@@ -229,11 +229,11 @@ def main():
|
|
| 229 |
print("SUMMARY")
|
| 230 |
print('='*60)
|
| 231 |
for r in results:
|
| 232 |
-
score = r.get("episode_score", 0.
|
| 233 |
bar = "█" * int(score * 20)
|
| 234 |
print(f" {r['task_id']:25s} score={score:.3f} {bar}")
|
| 235 |
|
| 236 |
-
avg = sum(r.get("episode_score", 0.
|
| 237 |
print(f"\n[END] Average Score: {avg:.3f}")
|
| 238 |
print('='*60)
|
| 239 |
|
|
@@ -241,7 +241,7 @@ def main():
|
|
| 241 |
output = {"results": results, "average_score": round(avg, 4)}
|
| 242 |
print(json.dumps(output, indent=2))
|
| 243 |
|
| 244 |
-
return 0 if avg > 0.
|
| 245 |
|
| 246 |
|
| 247 |
if __name__ == "__main__":
|
|
|
|
| 130 |
reset_data = resp.json()
|
| 131 |
except Exception as e:
|
| 132 |
print(f" ERROR: Could not connect to environment at {ENV_URL}: {e}")
|
| 133 |
+
return {"task_id": task_id, "error": str(e), "score": 0.1}
|
| 134 |
|
| 135 |
observation = reset_data.get("observation", reset_data)
|
| 136 |
print(f"[START] Goal: {observation.get('task_goal', '')[:100]}...")
|
|
|
|
| 163 |
break
|
| 164 |
|
| 165 |
observation = step_data.get("observation", {})
|
| 166 |
+
reward = step_data.get("reward", 0.1) or 0.1
|
| 167 |
done = step_data.get("done", False)
|
| 168 |
tool_result = observation.get("tool_result", {})
|
| 169 |
status = (tool_result or {}).get("status", "?")
|
|
|
|
| 184 |
except Exception:
|
| 185 |
grade_data = {}
|
| 186 |
|
| 187 |
+
episode_score = grade_data.get("score", 0.1)
|
| 188 |
print(f"\n[END] Episode complete | steps={step} | step_reward={total_reward:.3f} | score={episode_score:.3f}")
|
| 189 |
if grade_data.get("breakdown"):
|
| 190 |
print(f" Breakdown: {json.dumps(grade_data['breakdown'], indent=4)}")
|
|
|
|
| 229 |
print("SUMMARY")
|
| 230 |
print('='*60)
|
| 231 |
for r in results:
|
| 232 |
+
score = r.get("episode_score", 0.1)
|
| 233 |
bar = "█" * int(score * 20)
|
| 234 |
print(f" {r['task_id']:25s} score={score:.3f} {bar}")
|
| 235 |
|
| 236 |
+
avg = sum(r.get("episode_score", 0.1) for r in results) / len(results)
|
| 237 |
print(f"\n[END] Average Score: {avg:.3f}")
|
| 238 |
print('='*60)
|
| 239 |
|
|
|
|
| 241 |
output = {"results": results, "average_score": round(avg, 4)}
|
| 242 |
print(json.dumps(output, indent=2))
|
| 243 |
|
| 244 |
+
return 0.1 if avg > 0.1 else 0.9
|
| 245 |
|
| 246 |
|
| 247 |
if __name__ == "__main__":
|
reward/aggregator.py
CHANGED
|
@@ -37,7 +37,7 @@ _WRONG_APPROVER = 0.01 # wrong routing
|
|
| 37 |
|
| 38 |
def _clamp_step(score: float) -> float:
|
| 39 |
"""Clamp step reward strictly to (0.01, 0.99)."""
|
| 40 |
-
return min(max(round(score, 4), 0.
|
| 41 |
|
| 42 |
|
| 43 |
class RewardAggregator:
|
|
@@ -108,7 +108,7 @@ class RewardAggregator:
|
|
| 108 |
result = grade(world_state)
|
| 109 |
return {
|
| 110 |
"task_id": world_state.get("task_id"),
|
| 111 |
-
"score": result.get("score", 0.
|
| 112 |
"breakdown": result.get("breakdown", {}),
|
| 113 |
"weights": result.get("weights", {}),
|
| 114 |
"details": result.get("details", {}),
|
|
|
|
| 37 |
|
| 38 |
def _clamp_step(score: float) -> float:
|
| 39 |
"""Clamp step reward strictly to (0.01, 0.99)."""
|
| 40 |
+
return min(max(round(score, 4), 0.10), 0.90)
|
| 41 |
|
| 42 |
|
| 43 |
class RewardAggregator:
|
|
|
|
| 108 |
result = grade(world_state)
|
| 109 |
return {
|
| 110 |
"task_id": world_state.get("task_id"),
|
| 111 |
+
"score": result.get("score", 0.1),
|
| 112 |
"breakdown": result.get("breakdown", {}),
|
| 113 |
"weights": result.get("weights", {}),
|
| 114 |
"details": result.get("details", {}),
|