Krooz committed on
Commit
bf37d71
·
verified ·
1 Parent(s): b4cadb7

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. inference.py +6 -6
  2. reward/aggregator.py +2 -2
inference.py CHANGED
@@ -130,7 +130,7 @@ def run_episode(client: OpenAI, task_id: str) -> Dict[str, Any]:
130
  reset_data = resp.json()
131
  except Exception as e:
132
  print(f" ERROR: Could not connect to environment at {ENV_URL}: {e}")
133
- return {"task_id": task_id, "error": str(e), "score": 0.0}
134
 
135
  observation = reset_data.get("observation", reset_data)
136
  print(f"[START] Goal: {observation.get('task_goal', '')[:100]}...")
@@ -163,7 +163,7 @@ def run_episode(client: OpenAI, task_id: str) -> Dict[str, Any]:
163
  break
164
 
165
  observation = step_data.get("observation", {})
166
- reward = step_data.get("reward", 0.0) or 0.0
167
  done = step_data.get("done", False)
168
  tool_result = observation.get("tool_result", {})
169
  status = (tool_result or {}).get("status", "?")
@@ -184,7 +184,7 @@ def run_episode(client: OpenAI, task_id: str) -> Dict[str, Any]:
184
  except Exception:
185
  grade_data = {}
186
 
187
- episode_score = grade_data.get("score", 0.0)
188
  print(f"\n[END] Episode complete | steps={step} | step_reward={total_reward:.3f} | score={episode_score:.3f}")
189
  if grade_data.get("breakdown"):
190
  print(f" Breakdown: {json.dumps(grade_data['breakdown'], indent=4)}")
@@ -229,11 +229,11 @@ def main():
229
  print("SUMMARY")
230
  print('='*60)
231
  for r in results:
232
- score = r.get("episode_score", 0.0)
233
  bar = "█" * int(score * 20)
234
  print(f" {r['task_id']:25s} score={score:.3f} {bar}")
235
 
236
- avg = sum(r.get("episode_score", 0.0) for r in results) / len(results)
237
  print(f"\n[END] Average Score: {avg:.3f}")
238
  print('='*60)
239
 
@@ -241,7 +241,7 @@ def main():
241
  output = {"results": results, "average_score": round(avg, 4)}
242
  print(json.dumps(output, indent=2))
243
 
244
- return 0 if avg > 0.0 else 1
245
 
246
 
247
  if __name__ == "__main__":
 
130
  reset_data = resp.json()
131
  except Exception as e:
132
  print(f" ERROR: Could not connect to environment at {ENV_URL}: {e}")
133
+ return {"task_id": task_id, "error": str(e), "score": 0.1}
134
 
135
  observation = reset_data.get("observation", reset_data)
136
  print(f"[START] Goal: {observation.get('task_goal', '')[:100]}...")
 
163
  break
164
 
165
  observation = step_data.get("observation", {})
166
+ reward = step_data.get("reward", 0.1) or 0.1
167
  done = step_data.get("done", False)
168
  tool_result = observation.get("tool_result", {})
169
  status = (tool_result or {}).get("status", "?")
 
184
  except Exception:
185
  grade_data = {}
186
 
187
+ episode_score = grade_data.get("score", 0.1)
188
  print(f"\n[END] Episode complete | steps={step} | step_reward={total_reward:.3f} | score={episode_score:.3f}")
189
  if grade_data.get("breakdown"):
190
  print(f" Breakdown: {json.dumps(grade_data['breakdown'], indent=4)}")
 
229
  print("SUMMARY")
230
  print('='*60)
231
  for r in results:
232
+ score = r.get("episode_score", 0.1)
233
  bar = "█" * int(score * 20)
234
  print(f" {r['task_id']:25s} score={score:.3f} {bar}")
235
 
236
+ avg = sum(r.get("episode_score", 0.1) for r in results) / len(results)
237
  print(f"\n[END] Average Score: {avg:.3f}")
238
  print('='*60)
239
 
 
241
  output = {"results": results, "average_score": round(avg, 4)}
242
  print(json.dumps(output, indent=2))
243
 
244
+ return 0.1 if avg > 0.1 else 0.9
245
 
246
 
247
  if __name__ == "__main__":
reward/aggregator.py CHANGED
@@ -37,7 +37,7 @@ _WRONG_APPROVER = 0.01 # wrong routing
37
 
38
  def _clamp_step(score: float) -> float:
39
  """Clamp step reward strictly to (0.01, 0.99)."""
40
- return min(max(round(score, 4), 0.01), 0.99)
41
 
42
 
43
  class RewardAggregator:
@@ -108,7 +108,7 @@ class RewardAggregator:
108
  result = grade(world_state)
109
  return {
110
  "task_id": world_state.get("task_id"),
111
- "score": result.get("score", 0.0),
112
  "breakdown": result.get("breakdown", {}),
113
  "weights": result.get("weights", {}),
114
  "details": result.get("details", {}),
 
37
 
38
  def _clamp_step(score: float) -> float:
39
  """Clamp step reward strictly to (0.01, 0.99)."""
40
+ return min(max(round(score, 4), 0.10), 0.90)
41
 
42
 
43
  class RewardAggregator:
 
108
  result = grade(world_state)
109
  return {
110
  "task_id": world_state.get("task_id"),
111
+ "score": result.get("score", 0.1),
112
  "breakdown": result.get("breakdown", {}),
113
  "weights": result.get("weights", {}),
114
  "details": result.get("details", {}),