Rockerleo committed on
Commit
69c75cc
·
verified ·
1 Parent(s): 459e79a

Upload folder using huggingface_hub

Browse files
server/inference.py CHANGED
@@ -188,10 +188,12 @@ def log_step(
188
 
189
 
190
  def log_end(
191
- success: bool, steps: int, score: float = 0.0, rewards: List[float] = None
192
  ) -> None:
193
  if rewards is None:
194
  rewards = []
 
 
195
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
196
  print(
197
  f"[END] success={str(success).lower()} steps={steps} score={score:.4f} rewards={rewards_str}",
@@ -284,11 +286,11 @@ def _update_openenv_state(
284
  OPENENV_STATE.seed = seed
285
  OPENENV_STATE.step_count = step_count
286
  OPENENV_STATE.max_steps = max_steps
287
- OPENENV_STATE.end_score = end_score
288
  OPENENV_STATE.rewards = rewards
289
  OPENENV_STATE.artifacts_read = artifacts_read
290
  OPENENV_STATE.timestamp = ts
291
- OPENENV_STATE.scores[task_id] = end_score
292
 
293
 
294
  def call_llm(messages: List[Dict], model_name: Optional[str] = None) -> str:
@@ -572,6 +574,8 @@ def run_task(task_id: str, seed: int = 42) -> float:
572
  except Exception as e:
573
  print(f" [ERROR] Task {task_id} failed: {e}", flush=True, file=sys.stderr)
574
  finally:
 
 
575
  success = final_score >= SUCCESS_THRESHOLD
576
  log_end(success=success, steps=step_num, score=final_score, rewards=rewards)
577
 
 
188
 
189
 
190
  def log_end(
191
+ success: bool, steps: int, score: float = 0.01, rewards: List[float] = None
192
  ) -> None:
193
  if rewards is None:
194
  rewards = []
195
+ # Ensure score is strictly between 0 and 1
196
+ score = max(0.01, min(0.99, score))
197
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
198
  print(
199
  f"[END] success={str(success).lower()} steps={steps} score={score:.4f} rewards={rewards_str}",
 
286
  OPENENV_STATE.seed = seed
287
  OPENENV_STATE.step_count = step_count
288
  OPENENV_STATE.max_steps = max_steps
289
+ OPENENV_STATE.end_score = max(0.01, min(0.99, end_score))
290
  OPENENV_STATE.rewards = rewards
291
  OPENENV_STATE.artifacts_read = artifacts_read
292
  OPENENV_STATE.timestamp = ts
293
+ OPENENV_STATE.scores[task_id] = max(0.01, min(0.99, end_score))
294
 
295
 
296
  def call_llm(messages: List[Dict], model_name: Optional[str] = None) -> str:
 
574
  except Exception as e:
575
  print(f" [ERROR] Task {task_id} failed: {e}", flush=True, file=sys.stderr)
576
  finally:
577
+ # Validator requires scores strictly between 0 and 1
578
+ final_score = max(0.01, min(0.99, final_score))
579
  success = final_score >= SUCCESS_THRESHOLD
580
  log_end(success=success, steps=step_num, score=final_score, rewards=rewards)
581
 
server/mlops_environment.py CHANGED
@@ -144,7 +144,7 @@ class MLOpsEnvironment:
144
  self._last_read_filters: Dict[str, str] = {}
145
  self._sanity_checks_run: List[str] = []
146
  self._duplicate_queries = 0
147
- self._current_score = 0.0
148
  self._messages: List[str] = []
149
 
150
  # ── OpenEnv API ───────────────────────────────────────────────────────────
 
144
  self._last_read_filters: Dict[str, str] = {}
145
  self._sanity_checks_run: List[str] = []
146
  self._duplicate_queries = 0
147
+ self._current_score = 0.01
148
  self._messages: List[str] = []
149
 
150
  # ── OpenEnv API ───────────────────────────────────────────────────────────
server/openenv_state.py CHANGED
@@ -26,8 +26,8 @@ OPENENV_STATE: OpenEnvState = OpenEnvState(
26
  seed=0,
27
  step_count=0,
28
  max_steps=30,
29
- scores={"easy": 0.0, "medium": 0.0, "hard": 0.0},
30
- end_score=0.0,
31
  rewards=[],
32
  artifacts_read=[],
33
  timestamp=datetime.utcnow().isoformat(),
 
26
  seed=0,
27
  step_count=0,
28
  max_steps=30,
29
+ scores={"easy": 0.01, "medium": 0.01, "hard": 0.01},
30
+ end_score=0.01,
31
  rewards=[],
32
  artifacts_read=[],
33
  timestamp=datetime.utcnow().isoformat(),