Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- server/inference.py +7 -3
- server/mlops_environment.py +1 -1
- server/openenv_state.py +2 -2
server/inference.py
CHANGED
|
@@ -188,10 +188,12 @@ def log_step(
|
|
| 188 |
|
| 189 |
|
| 190 |
def log_end(
|
| 191 |
-
success: bool, steps: int, score: float = 0.0, rewards: List[float] = None
|
| 192 |
) -> None:
|
| 193 |
if rewards is None:
|
| 194 |
rewards = []
|
|
|
|
|
|
|
| 195 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 196 |
print(
|
| 197 |
f"[END] success={str(success).lower()} steps={steps} score={score:.4f} rewards={rewards_str}",
|
|
@@ -284,11 +286,11 @@ def _update_openenv_state(
|
|
| 284 |
OPENENV_STATE.seed = seed
|
| 285 |
OPENENV_STATE.step_count = step_count
|
| 286 |
OPENENV_STATE.max_steps = max_steps
|
| 287 |
-
OPENENV_STATE.end_score = end_score
|
| 288 |
OPENENV_STATE.rewards = rewards
|
| 289 |
OPENENV_STATE.artifacts_read = artifacts_read
|
| 290 |
OPENENV_STATE.timestamp = ts
|
| 291 |
-
OPENENV_STATE.scores[task_id] = end_score
|
| 292 |
|
| 293 |
|
| 294 |
def call_llm(messages: List[Dict], model_name: Optional[str] = None) -> str:
|
|
@@ -572,6 +574,8 @@ def run_task(task_id: str, seed: int = 42) -> float:
|
|
| 572 |
except Exception as e:
|
| 573 |
print(f" [ERROR] Task {task_id} failed: {e}", flush=True, file=sys.stderr)
|
| 574 |
finally:
|
|
|
|
|
|
|
| 575 |
success = final_score >= SUCCESS_THRESHOLD
|
| 576 |
log_end(success=success, steps=step_num, score=final_score, rewards=rewards)
|
| 577 |
|
|
|
|
| 188 |
|
| 189 |
|
| 190 |
def log_end(
|
| 191 |
+
success: bool, steps: int, score: float = 0.01, rewards: List[float] = None
|
| 192 |
) -> None:
|
| 193 |
if rewards is None:
|
| 194 |
rewards = []
|
| 195 |
+
# Ensure score is strictly between 0 and 1
|
| 196 |
+
score = max(0.01, min(0.99, score))
|
| 197 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 198 |
print(
|
| 199 |
f"[END] success={str(success).lower()} steps={steps} score={score:.4f} rewards={rewards_str}",
|
|
|
|
| 286 |
OPENENV_STATE.seed = seed
|
| 287 |
OPENENV_STATE.step_count = step_count
|
| 288 |
OPENENV_STATE.max_steps = max_steps
|
| 289 |
+
OPENENV_STATE.end_score = max(0.01, min(0.99, end_score))
|
| 290 |
OPENENV_STATE.rewards = rewards
|
| 291 |
OPENENV_STATE.artifacts_read = artifacts_read
|
| 292 |
OPENENV_STATE.timestamp = ts
|
| 293 |
+
OPENENV_STATE.scores[task_id] = max(0.01, min(0.99, end_score))
|
| 294 |
|
| 295 |
|
| 296 |
def call_llm(messages: List[Dict], model_name: Optional[str] = None) -> str:
|
|
|
|
| 574 |
except Exception as e:
|
| 575 |
print(f" [ERROR] Task {task_id} failed: {e}", flush=True, file=sys.stderr)
|
| 576 |
finally:
|
| 577 |
+
# Validator requires scores strictly between 0 and 1
|
| 578 |
+
final_score = max(0.01, min(0.99, final_score))
|
| 579 |
success = final_score >= SUCCESS_THRESHOLD
|
| 580 |
log_end(success=success, steps=step_num, score=final_score, rewards=rewards)
|
| 581 |
|
server/mlops_environment.py
CHANGED
|
@@ -144,7 +144,7 @@ class MLOpsEnvironment:
|
|
| 144 |
self._last_read_filters: Dict[str, str] = {}
|
| 145 |
self._sanity_checks_run: List[str] = []
|
| 146 |
self._duplicate_queries = 0
|
| 147 |
-
self._current_score = 0.0
|
| 148 |
self._messages: List[str] = []
|
| 149 |
|
| 150 |
# ── OpenEnv API ───────────────────────────────────────────────────────────
|
|
|
|
| 144 |
self._last_read_filters: Dict[str, str] = {}
|
| 145 |
self._sanity_checks_run: List[str] = []
|
| 146 |
self._duplicate_queries = 0
|
| 147 |
+
self._current_score = 0.01
|
| 148 |
self._messages: List[str] = []
|
| 149 |
|
| 150 |
# ── OpenEnv API ───────────────────────────────────────────────────────────
|
server/openenv_state.py
CHANGED
|
@@ -26,8 +26,8 @@ OPENENV_STATE: OpenEnvState = OpenEnvState(
|
|
| 26 |
seed=0,
|
| 27 |
step_count=0,
|
| 28 |
max_steps=30,
|
| 29 |
-
scores={"easy": 0.0, "medium": 0.0, "hard": 0.0},
|
| 30 |
-
end_score=0.0,
|
| 31 |
rewards=[],
|
| 32 |
artifacts_read=[],
|
| 33 |
timestamp=datetime.utcnow().isoformat(),
|
|
|
|
| 26 |
seed=0,
|
| 27 |
step_count=0,
|
| 28 |
max_steps=30,
|
| 29 |
+
scores={"easy": 0.01, "medium": 0.01, "hard": 0.01},
|
| 30 |
+
end_score=0.01,
|
| 31 |
rewards=[],
|
| 32 |
artifacts_read=[],
|
| 33 |
timestamp=datetime.utcnow().isoformat(),
|