Upload folder using huggingface_hub
Browse files
- app/environment/core.py +8 -8
- app/environment/graders.py +5 -5
- data/learning_archive.json +184 -0
- data/learning_memory.json +9 -9
- data/trajectory_history.jsonl +5 -0
app/environment/core.py
CHANGED
|
@@ -463,13 +463,13 @@ class EmergencyEnv:
|
|
| 463 |
def _improvement_bonus(self, status: str) -> float:
|
| 464 |
if self.last_outcome_status is None:
|
| 465 |
self.last_outcome_status = status
|
| 466 |
-
return
|
| 467 |
|
| 468 |
delta = OUTCOME_SCORE[status] - OUTCOME_SCORE[self.last_outcome_status]
|
| 469 |
self.last_outcome_status = status
|
| 470 |
if delta > 0:
|
| 471 |
return 0.04
|
| 472 |
-
return
|
| 473 |
|
| 474 |
def _specialization_match(self, hospital: HospitalState) -> bool:
|
| 475 |
assert self.state_data is not None
|
|
@@ -653,7 +653,7 @@ class EmergencyEnv:
|
|
| 653 |
assert self.state_data is not None
|
| 654 |
|
| 655 |
if selected.icu_display != "unknown":
|
| 656 |
-
return arrival_outcome,
|
| 657 |
|
| 658 |
difficulty = self.state_data.scenario_difficulty
|
| 659 |
guess_success_prob = {
|
|
@@ -666,7 +666,7 @@ class EmergencyEnv:
|
|
| 666 |
if guess_correct:
|
| 667 |
return (
|
| 668 |
arrival_outcome,
|
| 669 |
-
|
| 670 |
"Hidden case: risky ICU-unknown guess was correct this time.",
|
| 671 |
)
|
| 672 |
|
|
@@ -690,7 +690,7 @@ class EmergencyEnv:
|
|
| 690 |
) -> tuple[ArrivalOutcome, float, str | None]:
|
| 691 |
"""Late-arrival operational shocks: ICU/doctor/bed/equipment can fail at handover."""
|
| 692 |
if arrival_outcome.status == "rejected":
|
| 693 |
-
return arrival_outcome,
|
| 694 |
|
| 695 |
shock_prob = {
|
| 696 |
"easy": 0.03,
|
|
@@ -698,11 +698,11 @@ class EmergencyEnv:
|
|
| 698 |
"hard": 0.10,
|
| 699 |
}.get(difficulty, 0.14)
|
| 700 |
if self._rng.random() >= shock_prob:
|
| 701 |
-
return arrival_outcome,
|
| 702 |
|
| 703 |
v = arrival_outcome.validation_details
|
| 704 |
if v is None:
|
| 705 |
-
return arrival_outcome,
|
| 706 |
|
| 707 |
shock = self._rng.choice([
|
| 708 |
"doctor_unavailable",
|
|
@@ -1104,7 +1104,7 @@ class EmergencyEnv:
|
|
| 1104 |
|
| 1105 |
def _progress_score(self) -> float:
|
| 1106 |
if not self.trajectory:
|
| 1107 |
-
return
|
| 1108 |
raw = sum(float(t["reward"]) for t in self.trajectory) / len(self.trajectory)
|
| 1109 |
return max(MIN_REWARD, min(MAX_REWARD, raw))
|
| 1110 |
|
|
|
|
| 463 |
def _improvement_bonus(self, status: str) -> float:
|
| 464 |
if self.last_outcome_status is None:
|
| 465 |
self.last_outcome_status = status
|
| 466 |
+
return MIN_REWARD
|
| 467 |
|
| 468 |
delta = OUTCOME_SCORE[status] - OUTCOME_SCORE[self.last_outcome_status]
|
| 469 |
self.last_outcome_status = status
|
| 470 |
if delta > 0:
|
| 471 |
return 0.04
|
| 472 |
+
return MIN_REWARD
|
| 473 |
|
| 474 |
def _specialization_match(self, hospital: HospitalState) -> bool:
|
| 475 |
assert self.state_data is not None
|
|
|
|
| 653 |
assert self.state_data is not None
|
| 654 |
|
| 655 |
if selected.icu_display != "unknown":
|
| 656 |
+
return arrival_outcome, MIN_REWARD, None
|
| 657 |
|
| 658 |
difficulty = self.state_data.scenario_difficulty
|
| 659 |
guess_success_prob = {
|
|
|
|
| 666 |
if guess_correct:
|
| 667 |
return (
|
| 668 |
arrival_outcome,
|
| 669 |
+
MIN_REWARD,
|
| 670 |
"Hidden case: risky ICU-unknown guess was correct this time.",
|
| 671 |
)
|
| 672 |
|
|
|
|
| 690 |
) -> tuple[ArrivalOutcome, float, str | None]:
|
| 691 |
"""Late-arrival operational shocks: ICU/doctor/bed/equipment can fail at handover."""
|
| 692 |
if arrival_outcome.status == "rejected":
|
| 693 |
+
return arrival_outcome, MIN_REWARD, None
|
| 694 |
|
| 695 |
shock_prob = {
|
| 696 |
"easy": 0.03,
|
|
|
|
| 698 |
"hard": 0.10,
|
| 699 |
}.get(difficulty, 0.14)
|
| 700 |
if self._rng.random() >= shock_prob:
|
| 701 |
+
return arrival_outcome, MIN_REWARD, None
|
| 702 |
|
| 703 |
v = arrival_outcome.validation_details
|
| 704 |
if v is None:
|
| 705 |
+
return arrival_outcome, MIN_REWARD, None
|
| 706 |
|
| 707 |
shock = self._rng.choice([
|
| 708 |
"doctor_unavailable",
|
|
|
|
| 1104 |
|
| 1105 |
def _progress_score(self) -> float:
|
| 1106 |
if not self.trajectory:
|
| 1107 |
+
return MIN_REWARD
|
| 1108 |
raw = sum(float(t["reward"]) for t in self.trajectory) / len(self.trajectory)
|
| 1109 |
return max(MIN_REWARD, min(MAX_REWARD, raw))
|
| 1110 |
|
app/environment/graders.py
CHANGED
|
@@ -7,8 +7,8 @@ MAX_SCORE = 0.999
|
|
| 7 |
|
| 8 |
def _norm_margin(travel_time: float, critical_limit: float) -> float:
|
| 9 |
if critical_limit <= 0:
|
| 10 |
-
return
|
| 11 |
-
return max(
|
| 12 |
|
| 13 |
|
| 14 |
def grade_task(
|
|
@@ -78,14 +78,14 @@ def grade_task(
|
|
| 78 |
|
| 79 |
if difficulty == "easy":
|
| 80 |
threshold = 0.73
|
| 81 |
-
score = min(
|
| 82 |
elif difficulty == "medium":
|
| 83 |
threshold = 0.62
|
| 84 |
score = base
|
| 85 |
else: # hard
|
| 86 |
threshold = 0.53
|
| 87 |
-
hard_bonus = 0.15 if success_rate >= 0.5 else (0.05 if success_rate > 0.0 else
|
| 88 |
-
score = min(
|
| 89 |
|
| 90 |
score = max(MIN_SCORE, min(MAX_SCORE, score))
|
| 91 |
|
|
|
|
| 7 |
|
| 8 |
def _norm_margin(travel_time: float, critical_limit: float) -> float:
|
| 9 |
if critical_limit <= 0:
|
| 10 |
+
return MIN_SCORE
|
| 11 |
+
return max(MIN_SCORE, min(MAX_SCORE, (critical_limit - travel_time) / critical_limit))
|
| 12 |
|
| 13 |
|
| 14 |
def grade_task(
|
|
|
|
| 78 |
|
| 79 |
if difficulty == "easy":
|
| 80 |
threshold = 0.73
|
| 81 |
+
score = min(MAX_SCORE, base + 0.1)
|
| 82 |
elif difficulty == "medium":
|
| 83 |
threshold = 0.62
|
| 84 |
score = base
|
| 85 |
else: # hard
|
| 86 |
threshold = 0.53
|
| 87 |
+
hard_bonus = 0.15 if success_rate >= 0.5 else (0.05 if success_rate > 0.0 else MIN_SCORE)
|
| 88 |
+
score = min(MAX_SCORE, base + hard_bonus)
|
| 89 |
|
| 90 |
score = max(MIN_SCORE, min(MAX_SCORE, score))
|
| 91 |
|
data/learning_archive.json
CHANGED
|
@@ -7227,6 +7227,149 @@
|
|
| 7227 |
"best_scenario_name": "Wildfire Front (Evacuation Gridlock)",
|
| 7228 |
"best_difficulty": "hard",
|
| 7229 |
"best_required_specialization": "general"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7230 |
}
|
| 7231 |
},
|
| 7232 |
"episodes": [
|
|
@@ -9064,6 +9207,47 @@
|
|
| 9064 |
"H3"
|
| 9065 |
],
|
| 9066 |
"timestamp": "2026-04-09T04:44:45.351743+00:00"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9067 |
}
|
| 9068 |
]
|
| 9069 |
}
|
|
|
|
| 7227 |
"best_scenario_name": "Wildfire Front (Evacuation Gridlock)",
|
| 7228 |
"best_difficulty": "hard",
|
| 7229 |
"best_required_specialization": "general"
|
| 7230 |
+
},
|
| 7231 |
+
"42|acde_easy": {
|
| 7232 |
+
"attempts": 1,
|
| 7233 |
+
"best_score": 0.99,
|
| 7234 |
+
"best_actions": [
|
| 7235 |
+
"H1"
|
| 7236 |
+
],
|
| 7237 |
+
"best_steps": 1,
|
| 7238 |
+
"step_stats": {
|
| 7239 |
+
"1": {
|
| 7240 |
+
"H1": {
|
| 7241 |
+
"count": 1,
|
| 7242 |
+
"success": 1,
|
| 7243 |
+
"accepted": 1,
|
| 7244 |
+
"partial": 0,
|
| 7245 |
+
"rejected": 0,
|
| 7246 |
+
"total_reward": 0.999,
|
| 7247 |
+
"avg_reward": 0.999,
|
| 7248 |
+
"last_status": "ACCEPTED",
|
| 7249 |
+
"last_reason": "Patient admitted and treatment began",
|
| 7250 |
+
"success_rate": 1.0
|
| 7251 |
+
}
|
| 7252 |
+
}
|
| 7253 |
+
},
|
| 7254 |
+
"last_score": 0.99,
|
| 7255 |
+
"last_success": true,
|
| 7256 |
+
"last_run_at": "2026-04-09T04:57:25.535395+00:00",
|
| 7257 |
+
"last_actions": [
|
| 7258 |
+
"H1"
|
| 7259 |
+
],
|
| 7260 |
+
"last_required_specialization": "general",
|
| 7261 |
+
"last_scenario_type": "fire",
|
| 7262 |
+
"last_scenario_name": "Apartment Fire (Smoke Inhalation)",
|
| 7263 |
+
"best_success": true,
|
| 7264 |
+
"best_scenario_name": "Apartment Fire (Smoke Inhalation)",
|
| 7265 |
+
"best_difficulty": "easy",
|
| 7266 |
+
"best_required_specialization": "general"
|
| 7267 |
+
},
|
| 7268 |
+
"43|acde_medium": {
|
| 7269 |
+
"attempts": 1,
|
| 7270 |
+
"best_score": 0.21539999999999998,
|
| 7271 |
+
"best_actions": [
|
| 7272 |
+
"H2"
|
| 7273 |
+
],
|
| 7274 |
+
"best_steps": 1,
|
| 7275 |
+
"step_stats": {
|
| 7276 |
+
"1": {
|
| 7277 |
+
"H2": {
|
| 7278 |
+
"count": 1,
|
| 7279 |
+
"success": 0,
|
| 7280 |
+
"accepted": 0,
|
| 7281 |
+
"partial": 0,
|
| 7282 |
+
"rejected": 1,
|
| 7283 |
+
"total_reward": 0.109,
|
| 7284 |
+
"avg_reward": 0.109,
|
| 7285 |
+
"last_status": "REJECTED",
|
| 7286 |
+
"last_reason": "Condition became non-transferable during delay; immediate critical care failed",
|
| 7287 |
+
"success_rate": 0.0
|
| 7288 |
+
}
|
| 7289 |
+
}
|
| 7290 |
+
},
|
| 7291 |
+
"last_score": 0.21539999999999998,
|
| 7292 |
+
"last_success": false,
|
| 7293 |
+
"last_run_at": "2026-04-09T04:57:25.817773+00:00",
|
| 7294 |
+
"last_actions": [
|
| 7295 |
+
"H2"
|
| 7296 |
+
],
|
| 7297 |
+
"last_required_specialization": "cardiac",
|
| 7298 |
+
"last_scenario_type": "medical",
|
| 7299 |
+
"last_scenario_name": "Heart Attack (Unstable)",
|
| 7300 |
+
"best_success": false,
|
| 7301 |
+
"best_scenario_name": "Heart Attack (Unstable)",
|
| 7302 |
+
"best_difficulty": "medium",
|
| 7303 |
+
"best_required_specialization": "cardiac"
|
| 7304 |
+
},
|
| 7305 |
+
"44|acde_hard": {
|
| 7306 |
+
"attempts": 1,
|
| 7307 |
+
"best_score": 0.15059999999999998,
|
| 7308 |
+
"best_actions": [
|
| 7309 |
+
"H5",
|
| 7310 |
+
"H2",
|
| 7311 |
+
"H5"
|
| 7312 |
+
],
|
| 7313 |
+
"best_steps": 3,
|
| 7314 |
+
"step_stats": {
|
| 7315 |
+
"1": {
|
| 7316 |
+
"H5": {
|
| 7317 |
+
"count": 1,
|
| 7318 |
+
"success": 0,
|
| 7319 |
+
"accepted": 0,
|
| 7320 |
+
"partial": 0,
|
| 7321 |
+
"rejected": 1,
|
| 7322 |
+
"total_reward": 0.001,
|
| 7323 |
+
"avg_reward": 0.001,
|
| 7324 |
+
"last_status": "REJECTED",
|
| 7325 |
+
"last_reason": "Hospital cannot admit: ICU unavailable",
|
| 7326 |
+
"success_rate": 0.0
|
| 7327 |
+
}
|
| 7328 |
+
},
|
| 7329 |
+
"2": {
|
| 7330 |
+
"H2": {
|
| 7331 |
+
"count": 1,
|
| 7332 |
+
"success": 0,
|
| 7333 |
+
"accepted": 0,
|
| 7334 |
+
"partial": 0,
|
| 7335 |
+
"rejected": 1,
|
| 7336 |
+
"total_reward": 0.001,
|
| 7337 |
+
"avg_reward": 0.001,
|
| 7338 |
+
"last_status": "REJECTED",
|
| 7339 |
+
"last_reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required.",
|
| 7340 |
+
"success_rate": 0.0
|
| 7341 |
+
}
|
| 7342 |
+
},
|
| 7343 |
+
"3": {
|
| 7344 |
+
"H5": {
|
| 7345 |
+
"count": 1,
|
| 7346 |
+
"success": 0,
|
| 7347 |
+
"accepted": 0,
|
| 7348 |
+
"partial": 0,
|
| 7349 |
+
"rejected": 1,
|
| 7350 |
+
"total_reward": 0.001,
|
| 7351 |
+
"avg_reward": 0.001,
|
| 7352 |
+
"last_status": "REJECTED",
|
| 7353 |
+
"last_reason": "Condition became non-transferable during delay; immediate critical care failed",
|
| 7354 |
+
"success_rate": 0.0
|
| 7355 |
+
}
|
| 7356 |
+
}
|
| 7357 |
+
},
|
| 7358 |
+
"last_score": 0.15059999999999998,
|
| 7359 |
+
"last_success": false,
|
| 7360 |
+
"last_run_at": "2026-04-09T04:57:26.758443+00:00",
|
| 7361 |
+
"last_actions": [
|
| 7362 |
+
"H5",
|
| 7363 |
+
"H2",
|
| 7364 |
+
"H5"
|
| 7365 |
+
],
|
| 7366 |
+
"last_required_specialization": "trauma",
|
| 7367 |
+
"last_scenario_type": "accident",
|
| 7368 |
+
"last_scenario_name": "Bridge Crash (Infrastructure Blocked)",
|
| 7369 |
+
"best_success": false,
|
| 7370 |
+
"best_scenario_name": "Bridge Crash (Infrastructure Blocked)",
|
| 7371 |
+
"best_difficulty": "hard",
|
| 7372 |
+
"best_required_specialization": "trauma"
|
| 7373 |
}
|
| 7374 |
},
|
| 7375 |
"episodes": [
|
|
|
|
| 9207 |
"H3"
|
| 9208 |
],
|
| 9209 |
"timestamp": "2026-04-09T04:44:45.351743+00:00"
|
| 9210 |
+
},
|
| 9211 |
+
{
|
| 9212 |
+
"seed": 42,
|
| 9213 |
+
"task_id": "acde_easy",
|
| 9214 |
+
"difficulty": "easy",
|
| 9215 |
+
"required_specialization": "general",
|
| 9216 |
+
"scenario_name": "Apartment Fire (Smoke Inhalation)",
|
| 9217 |
+
"score": 0.99,
|
| 9218 |
+
"success": true,
|
| 9219 |
+
"actions": [
|
| 9220 |
+
"H1"
|
| 9221 |
+
],
|
| 9222 |
+
"timestamp": "2026-04-09T04:57:25.535395+00:00"
|
| 9223 |
+
},
|
| 9224 |
+
{
|
| 9225 |
+
"seed": 43,
|
| 9226 |
+
"task_id": "acde_medium",
|
| 9227 |
+
"difficulty": "medium",
|
| 9228 |
+
"required_specialization": "cardiac",
|
| 9229 |
+
"scenario_name": "Heart Attack (Unstable)",
|
| 9230 |
+
"score": 0.21539999999999998,
|
| 9231 |
+
"success": false,
|
| 9232 |
+
"actions": [
|
| 9233 |
+
"H2"
|
| 9234 |
+
],
|
| 9235 |
+
"timestamp": "2026-04-09T04:57:25.817773+00:00"
|
| 9236 |
+
},
|
| 9237 |
+
{
|
| 9238 |
+
"seed": 44,
|
| 9239 |
+
"task_id": "acde_hard",
|
| 9240 |
+
"difficulty": "hard",
|
| 9241 |
+
"required_specialization": "trauma",
|
| 9242 |
+
"scenario_name": "Bridge Crash (Infrastructure Blocked)",
|
| 9243 |
+
"score": 0.15059999999999998,
|
| 9244 |
+
"success": false,
|
| 9245 |
+
"actions": [
|
| 9246 |
+
"H5",
|
| 9247 |
+
"H2",
|
| 9248 |
+
"H5"
|
| 9249 |
+
],
|
| 9250 |
+
"timestamp": "2026-04-09T04:57:26.758443+00:00"
|
| 9251 |
}
|
| 9252 |
]
|
| 9253 |
}
|
data/learning_memory.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
"H2": {
|
| 3 |
"success": 109,
|
| 4 |
-
"fail":
|
| 5 |
-
"avg": 0.
|
| 6 |
"accepted": 109,
|
| 7 |
-
"rejected":
|
| 8 |
},
|
| 9 |
"H6": {
|
| 10 |
"success": 50,
|
|
@@ -15,16 +15,16 @@
|
|
| 15 |
},
|
| 16 |
"H5": {
|
| 17 |
"success": 112,
|
| 18 |
-
"fail":
|
| 19 |
-
"avg": 0.
|
| 20 |
"accepted": 112,
|
| 21 |
-
"rejected":
|
| 22 |
},
|
| 23 |
"H1": {
|
| 24 |
-
"success":
|
| 25 |
"fail": 107,
|
| 26 |
-
"avg": 0.
|
| 27 |
-
"accepted":
|
| 28 |
"rejected": 107
|
| 29 |
},
|
| 30 |
"H3": {
|
|
|
|
| 1 |
{
|
| 2 |
"H2": {
|
| 3 |
"success": 109,
|
| 4 |
+
"fail": 211,
|
| 5 |
+
"avg": 0.3393390625000006,
|
| 6 |
"accepted": 109,
|
| 7 |
+
"rejected": 211
|
| 8 |
},
|
| 9 |
"H6": {
|
| 10 |
"success": 50,
|
|
|
|
| 15 |
},
|
| 16 |
"H5": {
|
| 17 |
"success": 112,
|
| 18 |
+
"fail": 176,
|
| 19 |
+
"avg": 0.38393784722222196,
|
| 20 |
"accepted": 112,
|
| 21 |
+
"rejected": 176
|
| 22 |
},
|
| 23 |
"H1": {
|
| 24 |
+
"success": 110,
|
| 25 |
"fail": 107,
|
| 26 |
+
"avg": 0.4174359447004605,
|
| 27 |
+
"accepted": 110,
|
| 28 |
"rejected": 107
|
| 29 |
},
|
| 30 |
"H3": {
|
data/trajectory_history.jsonl
CHANGED
|
@@ -321,3 +321,8 @@
|
|
| 321 |
{"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H2", "policy_score": 0.24347869935725594, "strategy": "best-route retain"}, "outcome": {"status": "REJECTED", "reason": "Hospital cannot admit: Hospital overloaded"}, "reward": 0.001}
|
| 322 |
{"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 2, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H1", "policy_score": 0.39922290989538656, "strategy": "risk-aware policy + anti-stupidity guard"}, "outcome": {"status": "REJECTED", "reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required."}, "reward": 0.001}
|
| 323 |
{"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 3, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2", "H4"], "visited_hospitals": ["H2", "H4"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H3", "policy_score": 0.08687125701068692, "strategy": "risk-aware policy + anti-stupidity guard + immediate-retry override"}, "outcome": {"status": "ACCEPTED", "reason": "Condition stabilized after progressive treatment"}, "reward": 0.4640000000000001}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
{"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H2", "policy_score": 0.24347869935725594, "strategy": "best-route retain"}, "outcome": {"status": "REJECTED", "reason": "Hospital cannot admit: Hospital overloaded"}, "reward": 0.001}
|
| 322 |
{"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 2, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H1", "policy_score": 0.39922290989538656, "strategy": "risk-aware policy + anti-stupidity guard"}, "outcome": {"status": "REJECTED", "reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required."}, "reward": 0.001}
|
| 323 |
{"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 3, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2", "H4"], "visited_hospitals": ["H2", "H4"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H3", "policy_score": 0.08687125701068692, "strategy": "risk-aware policy + anti-stupidity guard + immediate-retry override"}, "outcome": {"status": "ACCEPTED", "reason": "Condition stabilized after progressive treatment"}, "reward": 0.4640000000000001}
|
| 324 |
+
{"seed": 42, "task": "acde_easy", "difficulty": "easy", "step": 1, "state": {"patient_condition": "serious", "remaining_time_minutes": 18.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H1", "policy_score": 0.2700762940680355, "strategy": "safe policy"}, "outcome": {"status": "ACCEPTED", "reason": "Patient admitted and treatment began"}, "reward": 0.999}
|
| 325 |
+
{"seed": 43, "task": "acde_medium", "difficulty": "medium", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 14.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H2", "policy_score": 0.3231158217480875, "strategy": "safe policy + critical triage + guided-exploration"}, "outcome": {"status": "REJECTED", "reason": "Condition became non-transferable during delay; immediate critical care failed"}, "reward": 0.109}
|
| 326 |
+
{"seed": 44, "task": "acde_hard", "difficulty": "hard", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 13.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H5", "policy_score": 0.4862574941416685, "strategy": "safe policy + critical triage"}, "outcome": {"status": "REJECTED", "reason": "Hospital cannot admit: ICU unavailable"}, "reward": 0.001}
|
| 327 |
+
{"seed": 44, "task": "acde_hard", "difficulty": "hard", "step": 2, "state": {"patient_condition": "critical", "remaining_time_minutes": 13.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H2", "policy_score": 0.07027740357273131, "strategy": "risk-aware policy + immediate-retry override"}, "outcome": {"status": "REJECTED", "reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required."}, "reward": 0.001}
|
| 328 |
+
{"seed": 44, "task": "acde_hard", "difficulty": "hard", "step": 3, "state": {"patient_condition": "critical", "remaining_time_minutes": 13.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H5", "policy_score": 0.47341627825998767, "strategy": "risk-aware policy"}, "outcome": {"status": "REJECTED", "reason": "Condition became non-transferable during delay; immediate critical care failed"}, "reward": 0.001}
|