paramjitbaral commited on
Commit
1cb11b0
·
verified ·
1 Parent(s): 61d594c

Upload folder using huggingface_hub

Browse files
app/environment/core.py CHANGED
@@ -463,13 +463,13 @@ class EmergencyEnv:
463
  def _improvement_bonus(self, status: str) -> float:
464
  if self.last_outcome_status is None:
465
  self.last_outcome_status = status
466
- return 0.0
467
 
468
  delta = OUTCOME_SCORE[status] - OUTCOME_SCORE[self.last_outcome_status]
469
  self.last_outcome_status = status
470
  if delta > 0:
471
  return 0.04
472
- return 0.0
473
 
474
  def _specialization_match(self, hospital: HospitalState) -> bool:
475
  assert self.state_data is not None
@@ -653,7 +653,7 @@ class EmergencyEnv:
653
  assert self.state_data is not None
654
 
655
  if selected.icu_display != "unknown":
656
- return arrival_outcome, 0.0, None
657
 
658
  difficulty = self.state_data.scenario_difficulty
659
  guess_success_prob = {
@@ -666,7 +666,7 @@ class EmergencyEnv:
666
  if guess_correct:
667
  return (
668
  arrival_outcome,
669
- 0.0,
670
  "Hidden case: risky ICU-unknown guess was correct this time.",
671
  )
672
 
@@ -690,7 +690,7 @@ class EmergencyEnv:
690
  ) -> tuple[ArrivalOutcome, float, str | None]:
691
  """Late-arrival operational shocks: ICU/doctor/bed/equipment can fail at handover."""
692
  if arrival_outcome.status == "rejected":
693
- return arrival_outcome, 0.0, None
694
 
695
  shock_prob = {
696
  "easy": 0.03,
@@ -698,11 +698,11 @@ class EmergencyEnv:
698
  "hard": 0.10,
699
  }.get(difficulty, 0.14)
700
  if self._rng.random() >= shock_prob:
701
- return arrival_outcome, 0.0, None
702
 
703
  v = arrival_outcome.validation_details
704
  if v is None:
705
- return arrival_outcome, 0.0, None
706
 
707
  shock = self._rng.choice([
708
  "doctor_unavailable",
@@ -1104,7 +1104,7 @@ class EmergencyEnv:
1104
 
1105
  def _progress_score(self) -> float:
1106
  if not self.trajectory:
1107
- return 0.0
1108
  raw = sum(float(t["reward"]) for t in self.trajectory) / len(self.trajectory)
1109
  return max(MIN_REWARD, min(MAX_REWARD, raw))
1110
 
 
463
  def _improvement_bonus(self, status: str) -> float:
464
  if self.last_outcome_status is None:
465
  self.last_outcome_status = status
466
+ return MIN_REWARD
467
 
468
  delta = OUTCOME_SCORE[status] - OUTCOME_SCORE[self.last_outcome_status]
469
  self.last_outcome_status = status
470
  if delta > 0:
471
  return 0.04
472
+ return MIN_REWARD
473
 
474
  def _specialization_match(self, hospital: HospitalState) -> bool:
475
  assert self.state_data is not None
 
653
  assert self.state_data is not None
654
 
655
  if selected.icu_display != "unknown":
656
+ return arrival_outcome, MIN_REWARD, None
657
 
658
  difficulty = self.state_data.scenario_difficulty
659
  guess_success_prob = {
 
666
  if guess_correct:
667
  return (
668
  arrival_outcome,
669
+ MIN_REWARD,
670
  "Hidden case: risky ICU-unknown guess was correct this time.",
671
  )
672
 
 
690
  ) -> tuple[ArrivalOutcome, float, str | None]:
691
  """Late-arrival operational shocks: ICU/doctor/bed/equipment can fail at handover."""
692
  if arrival_outcome.status == "rejected":
693
+ return arrival_outcome, MIN_REWARD, None
694
 
695
  shock_prob = {
696
  "easy": 0.03,
 
698
  "hard": 0.10,
699
  }.get(difficulty, 0.14)
700
  if self._rng.random() >= shock_prob:
701
+ return arrival_outcome, MIN_REWARD, None
702
 
703
  v = arrival_outcome.validation_details
704
  if v is None:
705
+ return arrival_outcome, MIN_REWARD, None
706
 
707
  shock = self._rng.choice([
708
  "doctor_unavailable",
 
1104
 
1105
  def _progress_score(self) -> float:
1106
  if not self.trajectory:
1107
+ return MIN_REWARD
1108
  raw = sum(float(t["reward"]) for t in self.trajectory) / len(self.trajectory)
1109
  return max(MIN_REWARD, min(MAX_REWARD, raw))
1110
 
app/environment/graders.py CHANGED
@@ -7,8 +7,8 @@ MAX_SCORE = 0.999
7
 
8
  def _norm_margin(travel_time: float, critical_limit: float) -> float:
9
  if critical_limit <= 0:
10
- return 0.0
11
- return max(0.0, min(1.0, (critical_limit - travel_time) / critical_limit))
12
 
13
 
14
  def grade_task(
@@ -78,14 +78,14 @@ def grade_task(
78
 
79
  if difficulty == "easy":
80
  threshold = 0.73
81
- score = min(1.0, base + 0.1)
82
  elif difficulty == "medium":
83
  threshold = 0.62
84
  score = base
85
  else: # hard
86
  threshold = 0.53
87
- hard_bonus = 0.15 if success_rate >= 0.5 else (0.05 if success_rate > 0.0 else 0.0)
88
- score = min(1.0, base + hard_bonus)
89
 
90
  score = max(MIN_SCORE, min(MAX_SCORE, score))
91
 
 
7
 
8
  def _norm_margin(travel_time: float, critical_limit: float) -> float:
9
  if critical_limit <= 0:
10
+ return MIN_SCORE
11
+ return max(MIN_SCORE, min(MAX_SCORE, (critical_limit - travel_time) / critical_limit))
12
 
13
 
14
  def grade_task(
 
78
 
79
  if difficulty == "easy":
80
  threshold = 0.73
81
+ score = min(MAX_SCORE, base + 0.1)
82
  elif difficulty == "medium":
83
  threshold = 0.62
84
  score = base
85
  else: # hard
86
  threshold = 0.53
87
+ hard_bonus = 0.15 if success_rate >= 0.5 else (0.05 if success_rate > 0.0 else MIN_SCORE)
88
+ score = min(MAX_SCORE, base + hard_bonus)
89
 
90
  score = max(MIN_SCORE, min(MAX_SCORE, score))
91
 
data/learning_archive.json CHANGED
@@ -7227,6 +7227,149 @@
7227
  "best_scenario_name": "Wildfire Front (Evacuation Gridlock)",
7228
  "best_difficulty": "hard",
7229
  "best_required_specialization": "general"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7230
  }
7231
  },
7232
  "episodes": [
@@ -9064,6 +9207,47 @@
9064
  "H3"
9065
  ],
9066
  "timestamp": "2026-04-09T04:44:45.351743+00:00"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9067
  }
9068
  ]
9069
  }
 
7227
  "best_scenario_name": "Wildfire Front (Evacuation Gridlock)",
7228
  "best_difficulty": "hard",
7229
  "best_required_specialization": "general"
7230
+ },
7231
+ "42|acde_easy": {
7232
+ "attempts": 1,
7233
+ "best_score": 0.99,
7234
+ "best_actions": [
7235
+ "H1"
7236
+ ],
7237
+ "best_steps": 1,
7238
+ "step_stats": {
7239
+ "1": {
7240
+ "H1": {
7241
+ "count": 1,
7242
+ "success": 1,
7243
+ "accepted": 1,
7244
+ "partial": 0,
7245
+ "rejected": 0,
7246
+ "total_reward": 0.999,
7247
+ "avg_reward": 0.999,
7248
+ "last_status": "ACCEPTED",
7249
+ "last_reason": "Patient admitted and treatment began",
7250
+ "success_rate": 1.0
7251
+ }
7252
+ }
7253
+ },
7254
+ "last_score": 0.99,
7255
+ "last_success": true,
7256
+ "last_run_at": "2026-04-09T04:57:25.535395+00:00",
7257
+ "last_actions": [
7258
+ "H1"
7259
+ ],
7260
+ "last_required_specialization": "general",
7261
+ "last_scenario_type": "fire",
7262
+ "last_scenario_name": "Apartment Fire (Smoke Inhalation)",
7263
+ "best_success": true,
7264
+ "best_scenario_name": "Apartment Fire (Smoke Inhalation)",
7265
+ "best_difficulty": "easy",
7266
+ "best_required_specialization": "general"
7267
+ },
7268
+ "43|acde_medium": {
7269
+ "attempts": 1,
7270
+ "best_score": 0.21539999999999998,
7271
+ "best_actions": [
7272
+ "H2"
7273
+ ],
7274
+ "best_steps": 1,
7275
+ "step_stats": {
7276
+ "1": {
7277
+ "H2": {
7278
+ "count": 1,
7279
+ "success": 0,
7280
+ "accepted": 0,
7281
+ "partial": 0,
7282
+ "rejected": 1,
7283
+ "total_reward": 0.109,
7284
+ "avg_reward": 0.109,
7285
+ "last_status": "REJECTED",
7286
+ "last_reason": "Condition became non-transferable during delay; immediate critical care failed",
7287
+ "success_rate": 0.0
7288
+ }
7289
+ }
7290
+ },
7291
+ "last_score": 0.21539999999999998,
7292
+ "last_success": false,
7293
+ "last_run_at": "2026-04-09T04:57:25.817773+00:00",
7294
+ "last_actions": [
7295
+ "H2"
7296
+ ],
7297
+ "last_required_specialization": "cardiac",
7298
+ "last_scenario_type": "medical",
7299
+ "last_scenario_name": "Heart Attack (Unstable)",
7300
+ "best_success": false,
7301
+ "best_scenario_name": "Heart Attack (Unstable)",
7302
+ "best_difficulty": "medium",
7303
+ "best_required_specialization": "cardiac"
7304
+ },
7305
+ "44|acde_hard": {
7306
+ "attempts": 1,
7307
+ "best_score": 0.15059999999999998,
7308
+ "best_actions": [
7309
+ "H5",
7310
+ "H2",
7311
+ "H5"
7312
+ ],
7313
+ "best_steps": 3,
7314
+ "step_stats": {
7315
+ "1": {
7316
+ "H5": {
7317
+ "count": 1,
7318
+ "success": 0,
7319
+ "accepted": 0,
7320
+ "partial": 0,
7321
+ "rejected": 1,
7322
+ "total_reward": 0.001,
7323
+ "avg_reward": 0.001,
7324
+ "last_status": "REJECTED",
7325
+ "last_reason": "Hospital cannot admit: ICU unavailable",
7326
+ "success_rate": 0.0
7327
+ }
7328
+ },
7329
+ "2": {
7330
+ "H2": {
7331
+ "count": 1,
7332
+ "success": 0,
7333
+ "accepted": 0,
7334
+ "partial": 0,
7335
+ "rejected": 1,
7336
+ "total_reward": 0.001,
7337
+ "avg_reward": 0.001,
7338
+ "last_status": "REJECTED",
7339
+ "last_reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required.",
7340
+ "success_rate": 0.0
7341
+ }
7342
+ },
7343
+ "3": {
7344
+ "H5": {
7345
+ "count": 1,
7346
+ "success": 0,
7347
+ "accepted": 0,
7348
+ "partial": 0,
7349
+ "rejected": 1,
7350
+ "total_reward": 0.001,
7351
+ "avg_reward": 0.001,
7352
+ "last_status": "REJECTED",
7353
+ "last_reason": "Condition became non-transferable during delay; immediate critical care failed",
7354
+ "success_rate": 0.0
7355
+ }
7356
+ }
7357
+ },
7358
+ "last_score": 0.15059999999999998,
7359
+ "last_success": false,
7360
+ "last_run_at": "2026-04-09T04:57:26.758443+00:00",
7361
+ "last_actions": [
7362
+ "H5",
7363
+ "H2",
7364
+ "H5"
7365
+ ],
7366
+ "last_required_specialization": "trauma",
7367
+ "last_scenario_type": "accident",
7368
+ "last_scenario_name": "Bridge Crash (Infrastructure Blocked)",
7369
+ "best_success": false,
7370
+ "best_scenario_name": "Bridge Crash (Infrastructure Blocked)",
7371
+ "best_difficulty": "hard",
7372
+ "best_required_specialization": "trauma"
7373
  }
7374
  },
7375
  "episodes": [
 
9207
  "H3"
9208
  ],
9209
  "timestamp": "2026-04-09T04:44:45.351743+00:00"
9210
+ },
9211
+ {
9212
+ "seed": 42,
9213
+ "task_id": "acde_easy",
9214
+ "difficulty": "easy",
9215
+ "required_specialization": "general",
9216
+ "scenario_name": "Apartment Fire (Smoke Inhalation)",
9217
+ "score": 0.99,
9218
+ "success": true,
9219
+ "actions": [
9220
+ "H1"
9221
+ ],
9222
+ "timestamp": "2026-04-09T04:57:25.535395+00:00"
9223
+ },
9224
+ {
9225
+ "seed": 43,
9226
+ "task_id": "acde_medium",
9227
+ "difficulty": "medium",
9228
+ "required_specialization": "cardiac",
9229
+ "scenario_name": "Heart Attack (Unstable)",
9230
+ "score": 0.21539999999999998,
9231
+ "success": false,
9232
+ "actions": [
9233
+ "H2"
9234
+ ],
9235
+ "timestamp": "2026-04-09T04:57:25.817773+00:00"
9236
+ },
9237
+ {
9238
+ "seed": 44,
9239
+ "task_id": "acde_hard",
9240
+ "difficulty": "hard",
9241
+ "required_specialization": "trauma",
9242
+ "scenario_name": "Bridge Crash (Infrastructure Blocked)",
9243
+ "score": 0.15059999999999998,
9244
+ "success": false,
9245
+ "actions": [
9246
+ "H5",
9247
+ "H2",
9248
+ "H5"
9249
+ ],
9250
+ "timestamp": "2026-04-09T04:57:26.758443+00:00"
9251
  }
9252
  ]
9253
  }
data/learning_memory.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
  "H2": {
3
  "success": 109,
4
- "fail": 208,
5
- "avg": 0.3422003154574139,
6
  "accepted": 109,
7
- "rejected": 208
8
  },
9
  "H6": {
10
  "success": 50,
@@ -15,16 +15,16 @@
15
  },
16
  "H5": {
17
  "success": 112,
18
- "fail": 175,
19
- "avg": 0.3852721254355398,
20
  "accepted": 112,
21
- "rejected": 175
22
  },
23
  "H1": {
24
- "success": 109,
25
  "fail": 107,
26
- "avg": 0.4147435185185182,
27
- "accepted": 109,
28
  "rejected": 107
29
  },
30
  "H3": {
 
1
  {
2
  "H2": {
3
  "success": 109,
4
+ "fail": 211,
5
+ "avg": 0.3393390625000006,
6
  "accepted": 109,
7
+ "rejected": 211
8
  },
9
  "H6": {
10
  "success": 50,
 
15
  },
16
  "H5": {
17
  "success": 112,
18
+ "fail": 176,
19
+ "avg": 0.38393784722222196,
20
  "accepted": 112,
21
+ "rejected": 176
22
  },
23
  "H1": {
24
+ "success": 110,
25
  "fail": 107,
26
+ "avg": 0.4174359447004605,
27
+ "accepted": 110,
28
  "rejected": 107
29
  },
30
  "H3": {
data/trajectory_history.jsonl CHANGED
@@ -321,3 +321,8 @@
321
  {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H2", "policy_score": 0.24347869935725594, "strategy": "best-route retain"}, "outcome": {"status": "REJECTED", "reason": "Hospital cannot admit: Hospital overloaded"}, "reward": 0.001}
322
  {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 2, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H1", "policy_score": 0.39922290989538656, "strategy": "risk-aware policy + anti-stupidity guard"}, "outcome": {"status": "REJECTED", "reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required."}, "reward": 0.001}
323
  {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 3, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2", "H4"], "visited_hospitals": ["H2", "H4"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H3", "policy_score": 0.08687125701068692, "strategy": "risk-aware policy + anti-stupidity guard + immediate-retry override"}, "outcome": {"status": "ACCEPTED", "reason": "Condition stabilized after progressive treatment"}, "reward": 0.4640000000000001}
 
 
 
 
 
 
321
  {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H2", "policy_score": 0.24347869935725594, "strategy": "best-route retain"}, "outcome": {"status": "REJECTED", "reason": "Hospital cannot admit: Hospital overloaded"}, "reward": 0.001}
322
  {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 2, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H1", "policy_score": 0.39922290989538656, "strategy": "risk-aware policy + anti-stupidity guard"}, "outcome": {"status": "REJECTED", "reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required."}, "reward": 0.001}
323
  {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 3, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2", "H4"], "visited_hospitals": ["H2", "H4"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H3", "policy_score": 0.08687125701068692, "strategy": "risk-aware policy + anti-stupidity guard + immediate-retry override"}, "outcome": {"status": "ACCEPTED", "reason": "Condition stabilized after progressive treatment"}, "reward": 0.4640000000000001}
324
+ {"seed": 42, "task": "acde_easy", "difficulty": "easy", "step": 1, "state": {"patient_condition": "serious", "remaining_time_minutes": 18.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H1", "policy_score": 0.2700762940680355, "strategy": "safe policy"}, "outcome": {"status": "ACCEPTED", "reason": "Patient admitted and treatment began"}, "reward": 0.999}
325
+ {"seed": 43, "task": "acde_medium", "difficulty": "medium", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 14.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H2", "policy_score": 0.3231158217480875, "strategy": "safe policy + critical triage + guided-exploration"}, "outcome": {"status": "REJECTED", "reason": "Condition became non-transferable during delay; immediate critical care failed"}, "reward": 0.109}
326
+ {"seed": 44, "task": "acde_hard", "difficulty": "hard", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 13.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H5", "policy_score": 0.4862574941416685, "strategy": "safe policy + critical triage"}, "outcome": {"status": "REJECTED", "reason": "Hospital cannot admit: ICU unavailable"}, "reward": 0.001}
327
+ {"seed": 44, "task": "acde_hard", "difficulty": "hard", "step": 2, "state": {"patient_condition": "critical", "remaining_time_minutes": 13.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H2", "policy_score": 0.07027740357273131, "strategy": "risk-aware policy + immediate-retry override"}, "outcome": {"status": "REJECTED", "reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required."}, "reward": 0.001}
328
+ {"seed": 44, "task": "acde_hard", "difficulty": "hard", "step": 3, "state": {"patient_condition": "critical", "remaining_time_minutes": 13.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H5", "policy_score": 0.47341627825998767, "strategy": "risk-aware policy"}, "outcome": {"status": "REJECTED", "reason": "Condition became non-transferable during delay; immediate critical care failed"}, "reward": 0.001}