Spaces:

Jash05
/

test

Sleeping

App Files Files Community

paramjitbaral committed on Apr 9

Commit

1cb11b0

verified ·

1 Parent(s): 61d594c

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

app/environment/core.py +8 -8
app/environment/graders.py +5 -5
data/learning_archive.json +184 -0
data/learning_memory.json +9 -9
data/trajectory_history.jsonl +5 -0

app/environment/core.py CHANGED Viewed

@@ -463,13 +463,13 @@ class EmergencyEnv:
     def _improvement_bonus(self, status: str) -> float:
         if self.last_outcome_status is None:
             self.last_outcome_status = status
-            return 0.0
         delta = OUTCOME_SCORE[status] - OUTCOME_SCORE[self.last_outcome_status]
         self.last_outcome_status = status
         if delta > 0:
             return 0.04
-        return 0.0
     def _specialization_match(self, hospital: HospitalState) -> bool:
         assert self.state_data is not None
@@ -653,7 +653,7 @@ class EmergencyEnv:
         assert self.state_data is not None
         if selected.icu_display != "unknown":
-            return arrival_outcome, 0.0, None
         difficulty = self.state_data.scenario_difficulty
         guess_success_prob = {
@@ -666,7 +666,7 @@ class EmergencyEnv:
         if guess_correct:
             return (
                 arrival_outcome,
-                0.0,
                 "Hidden case: risky ICU-unknown guess was correct this time.",
             )
@@ -690,7 +690,7 @@ class EmergencyEnv:
     ) -> tuple[ArrivalOutcome, float, str | None]:
         """Late-arrival operational shocks: ICU/doctor/bed/equipment can fail at handover."""
         if arrival_outcome.status == "rejected":
-            return arrival_outcome, 0.0, None
         shock_prob = {
             "easy": 0.03,
@@ -698,11 +698,11 @@ class EmergencyEnv:
             "hard": 0.10,
         }.get(difficulty, 0.14)
         if self._rng.random() >= shock_prob:
-            return arrival_outcome, 0.0, None
         v = arrival_outcome.validation_details
         if v is None:
-            return arrival_outcome, 0.0, None
         shock = self._rng.choice([
             "doctor_unavailable",
@@ -1104,7 +1104,7 @@ class EmergencyEnv:
     def _progress_score(self) -> float:
         if not self.trajectory:
-            return 0.0
         raw = sum(float(t["reward"]) for t in self.trajectory) / len(self.trajectory)
         return max(MIN_REWARD, min(MAX_REWARD, raw))

     def _improvement_bonus(self, status: str) -> float:
         if self.last_outcome_status is None:
             self.last_outcome_status = status
+            return MIN_REWARD
         delta = OUTCOME_SCORE[status] - OUTCOME_SCORE[self.last_outcome_status]
         self.last_outcome_status = status
         if delta > 0:
             return 0.04
+        return MIN_REWARD
     def _specialization_match(self, hospital: HospitalState) -> bool:
         assert self.state_data is not None
         assert self.state_data is not None
         if selected.icu_display != "unknown":
+            return arrival_outcome, MIN_REWARD, None
         difficulty = self.state_data.scenario_difficulty
         guess_success_prob = {
         if guess_correct:
             return (
                 arrival_outcome,
+                MIN_REWARD,
                 "Hidden case: risky ICU-unknown guess was correct this time.",
             )
     ) -> tuple[ArrivalOutcome, float, str | None]:
         """Late-arrival operational shocks: ICU/doctor/bed/equipment can fail at handover."""
         if arrival_outcome.status == "rejected":
+            return arrival_outcome, MIN_REWARD, None
         shock_prob = {
             "easy": 0.03,
             "hard": 0.10,
         }.get(difficulty, 0.14)
         if self._rng.random() >= shock_prob:
+            return arrival_outcome, MIN_REWARD, None
         v = arrival_outcome.validation_details
         if v is None:
+            return arrival_outcome, MIN_REWARD, None
         shock = self._rng.choice([
             "doctor_unavailable",
     def _progress_score(self) -> float:
         if not self.trajectory:
+            return MIN_REWARD
         raw = sum(float(t["reward"]) for t in self.trajectory) / len(self.trajectory)
         return max(MIN_REWARD, min(MAX_REWARD, raw))

app/environment/graders.py CHANGED Viewed

@@ -7,8 +7,8 @@ MAX_SCORE = 0.999
 def _norm_margin(travel_time: float, critical_limit: float) -> float:
     if critical_limit <= 0:
-        return 0.0
-    return max(0.0, min(1.0, (critical_limit - travel_time) / critical_limit))
 def grade_task(
@@ -78,14 +78,14 @@ def grade_task(
     if difficulty == "easy":
         threshold = 0.73
-        score = min(1.0, base + 0.1)
     elif difficulty == "medium":
         threshold = 0.62
         score = base
     else:  # hard
         threshold = 0.53
-        hard_bonus = 0.15 if success_rate >= 0.5 else (0.05 if success_rate > 0.0 else 0.0)
-        score = min(1.0, base + hard_bonus)
     score = max(MIN_SCORE, min(MAX_SCORE, score))

 def _norm_margin(travel_time: float, critical_limit: float) -> float:
     if critical_limit <= 0:
+        return MIN_SCORE
+    return max(MIN_SCORE, min(MAX_SCORE, (critical_limit - travel_time) / critical_limit))
 def grade_task(
     if difficulty == "easy":
         threshold = 0.73
+        score = min(MAX_SCORE, base + 0.1)
     elif difficulty == "medium":
         threshold = 0.62
         score = base
     else:  # hard
         threshold = 0.53
+        hard_bonus = 0.15 if success_rate >= 0.5 else (0.05 if success_rate > 0.0 else MIN_SCORE)
+        score = min(MAX_SCORE, base + hard_bonus)
     score = max(MIN_SCORE, min(MAX_SCORE, score))

data/learning_archive.json CHANGED Viewed

@@ -7227,6 +7227,149 @@
       "best_scenario_name": "Wildfire Front (Evacuation Gridlock)",
       "best_difficulty": "hard",
       "best_required_specialization": "general"
     }
   },
   "episodes": [
@@ -9064,6 +9207,47 @@
         "H3"
       ],
       "timestamp": "2026-04-09T04:44:45.351743+00:00"
     }
   ]
 }

       "best_scenario_name": "Wildfire Front (Evacuation Gridlock)",
       "best_difficulty": "hard",
       "best_required_specialization": "general"
+    },
+    "42|acde_easy": {
+      "attempts": 1,
+      "best_score": 0.99,
+      "best_actions": [
+        "H1"
+      ],
+      "best_steps": 1,
+      "step_stats": {
+        "1": {
+          "H1": {
+            "count": 1,
+            "success": 1,
+            "accepted": 1,
+            "partial": 0,
+            "rejected": 0,
+            "total_reward": 0.999,
+            "avg_reward": 0.999,
+            "last_status": "ACCEPTED",
+            "last_reason": "Patient admitted and treatment began",
+            "success_rate": 1.0
+          }
+        }
+      },
+      "last_score": 0.99,
+      "last_success": true,
+      "last_run_at": "2026-04-09T04:57:25.535395+00:00",
+      "last_actions": [
+        "H1"
+      ],
+      "last_required_specialization": "general",
+      "last_scenario_type": "fire",
+      "last_scenario_name": "Apartment Fire (Smoke Inhalation)",
+      "best_success": true,
+      "best_scenario_name": "Apartment Fire (Smoke Inhalation)",
+      "best_difficulty": "easy",
+      "best_required_specialization": "general"
+    },
+    "43|acde_medium": {
+      "attempts": 1,
+      "best_score": 0.21539999999999998,
+      "best_actions": [
+        "H2"
+      ],
+      "best_steps": 1,
+      "step_stats": {
+        "1": {
+          "H2": {
+            "count": 1,
+            "success": 0,
+            "accepted": 0,
+            "partial": 0,
+            "rejected": 1,
+            "total_reward": 0.109,
+            "avg_reward": 0.109,
+            "last_status": "REJECTED",
+            "last_reason": "Condition became non-transferable during delay; immediate critical care failed",
+            "success_rate": 0.0
+          }
+        }
+      },
+      "last_score": 0.21539999999999998,
+      "last_success": false,
+      "last_run_at": "2026-04-09T04:57:25.817773+00:00",
+      "last_actions": [
+        "H2"
+      ],
+      "last_required_specialization": "cardiac",
+      "last_scenario_type": "medical",
+      "last_scenario_name": "Heart Attack (Unstable)",
+      "best_success": false,
+      "best_scenario_name": "Heart Attack (Unstable)",
+      "best_difficulty": "medium",
+      "best_required_specialization": "cardiac"
+    },
+    "44|acde_hard": {
+      "attempts": 1,
+      "best_score": 0.15059999999999998,
+      "best_actions": [
+        "H5",
+        "H2",
+        "H5"
+      ],
+      "best_steps": 3,
+      "step_stats": {
+        "1": {
+          "H5": {
+            "count": 1,
+            "success": 0,
+            "accepted": 0,
+            "partial": 0,
+            "rejected": 1,
+            "total_reward": 0.001,
+            "avg_reward": 0.001,
+            "last_status": "REJECTED",
+            "last_reason": "Hospital cannot admit: ICU unavailable",
+            "success_rate": 0.0
+          }
+        },
+        "2": {
+          "H2": {
+            "count": 1,
+            "success": 0,
+            "accepted": 0,
+            "partial": 0,
+            "rejected": 1,
+            "total_reward": 0.001,
+            "avg_reward": 0.001,
+            "last_status": "REJECTED",
+            "last_reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required.",
+            "success_rate": 0.0
+          }
+        },
+        "3": {
+          "H5": {
+            "count": 1,
+            "success": 0,
+            "accepted": 0,
+            "partial": 0,
+            "rejected": 1,
+            "total_reward": 0.001,
+            "avg_reward": 0.001,
+            "last_status": "REJECTED",
+            "last_reason": "Condition became non-transferable during delay; immediate critical care failed",
+            "success_rate": 0.0
+          }
+        }
+      },
+      "last_score": 0.15059999999999998,
+      "last_success": false,
+      "last_run_at": "2026-04-09T04:57:26.758443+00:00",
+      "last_actions": [
+        "H5",
+        "H2",
+        "H5"
+      ],
+      "last_required_specialization": "trauma",
+      "last_scenario_type": "accident",
+      "last_scenario_name": "Bridge Crash (Infrastructure Blocked)",
+      "best_success": false,
+      "best_scenario_name": "Bridge Crash (Infrastructure Blocked)",
+      "best_difficulty": "hard",
+      "best_required_specialization": "trauma"
     }
   },
   "episodes": [
         "H3"
       ],
       "timestamp": "2026-04-09T04:44:45.351743+00:00"
+    },
+    {
+      "seed": 42,
+      "task_id": "acde_easy",
+      "difficulty": "easy",
+      "required_specialization": "general",
+      "scenario_name": "Apartment Fire (Smoke Inhalation)",
+      "score": 0.99,
+      "success": true,
+      "actions": [
+        "H1"
+      ],
+      "timestamp": "2026-04-09T04:57:25.535395+00:00"
+    },
+    {
+      "seed": 43,
+      "task_id": "acde_medium",
+      "difficulty": "medium",
+      "required_specialization": "cardiac",
+      "scenario_name": "Heart Attack (Unstable)",
+      "score": 0.21539999999999998,
+      "success": false,
+      "actions": [
+        "H2"
+      ],
+      "timestamp": "2026-04-09T04:57:25.817773+00:00"
+    },
+    {
+      "seed": 44,
+      "task_id": "acde_hard",
+      "difficulty": "hard",
+      "required_specialization": "trauma",
+      "scenario_name": "Bridge Crash (Infrastructure Blocked)",
+      "score": 0.15059999999999998,
+      "success": false,
+      "actions": [
+        "H5",
+        "H2",
+        "H5"
+      ],
+      "timestamp": "2026-04-09T04:57:26.758443+00:00"
     }
   ]
 }

data/learning_memory.json CHANGED Viewed

@@ -1,10 +1,10 @@
 {
   "H2": {
     "success": 109,
-    "fail": 208,
-    "avg": 0.3422003154574139,
     "accepted": 109,
-    "rejected": 208
   },
   "H6": {
     "success": 50,
@@ -15,16 +15,16 @@
   },
   "H5": {
     "success": 112,
-    "fail": 175,
-    "avg": 0.3852721254355398,
     "accepted": 112,
-    "rejected": 175
   },
   "H1": {
-    "success": 109,
     "fail": 107,
-    "avg": 0.4147435185185182,
-    "accepted": 109,
     "rejected": 107
   },
   "H3": {

 {
   "H2": {
     "success": 109,
+    "fail": 211,
+    "avg": 0.3393390625000006,
     "accepted": 109,
+    "rejected": 211
   },
   "H6": {
     "success": 50,
   },
   "H5": {
     "success": 112,
+    "fail": 176,
+    "avg": 0.38393784722222196,
     "accepted": 112,
+    "rejected": 176
   },
   "H1": {
+    "success": 110,
     "fail": 107,
+    "avg": 0.4174359447004605,
+    "accepted": 110,
     "rejected": 107
   },
   "H3": {

data/trajectory_history.jsonl CHANGED Viewed

@@ -321,3 +321,8 @@
 {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H2", "policy_score": 0.24347869935725594, "strategy": "best-route retain"}, "outcome": {"status": "REJECTED", "reason": "Hospital cannot admit: Hospital overloaded"}, "reward": 0.001}
 {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 2, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H1", "policy_score": 0.39922290989538656, "strategy": "risk-aware policy + anti-stupidity guard"}, "outcome": {"status": "REJECTED", "reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required."}, "reward": 0.001}
 {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 3, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2", "H4"], "visited_hospitals": ["H2", "H4"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H3", "policy_score": 0.08687125701068692, "strategy": "risk-aware policy + anti-stupidity guard + immediate-retry override"}, "outcome": {"status": "ACCEPTED", "reason": "Condition stabilized after progressive treatment"}, "reward": 0.4640000000000001}

 {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H2", "policy_score": 0.24347869935725594, "strategy": "best-route retain"}, "outcome": {"status": "REJECTED", "reason": "Hospital cannot admit: Hospital overloaded"}, "reward": 0.001}
 {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 2, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H1", "policy_score": 0.39922290989538656, "strategy": "risk-aware policy + anti-stupidity guard"}, "outcome": {"status": "REJECTED", "reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required."}, "reward": 0.001}
 {"seed": 125, "task": "acde_hard", "difficulty": "hard", "step": 3, "state": {"patient_condition": "critical", "remaining_time_minutes": 11.0, "failed_hospitals": ["H2", "H4"], "visited_hospitals": ["H2", "H4"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H3", "policy_score": 0.08687125701068692, "strategy": "risk-aware policy + anti-stupidity guard + immediate-retry override"}, "outcome": {"status": "ACCEPTED", "reason": "Condition stabilized after progressive treatment"}, "reward": 0.4640000000000001}
+{"seed": 42, "task": "acde_easy", "difficulty": "easy", "step": 1, "state": {"patient_condition": "serious", "remaining_time_minutes": 18.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H1", "policy_score": 0.2700762940680355, "strategy": "safe policy"}, "outcome": {"status": "ACCEPTED", "reason": "Patient admitted and treatment began"}, "reward": 0.999}
+{"seed": 43, "task": "acde_medium", "difficulty": "medium", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 14.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H2", "policy_score": 0.3231158217480875, "strategy": "safe policy + critical triage + guided-exploration"}, "outcome": {"status": "REJECTED", "reason": "Condition became non-transferable during delay; immediate critical care failed"}, "reward": 0.109}
+{"seed": 44, "task": "acde_hard", "difficulty": "hard", "step": 1, "state": {"patient_condition": "critical", "remaining_time_minutes": 13.0, "failed_hospitals": [], "visited_hospitals": [], "ambulance_status": "en_route"}, "action": {"hospital_id": "H5", "policy_score": 0.4862574941416685, "strategy": "safe policy + critical triage"}, "outcome": {"status": "REJECTED", "reason": "Hospital cannot admit: ICU unavailable"}, "reward": 0.001}
+{"seed": 44, "task": "acde_hard", "difficulty": "hard", "step": 2, "state": {"patient_condition": "critical", "remaining_time_minutes": 13.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H2", "policy_score": 0.07027740357273131, "strategy": "risk-aware policy + immediate-retry override"}, "outcome": {"status": "REJECTED", "reason": "Hidden mismatch at arrival (wrong risky guess). Rerouting required."}, "reward": 0.001}
+{"seed": 44, "task": "acde_hard", "difficulty": "hard", "step": 3, "state": {"patient_condition": "critical", "remaining_time_minutes": 13.0, "failed_hospitals": ["H2"], "visited_hospitals": ["H2"], "ambulance_status": "rerouting"}, "action": {"hospital_id": "H5", "policy_score": 0.47341627825998767, "strategy": "risk-aware policy"}, "outcome": {"status": "REJECTED", "reason": "Condition became non-transferable during delay; immediate critical care failed"}, "reward": 0.001}