Spaces:

samrat-rm
/

WhyDidItFail

Sleeping

samrat-rm committed 11 days ago

Commit

a0518e7

1 Parent(s): 9f554a9

fix: seed and episode_id in reset()

Files changed (1) hide show

server/WhyDidItFail_environment.py CHANGED Viewed

@@ -29,10 +29,12 @@ class WhyDidItFailEnvironment(Environment):
     def __init__(self):
         self._state = State(episode_id=str(uuid4()), step_count=0)
         self.scenario = None
-        self.inspected = set()   # tracks what the agent has already looked at
     def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> WhyDidItFailObservation:
-        self._state = State(episode_id=str(uuid4()), step_count=0)
         self.scenario = random.choice(list(SCENARIOS.values()))
         self.inspected = set()
         return WhyDidItFailObservation(
@@ -106,10 +108,11 @@ class WhyDidItFailEnvironment(Environment):
             if w not in _STOP_WORDS and len(w) > 1
         ]
         return all(kw in submitted_norm for kw in keywords)
-    # TODO: partial-credit scoring, configurable keyword aliases per scenario, false-positive guard.
     def grade(self, action: WhyDidItFailAction) -> tuple[float, str, bool]:
         """Score a submit_diagnosis action against the current scenario."""
         if self.scenario is None:
             raise RuntimeError("Environment must be reset before calling grade.")
         diagnosis = (action.diagnosis or "").strip().lower()

     def __init__(self):
         self._state = State(episode_id=str(uuid4()), step_count=0)
         self.scenario = None
+        self.inspected = set()   # tracks what the agent has already looked at  TODO : implement inspected logic
     def reset(self, seed: Optional[int] = None, episode_id: Optional[str] = None, **kwargs: Any) -> WhyDidItFailObservation:
+        if seed is not None:
+            random.seed(seed)
+        self._state = State(episode_id=episode_id or str(uuid4()), step_count=0)
         self.scenario = random.choice(list(SCENARIOS.values()))
         self.inspected = set()
         return WhyDidItFailObservation(
             if w not in _STOP_WORDS and len(w) > 1
         ]
         return all(kw in submitted_norm for kw in keywords)
+    # TODO: Improve scoring: partial-credit scoring, configurable keyword aliases per scenario, false-positive guard.
     def grade(self, action: WhyDidItFailAction) -> tuple[float, str, bool]:
         """Score a submit_diagnosis action against the current scenario."""
+        # TODO : use step count in reward calc
         if self.scenario is None:
             raise RuntimeError("Environment must be reset before calling grade.")
         diagnosis = (action.diagnosis or "").strip().lower()