Spaces:

farffadet
/

syllogym-env

Running

App Files Files Community

farffadet committed on Mar 14

Commit

349dcd3

verified ·

1 Parent(s): deae344

refactor: multi-turn Judge Agent — diversity + UCC generators

Browse files

Files changed (1) hide show

client.py +30 -34

client.py CHANGED Viewed

@@ -1,23 +1,34 @@
 """
 SylloGym Environment Client.
-Typed client for connecting to a running SylloGym server.
 Example:
     >>> env = SylloGymEnv(base_url="http://localhost:8000")
     >>> result = env.reset()
     >>> obs = result.observation
     >>> print(obs.rule)
     >>> print(obs.facts)
     >>>
-    >>> from syllogym_env.models import SylloAction
-    >>> action = SylloAction(
-    ...     reasoning="<reasoning>The rule states X applies when Y. The facts show Y. Therefore X applies.</reasoning>",
-    ...     answer="<answer>Yes</answer>"
-    ... )
-    >>> result = env.step(action)
-    >>> print(result.observation.reward)   # 0.0 to 1.3
-    >>> print(result.observation.done)     # True
     >>> env.close()
 """
@@ -31,28 +42,14 @@ from .models import SylloAction, SylloObservation, SylloState
 class SylloGymEnv(EnvClient[SylloAction, SylloObservation, SylloState]):
     """
-    Client for the SylloGym legal reasoning environment.
-    Connects to a SylloGym server that serves LegalBench-based
-    syllogistic reasoning tasks. Each episode is a single-step interaction:
-      1. reset() → receive a legal rule + case facts
-      2. step(SylloAction) → submit reasoning + answer, receive reward
-    Args:
-        base_url: URL of the running SylloGym server.
-    Example:
-        >>> env = SylloGymEnv(base_url="http://localhost:8000")
-        >>> result = env.reset()
-        >>> obs = result.observation
-        >>>
-        >>> action = SylloAction(
-        ...     reasoning="<reasoning>Applying the rule to the facts...</reasoning>",
-        ...     answer="<answer>Yes</answer>"
-        ... )
-        >>> result = env.step(action)
-        >>> print(f"Reward: {result.observation.reward}")
-        >>> env.close()
     """
     def _step_payload(self, action: SylloAction) -> dict:
@@ -64,11 +61,13 @@ class SylloGymEnv(EnvClient[SylloAction, SylloObservation, SylloState]):
     def _parse_result(self, payload: dict) -> StepResult[SylloObservation]:
         obs_data = payload.get("observation", {})
         reward = payload.get("reward")
-        done = bool(payload.get("done", True))
         # Mirror reward/done into the observation for convenience
         obs_data["reward"] = reward
         obs_data["done"] = done
-        obs = SylloObservation(**obs_data)
         return StepResult(
             observation=obs,
             reward=reward,
@@ -77,11 +76,8 @@ class SylloGymEnv(EnvClient[SylloAction, SylloObservation, SylloState]):
     def _parse_state(self, payload: dict) -> SylloState:
         return SylloState(
-            episode_id=payload.get("episode_id", ""),
-            step_count=payload.get("step_count", 0),
             task_name=payload.get("task_name", ""),
             task_mode=payload.get("task_mode", "mixed"),
-            current_difficulty=payload.get("current_difficulty", 1.0),
             total_correct=payload.get("total_correct", 0),
             total_steps=payload.get("total_steps", 0),
         )

 """
 SylloGym Environment Client.
+Multi-turn typed client for connecting to a running SylloGym server.
+The agent plays a judge who receives new facts turn by turn.
+Each episode: reset() → step() → step() → ... → done=True
 Example:
+    >>> from syllogym_env import SylloGymEnv
+    >>> from syllogym_env.models import SylloAction
+    >>>
     >>> env = SylloGymEnv(base_url="http://localhost:8000")
     >>> result = env.reset()
     >>> obs = result.observation
     >>> print(obs.rule)
     >>> print(obs.facts)
+    >>> print(obs.question)  # Turn 0 question
     >>>
+    >>> while not obs.done:
+    ...     action = SylloAction(
+    ...         reasoning="<reasoning>Applying the rule...</reasoning>",
+    ...         answer="<answer>Yes</answer>",
+    ...     )
+    ...     result = env.step(action)
+    ...     obs = result.observation
+    ...     if not obs.done:
+    ...         print(f"Turn {obs.layer_index}: {obs.new_info}")
+    ...         print(f"Next question: {obs.question}")
+    ...
+    >>> print(f"Final reward: {obs.reward}")
     >>> env.close()
 """
 class SylloGymEnv(EnvClient[SylloAction, SylloObservation, SylloState]):
     """
+    Client for the SylloGym multi-turn legal reasoning environment.
+    Each episode is a sequence of steps:
+      reset()      → Turn 0 observation (rule + initial facts + first question)
+      step(action) → Turn 1 observation (new_info revealed + next question), reward=1.0
+      step(action) → ... until done=True
+    Reward is dense: 1.0 for each correct answer, 0.0 terminates the episode.
     """
     def _step_payload(self, action: SylloAction) -> dict:
     def _parse_result(self, payload: dict) -> StepResult[SylloObservation]:
         obs_data = payload.get("observation", {})
         reward = payload.get("reward")
+        done = bool(payload.get("done", False))
         # Mirror reward/done into the observation for convenience
         obs_data["reward"] = reward
         obs_data["done"] = done
+        # Only pass fields that SylloObservation knows about
+        valid_fields = SylloObservation.model_fields
+        obs = SylloObservation(**{k: v for k, v in obs_data.items() if k in valid_fields})
         return StepResult(
             observation=obs,
             reward=reward,
     def _parse_state(self, payload: dict) -> SylloState:
         """Build a ``SylloState`` from a raw state payload.

         Args:
             payload: Mapping holding the state fields. Any key that is
                 absent falls back to the default used below.

         Returns:
             A ``SylloState`` carrying the task name, task mode and the
             running correct/step counters read from ``payload``.
         """
         # Read each field up front; missing keys get a neutral default
         # instead of raising.
         name = payload.get("task_name", "")
         mode = payload.get("task_mode", "mixed")
         correct = payload.get("total_correct", 0)
         steps = payload.get("total_steps", 0)
         return SylloState(
             task_name=name,
             task_mode=mode,
             total_correct=correct,
             total_steps=steps,
         )