Spaces:
Paused
Paused
| from __future__ import annotations | |
| import json | |
| from dataclasses import is_dataclass | |
| from permanence.agent_interface.parser import _safe_parse_float, parse_agent_output | |
| from permanence.env import PermanenceEnv | |
| from permanence.episode_tracker import EpisodeResult, PredictionRecord | |
| from permanence.reward.engine import MAX_EPISODE_CATASTROPHE_PENALTY, RewardEngine | |
| def _assert_jsonable(value): | |
| if value is None or isinstance(value, (str, int, float, bool)): | |
| return | |
| if isinstance(value, dict): | |
| for item in value.values(): | |
| _assert_jsonable(item) | |
| return | |
| if isinstance(value, list): | |
| for item in value: | |
| _assert_jsonable(item) | |
| return | |
| raise AssertionError(f"Non-serializable value found: {type(value)!r}") | |
def test_reset_returns_json_serializable_info():
    """Reset with a fixed seed yields a text observation and JSON-safe info."""
    env_under_test = PermanenceEnv()
    first_obs, reset_info = env_under_test.reset(seed=123)

    obs_text = first_obs["text"]
    assert isinstance(obs_text, str)

    # Structural walk first, then an actual encode as the final word.
    _assert_jsonable(reset_info)
    encoded_info = json.dumps(reset_info)
    assert encoded_info
def test_parser_handles_multiline_action_and_safe_float():
    """Parse an action tag spread over several lines and probe the float helper."""
    # The <action> tag deliberately spans multiple lines.
    agent_text = (
        "<thinking>reasoning</thinking>\n"
        '<action id="communicate_resolution_externally"\n'
        ' client_id="client_a"\n'
        ' resolution_terms="full_refund"\n'
        ' final_amount="240000"/>\n'
        '<reversibility level="R4" confidence="0.87"/>'
    )
    result = parse_agent_output(agent_text)

    assert result.action_id == "communicate_resolution_externally"
    assert result.parameters["client_id"] == "client_a"
    assert result.predicted_r_level == 4
    confidence_error = abs(result.predicted_confidence - 0.87)
    assert confidence_error < 0.01

    # A leading number followed by trailing text parses; a bare word does not.
    noisy_number = _safe_parse_float("0.9 (very sure)")
    assert noisy_number == 0.9
    word_only = _safe_parse_float("High")
    assert word_only is None
def test_reward_missing_confidence_scores_zero():
    """A prediction record whose confidence is None gives a prediction score of 0.0."""
    # The predicted level matches the actual level; only the confidence is absent.
    record_without_confidence = PredictionRecord(
        step=1,
        action_id="test",
        predicted_r_level=3,
        predicted_confidence=None,
        actual_r_level=3,
    )
    episode = EpisodeResult(
        task_id="task_demo",
        task_name="Demo",
        scenario_id="demo:1",
        terminated_by="success",
        step_count=1,
        max_steps=15,
        success=True,
        prediction_records=[record_without_confidence],
        final_world_state_summary={},
        final_locked_actions=[],
        final_critical_options={},
        available_actions=[],
        preservation_targets=[],
    )

    engine = RewardEngine()
    prediction_score = engine.compute_prediction_score(episode)
    assert prediction_score == 0.0
def test_catastrophe_penalty_is_capped():
    """Ten confident R1 predictions on R5 actions stay within the episode penalty cap."""
    # Each record predicts R1 at 0.95 confidence while the actual level is R5.
    misjudged_records = []
    for _ in range(10):
        misjudged_records.append(
            PredictionRecord(
                step=1,
                action_id="test",
                predicted_r_level=1,
                predicted_confidence=0.95,
                actual_r_level=5,
            )
        )
    episode = EpisodeResult(
        task_id="task_demo",
        task_name="Demo",
        scenario_id="demo:1",
        terminated_by="success",
        step_count=1,
        max_steps=15,
        success=True,
        prediction_records=misjudged_records,
        final_world_state_summary={},
        final_locked_actions=[],
        final_critical_options={},
        available_actions=[],
        preservation_targets=[],
    )

    engine = RewardEngine()
    # Only the first element of the returned triple is checked here.
    total_penalty, _second, _third = engine.compute_catastrophe_penalty(episode)
    assert total_penalty <= MAX_EPISODE_CATASTROPHE_PENALTY
def test_unknown_action_consumes_step():
    """An unrecognised action id advances the step counter and is reported as an error."""
    env = PermanenceEnv()
    env.reset(seed=123)
    steps_before = env.episode_tracker.step_count

    bogus_action = (
        '<action id="completely_made_up_action_xyz"/>\n'
        '<reversibility level="R2" confidence="0.5"/>'
    )
    _, reward, terminated, truncated, info = env.step(bogus_action)

    steps_after = env.episode_tracker.step_count
    assert steps_after == steps_before + 1
    assert reward == -0.1
    assert not terminated
    # Truncation is acceptable only once the step budget has been used up.
    if truncated:
        assert env.episode_tracker.step_count >= env.episode_tracker.max_steps
    assert info["error"] == "unknown_action"
def test_cascade_task_sets_critical_option():
    """In the forced cascade task, an internal review sets its critical-option flag."""
    cascade_config = {"force_task": "task_cascade"}
    env = PermanenceEnv(config=cascade_config)
    env.reset(seed=42)

    review_action = (
        '<action id="review_contract_internally" contract_id="c001"/>\n'
        '<reversibility level="R1" confidence="0.99"/>'
    )
    step_result = env.step(review_action)
    _, reward, terminated, truncated, info = step_result

    critical_options = env._current_world_state.critical_options
    assert critical_options["internal_review_complete"] is True
    assert reward >= -0.2
    assert isinstance(terminated, bool)
    assert isinstance(truncated, bool)
    assert info["task_id"] == "task_cascade"
def test_terminal_info_is_json_serializable():
    """Run three cascade-task actions and check the final info dict is JSON-safe."""
    env = PermanenceEnv(config={"force_task": "task_cascade"})
    env.reset(seed=42)

    # The first two steps are taken only for their effect on the environment;
    # their return values are discarded.
    lead_up_actions = (
        '<action id="review_contract_internally" contract_id="c001"/>\n'
        '<reversibility level="R1" confidence="0.99"/>',
        '<action id="align_with_legal" dispute_summary="resolved"/>\n'
        '<reversibility level="R2" confidence="0.91"/>',
    )
    for action_text in lead_up_actions:
        env.step(action_text)

    closing_action = (
        '<action id="communicate_resolution_externally" client_id="client_a" resolution_terms="settled" final_amount="1000"/>\n'
        '<reversibility level="R4" confidence="0.88"/>'
    )
    _, reward, terminated, truncated, info = env.step(closing_action)

    # The third step is expected to end the episode one way or the other.
    assert terminated or truncated
    _assert_jsonable(info)
    encoded_info = json.dumps(info)
    assert encoded_info
    assert isinstance(reward, float)