# permanence/tests/test_core.py
# chane35's picture
# PERMANENCE training: 4-stage SFT -> gate -> GRPO -> eval pipeline
# 21c24ae verified
from __future__ import annotations
import json
from dataclasses import is_dataclass
from permanence.agent_interface.parser import _safe_parse_float, parse_agent_output
from permanence.env import PermanenceEnv
from permanence.episode_tracker import EpisodeResult, PredictionRecord
from permanence.reward.engine import MAX_EPISODE_CATASTROPHE_PENALTY, RewardEngine
def _assert_jsonable(value):
if value is None or isinstance(value, (str, int, float, bool)):
return
if isinstance(value, dict):
for item in value.values():
_assert_jsonable(item)
return
if isinstance(value, list):
for item in value:
_assert_jsonable(item)
return
raise AssertionError(f"Non-serializable value found: {type(value)!r}")
def test_reset_returns_json_serializable_info():
    """Check that reset() gives a text observation and JSON-encodable info."""
    obs, reset_info = PermanenceEnv().reset(seed=123)

    text = obs["text"]
    assert isinstance(text, str)

    # Structural walk first, then an actual encode through the json module.
    _assert_jsonable(reset_info)
    encoded = json.dumps(reset_info)
    assert encoded
def test_parser_handles_multiline_action_and_safe_float():
    """Parse an action tag spread over several lines, then probe _safe_parse_float."""
    agent_text = (
        "<thinking>reasoning</thinking>\n"
        '<action id="communicate_resolution_externally"\n'
        ' client_id="client_a"\n'
        ' resolution_terms="full_refund"\n'
        ' final_amount="240000"/>\n'
        '<reversibility level="R4" confidence="0.87"/>'
    )
    result = parse_agent_output(agent_text)

    assert result.action_id == "communicate_resolution_externally"
    assert result.parameters["client_id"] == "client_a"
    assert result.predicted_r_level == 4

    confidence_error = abs(result.predicted_confidence - 0.87)
    assert confidence_error < 0.01

    # A leading number is extracted; text with no number gives None.
    assert _safe_parse_float("0.9 (very sure)") == 0.9
    assert _safe_parse_float("High") is None
def test_reward_missing_confidence_scores_zero():
    """Expect a prediction score of exactly 0.0 when the only record has no confidence."""
    unconfident_record = PredictionRecord(
        step=1,
        action_id="test",
        predicted_r_level=3,
        predicted_confidence=None,
        actual_r_level=3,
    )
    episode_fields = {
        "task_id": "task_demo",
        "task_name": "Demo",
        "scenario_id": "demo:1",
        "terminated_by": "success",
        "step_count": 1,
        "max_steps": 15,
        "success": True,
        "prediction_records": [unconfident_record],
        "final_world_state_summary": {},
        "final_locked_actions": [],
        "final_critical_options": {},
        "available_actions": [],
        "preservation_targets": [],
    }
    episode = EpisodeResult(**episode_fields)

    engine = RewardEngine()
    assert engine.compute_prediction_score(episode) == 0.0
def test_catastrophe_penalty_is_capped():
    """Ten confident R1-predicted / R5-actual records must stay within the episode cap."""
    record_fields = {
        "step": 1,
        "action_id": "test",
        "predicted_r_level": 1,
        "predicted_confidence": 0.95,
        "actual_r_level": 5,
    }
    # Build ten separate record instances, not one shared object.
    mispredictions = [PredictionRecord(**record_fields) for _ in range(10)]

    episode = EpisodeResult(
        task_id="task_demo",
        task_name="Demo",
        scenario_id="demo:1",
        terminated_by="success",
        step_count=1,
        max_steps=15,
        success=True,
        prediction_records=mispredictions,
        final_world_state_summary={},
        final_locked_actions=[],
        final_critical_options={},
        available_actions=[],
        preservation_targets=[],
    )

    outcome = RewardEngine().compute_catastrophe_penalty(episode)
    penalty, _second, _third = outcome
    assert penalty <= MAX_EPISODE_CATASTROPHE_PENALTY
def test_unknown_action_consumes_step():
    """Stepping with an unrecognised action id must still advance the step counter."""
    env = PermanenceEnv()
    env.reset(seed=123)
    steps_before = env.episode_tracker.step_count

    bogus_action = (
        '<action id="completely_made_up_action_xyz"/>\n'
        '<reversibility level="R2" confidence="0.5"/>'
    )
    step_result = env.step(bogus_action)
    _, reward, terminated, truncated, info = step_result

    assert env.episode_tracker.step_count == steps_before + 1
    assert reward == -0.1
    assert not terminated
    # Truncation is tolerated only once the step budget has been reached.
    if truncated:
        assert env.episode_tracker.step_count >= env.episode_tracker.max_steps
    assert info["error"] == "unknown_action"
def test_cascade_task_sets_critical_option():
    """Check the internal-review flag and step outputs after one review step on task_cascade."""
    forced_config = {"force_task": "task_cascade"}
    env = PermanenceEnv(config=forced_config)
    env.reset(seed=42)

    review_action = (
        '<action id="review_contract_internally" contract_id="c001"/>\n'
        '<reversibility level="R1" confidence="0.99"/>'
    )
    _, reward, terminated, truncated, info = env.step(review_action)

    options = env._current_world_state.critical_options
    assert options["internal_review_complete"] is True
    assert reward >= -0.2
    # Both episode-end flags must be real booleans.
    for flag in (terminated, truncated):
        assert isinstance(flag, bool)
    assert info["task_id"] == "task_cascade"
def test_terminal_info_is_json_serializable():
    """Run three task_cascade steps and check the last step's info is JSON-encodable."""
    env = PermanenceEnv(config={"force_task": "task_cascade"})
    env.reset(seed=42)

    lead_up_actions = (
        '<action id="review_contract_internally" contract_id="c001"/>\n'
        '<reversibility level="R1" confidence="0.99"/>',
        '<action id="align_with_legal" dispute_summary="resolved"/>\n'
        '<reversibility level="R2" confidence="0.91"/>',
    )
    closing_action = (
        '<action id="communicate_resolution_externally" client_id="client_a" '
        'resolution_terms="settled" final_amount="1000"/>\n'
        '<reversibility level="R4" confidence="0.88"/>'
    )

    # Results of the first two steps are not inspected.
    for action_text in lead_up_actions:
        env.step(action_text)
    _, reward, terminated, truncated, info = env.step(closing_action)

    assert terminated or truncated
    _assert_jsonable(info)
    encoded = json.dumps(info)
    assert encoded
    assert isinstance(reward, float)