Spaces:
Sleeping
Sleeping
File size: 4,717 Bytes
"""Tests for the FinePrint environment."""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
import pytest
import numpy as np
from fineprint.env import FinePrintEnv, ACTION_TYPES
@pytest.fixture
def env():
    """Yield a ``FinePrintEnv`` for one test and close it afterwards.

    The environment is constructed with ``policies_dir`` pointing at the
    ``policies`` directory that sits one level above this file's directory.
    """
    base_dir = Path(__file__).resolve().parent.parent
    environment = FinePrintEnv(policies_dir=str(base_dir / "policies"))
    yield environment
    # Teardown: runs once the requesting test has finished.
    environment.close()
class TestReset:
    """Checks on the values returned by ``env.reset``."""

    def test_reset_returns_obs_and_info(self, env):
        """``reset`` returns a pair whose two members are both dicts."""
        observation, reset_info = env.reset(seed=42)
        for part in (observation, reset_info):
            assert isinstance(part, dict)

    def test_reset_obs_keys(self, env):
        """The observation exposes exactly the expected set of keys."""
        observation, _ = env.reset(seed=42)
        wanted = {
            "current_workflow",
            "current_step",
            "workflow_progress",
            "user_message",
            "conversation_history",
            "user_satisfaction",
            "agent_believed_version",
            "cached_policies",
            "steps_since_last_verify",
            "system_notification",
            "contradiction_detected",
            "user_expressed_confusion",
            "last_action_compliant",
            "last_compliance_note",
        }
        assert set(observation) == wanted

    def test_reset_initial_values(self, env):
        """A fresh episode starts from the expected initial observation."""
        observation, _ = env.reset(seed=42)
        assert observation["agent_believed_version"] == "v1_base"
        # These two entries are indexable; only their first element is checked.
        verify_counter = observation["steps_since_last_verify"]
        assert verify_counter[0] == 0
        satisfaction = observation["user_satisfaction"]
        assert satisfaction[0] == 1.0
        assert observation["contradiction_detected"] == 0
        assert observation["user_expressed_confusion"] == 0

    def test_reset_info_contains_version(self, env):
        """The info dict reports the active version and the version count."""
        _, reset_info = env.reset(seed=42)
        assert reset_info["active_version"] == "v1_base"
        assert reset_info["total_versions"] == 8

    def test_reset_reproducibility(self, env):
        """Two resets with the same seed give the same ``current_workflow``."""
        first_obs, _ = env.reset(seed=42)
        second_obs, _ = env.reset(seed=42)
        assert first_obs["current_workflow"] == second_obs["current_workflow"]
class TestStep:
    """Checks on ``env.step`` for several action payloads."""

    def test_step_returns_5_tuple(self, env):
        """``step`` returns five values of the expected Python types."""
        env.reset(seed=42)
        outcome = env.step({"action_type": 5, "message": "Hello"})
        assert len(outcome) == 5
        obs, reward, terminated, truncated, info = outcome
        typed_parts = (
            (obs, dict),
            (reward, float),
            (terminated, bool),
            (truncated, bool),
            (info, dict),
        )
        for value, expected_type in typed_parts:
            assert isinstance(value, expected_type)

    def test_step_without_reset_raises(self, env):
        """Stepping before any reset raises ``RuntimeError``."""
        action = {"action_type": 5}
        with pytest.raises(RuntimeError):
            env.step(action)

    def test_request_verification(self, env):
        """Action type 0 is compliant and its reason mentions "Verified"."""
        env.reset(seed=42)
        _, _, _, _, info = env.step({"action_type": 0})
        compliance = info["compliance"]
        assert compliance["compliant"] is True
        assert "Verified" in compliance["reason"]

    def test_quote_policy_correct(self, env):
        """Quoting the right value after verifying is compliant and rewarded."""
        env.reset(seed=42)
        # First verify to ensure cache is fresh
        env.step({"action_type": 0})
        # Quote a known v1_base value
        quote_action = {
            "action_type": 1,
            "policy_field": "return.window_days",
            "quoted_value": "30",
        }
        _, reward, _, _, info = env.step(quote_action)
        assert info["compliance"]["compliant"] is True
        assert reward > 0

    def test_quote_policy_incorrect(self, env):
        """Quoting a wrong value is reported as non-compliant."""
        env.reset(seed=42)
        quote_action = {
            "action_type": 1,
            "policy_field": "return.window_days",
            "quoted_value": "999",
        }
        _, _, _, _, info = env.step(quote_action)
        assert info["compliance"]["compliant"] is False

    def test_episode_terminates(self, env):
        """Repeating action type 5 ends the episode within 100 steps."""
        env.reset(seed=42)
        finished = False
        for _attempt in range(100):
            _, _, terminated, truncated, _ = env.step(
                {"action_type": 5, "message": "OK"}
            )
            finished = terminated or truncated
            if finished:
                break
        assert finished

    def test_info_contains_metrics(self, env):
        """The step info dict carries every expected metric key."""
        env.reset(seed=42)
        _, _, _, _, info = env.step({"action_type": 5, "message": "Hi"})
        metric_keys = (
            "compliance_failures",
            "drift_detections",
            "user_satisfaction",
            "active_version",
            "agent_version",
        )
        for key in metric_keys:
            assert key in info
class TestRender:
    """Checks on ``env.render`` in ``"ansi"`` mode."""

    def test_render_ansi(self, env):
        """After a reset, rendering gives a string containing "FINEPRINT"."""
        env.reset(seed=42)
        rendered = env.render(mode="ansi")
        assert isinstance(rendered, str)
        assert "FINEPRINT" in rendered

    def test_render_before_reset(self, env):
        """Rendering before any reset returns ``None``."""
        assert env.render(mode="ansi") is None
class TestClose:
    """Checks on ``env.close``."""

    def test_close(self, env):
        """Closing a reset environment leaves ``env.state`` as ``None``."""
        env.reset(seed=42)
        env.close()
        state_after_close = env.state
        assert state_after_close is None
|