# sysadmin-env/tests/test_models.py
# huggingmenfordays: "Upload folder using huggingface_hub"
# 4732653 (verified)
from pydantic import ValidationError
from sysadmin_env.models import Action
from sysadmin_env.models import DifficultyTier
from sysadmin_env.models import Observation
from sysadmin_env.models import RewardSignal
from sysadmin_env.models import TaskMetadata
def test_action_valid():
    """Check an Action built from a command alone.

    Asserts that the command is stored as given, that ``reasoning`` is
    ``None`` when it is not supplied, and that the JSON dump contains the
    command text.
    """
    command_text = "systemctl restart nginx"
    action = Action(command=command_text)
    assert action.command == command_text
    assert action.reasoning is None
    serialized = action.model_dump_json()
    assert command_text in serialized
    print("action valid ok")
def test_action_with_reasoning():
    """Check that an Action keeps its reasoning and survives a JSON roundtrip."""
    note = "checking logs for crash cause"
    original = Action(command="journalctl -u nginx", reasoning=note)
    assert original.reasoning == note
    # Serialize, then parse back, and compare against the source instance.
    payload = original.model_dump_json()
    restored = Action.model_validate_json(payload)
    assert restored == original
    print("action with reasoning ok")
def test_action_empty_command_rejected():
    """Expect ValidationError when an Action is built with an empty command."""
    try:
        Action(command="")
    except ValidationError:
        # Expected outcome: construction was refused.
        pass
    else:
        raise AssertionError("should have raised")
    print("action empty command rejected ok")
def test_action_missing_command_rejected():
    """Expect ValidationError when an Action is validated from an empty dict."""
    empty_payload = {}
    try:
        Action.model_validate(empty_payload)
    except ValidationError:
        # Expected outcome: a payload without a command was refused.
        pass
    else:
        raise AssertionError("should have raised")
    print("action missing command rejected ok")
def test_observation_valid():
    """Check that a fully populated Observation survives a JSON roundtrip."""
    fields = {
        "stdout": "active (running)",
        "stderr": "",
        "exit_code": 0,
        "working_directory": "/root",
        "execution_time": 0.042,
        "reward": 0.15,
        "done": False,
        "step_number": 3,
        "max_steps": 50,
    }
    observation = Observation(**fields)
    restored = Observation.model_validate_json(observation.model_dump_json())
    assert restored == observation
    print("observation valid ok")
def test_observation_negative_execution_time_rejected():
    """Expect ValidationError for an Observation whose execution_time is -1.0."""
    fields = {
        "stdout": "",
        "stderr": "",
        "exit_code": 0,
        "working_directory": "/",
        "execution_time": -1.0,  # the value under test
        "reward": 0.0,
        "done": False,
        "step_number": 0,
        "max_steps": 10,
    }
    try:
        Observation(**fields)
    except ValidationError:
        pass
    else:
        raise AssertionError("should have raised")
    print("observation negative execution time rejected ok")
def test_observation_zero_max_steps_rejected():
    """Expect ValidationError for an Observation whose max_steps is 0."""
    fields = {
        "stdout": "",
        "stderr": "",
        "exit_code": 0,
        "working_directory": "/",
        "execution_time": 0.0,
        "reward": 0.0,
        "done": False,
        "step_number": 0,
        "max_steps": 0,  # the value under test
    }
    try:
        Observation(**fields)
    except ValidationError:
        pass
    else:
        raise AssertionError("should have raised")
    print("observation zero max steps rejected ok")
def test_task_metadata_valid():
    """Check that TaskMetadata survives a JSON roundtrip with its difficulty intact."""
    fields = {
        "task_id": "nginx_crash",
        "difficulty": DifficultyTier.easy,
        "description": "nginx crashed with stale pid and config syntax error",
        "max_steps": 50,
        "time_limit": 300.0,
        "base_filesystem_path": "/assets/nginx_crash",
    }
    task = TaskMetadata(**fields)
    restored = TaskMetadata.model_validate_json(task.model_dump_json())
    assert restored == task
    # The parsed difficulty must compare equal to the enum member.
    assert restored.difficulty == DifficultyTier.easy
    print("task metadata valid ok")
def test_task_metadata_invalid_difficulty_rejected():
    """Expect ValidationError for TaskMetadata with difficulty "legendary"."""
    fields = {
        "task_id": "test",
        "difficulty": "legendary",  # the value under test
        "description": "invalid tier",
        "max_steps": 10,
        "time_limit": 60.0,
        "base_filesystem_path": "/tmp/test",
    }
    try:
        TaskMetadata(**fields)
    except ValidationError:
        pass
    else:
        raise AssertionError("should have raised")
    print("task metadata invalid difficulty rejected ok")
def test_task_metadata_empty_id_rejected():
    """Expect ValidationError for TaskMetadata with an empty task_id."""
    fields = {
        "task_id": "",  # the value under test
        "difficulty": "easy",
        "description": "empty id",
        "max_steps": 10,
        "time_limit": 60.0,
        "base_filesystem_path": "/tmp/test",
    }
    try:
        TaskMetadata(**fields)
    except ValidationError:
        pass
    else:
        raise AssertionError("should have raised")
    print("task metadata empty id rejected ok")
def test_reward_signal_valid():
    """Check that a RewardSignal survives a JSON roundtrip."""
    components = {
        "health_delta": 0.25,
        "knowledge_delta": 0.1,
        "action_penalty": -0.01,
        "total_reward": 0.34,
    }
    signal = RewardSignal(**components)
    restored = RewardSignal.model_validate_json(signal.model_dump_json())
    assert restored == signal
    print("reward signal valid ok")
def test_reward_signal_positive_penalty_rejected():
    """Expect ValidationError for a RewardSignal whose action_penalty is 0.5."""
    components = {
        "health_delta": 0.0,
        "knowledge_delta": 0.0,
        "action_penalty": 0.5,  # the value under test
        "total_reward": 0.5,
    }
    try:
        RewardSignal(**components)
    except ValidationError:
        pass
    else:
        raise AssertionError("should have raised")
    print("reward signal positive penalty rejected ok")
def test_reward_signal_negative_knowledge_rejected():
    """Expect ValidationError for a RewardSignal whose knowledge_delta is -0.1."""
    components = {
        "health_delta": 0.0,
        "knowledge_delta": -0.1,  # the value under test
        "action_penalty": -0.01,
        "total_reward": -0.11,
    }
    try:
        RewardSignal(**components)
    except ValidationError:
        pass
    else:
        raise AssertionError("should have raised")
    print("reward signal negative knowledge rejected ok")
def test_all_models_serialization_roundtrip():
    """Check that one instance of each of the four models survives a JSON roundtrip."""
    sample_action = Action(command="ls -la", reasoning="listing files")
    sample_observation = Observation(
        stdout="total 0",
        stderr="",
        exit_code=0,
        working_directory="/root",
        execution_time=0.001,
        reward=-0.01,
        done=False,
        step_number=1,
        max_steps=50,
    )
    sample_task = TaskMetadata(
        task_id="disk_full",
        difficulty=DifficultyTier.medium,
        description="hidden sparse log file filling loopback mount",
        max_steps=75,
        time_limit=600.0,
        base_filesystem_path="/assets/disk_full",
    )
    sample_reward = RewardSignal(
        health_delta=0.0,
        knowledge_delta=0.05,
        action_penalty=-0.01,
        total_reward=0.04,
    )
    samples = (sample_action, sample_observation, sample_task, sample_reward)
    for sample in samples:
        # Parse the dump back with the sample's own class, then compare.
        model_cls = type(sample)
        restored = model_cls.model_validate_json(sample.model_dump_json())
        assert restored == sample
    print("all models serialization roundtrip ok")
if __name__ == "__main__":
    # Script entry point: run every test in the order it is defined in this
    # file, then report overall success. Any failing test raises and stops
    # the run before the final message.
    phase_one_tests = (
        test_action_valid,
        test_action_with_reasoning,
        test_action_empty_command_rejected,
        test_action_missing_command_rejected,
        test_observation_valid,
        test_observation_negative_execution_time_rejected,
        test_observation_zero_max_steps_rejected,
        test_task_metadata_valid,
        test_task_metadata_invalid_difficulty_rejected,
        test_task_metadata_empty_id_rejected,
        test_reward_signal_valid,
        test_reward_signal_positive_penalty_rejected,
        test_reward_signal_negative_knowledge_rejected,
        test_all_models_serialization_roundtrip,
    )
    for run_test in phase_one_tests:
        run_test()
    print("all phase one tests passed")