"""MOD 08 — Comprehensive unit tests for schemas and validators.

Covers edge cases in every Pydantic model from replicalab.models and
validator behaviour from replicalab.utils.validation that are not
already tested in test_models.py and test_validation.py.
"""
| from __future__ import annotations | |
| import pytest | |
| from pydantic import ValidationError | |
| from replicalab.models import ( | |
| ConversationEntry, | |
| EpisodeLog, | |
| EpisodeState, | |
| LabManagerAction, | |
| LabManagerActionType, | |
| LabManagerObservation, | |
| Observation, | |
| Protocol, | |
| RewardBreakdown, | |
| ScientistAction, | |
| ScientistActionType, | |
| ScientistObservation, | |
| StepInfo, | |
| StepResult, | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Helpers | |
| # --------------------------------------------------------------------------- | |
| def _minimal_accept() -> dict: | |
| return { | |
| "action_type": "accept", | |
| "sample_size": 0, | |
| "controls": [], | |
| "technique": "", | |
| "duration_days": 0, | |
| "required_equipment": [], | |
| "required_reagents": [], | |
| "questions": [], | |
| "rationale": "", | |
| } | |
| def _minimal_request_info() -> dict: | |
| return { | |
| "action_type": "request_info", | |
| "sample_size": 0, | |
| "controls": [], | |
| "technique": "", | |
| "duration_days": 0, | |
| "required_equipment": [], | |
| "required_reagents": [], | |
| "questions": ["What equipment is available?"], | |
| "rationale": "", | |
| } | |
| def _minimal_propose() -> dict: | |
| return { | |
| "action_type": "propose_protocol", | |
| "sample_size": 10, | |
| "controls": ["baseline"], | |
| "technique": "grid_search", | |
| "duration_days": 5, | |
| "required_equipment": ["compute"], | |
| "required_reagents": ["data"], | |
| "questions": [], | |
| "rationale": "A simple plan.", | |
| } | |
| def _lm_accept() -> dict: | |
| return { | |
| "action_type": "accept", | |
| "feasible": True, | |
| "budget_ok": True, | |
| "equipment_ok": True, | |
| "reagents_ok": True, | |
| "schedule_ok": True, | |
| "staff_ok": True, | |
| "suggested_technique": "", | |
| "suggested_sample_size": 0, | |
| "suggested_controls": [], | |
| "explanation": "All constraints are satisfied.", | |
| } | |
| def _lm_reject() -> dict: | |
| return { | |
| "action_type": "reject", | |
| "feasible": False, | |
| "budget_ok": True, | |
| "equipment_ok": False, | |
| "reagents_ok": True, | |
| "schedule_ok": True, | |
| "staff_ok": True, | |
| "suggested_technique": "", | |
| "suggested_sample_size": 0, | |
| "suggested_controls": [], | |
| "explanation": "The equipment is unavailable.", | |
| } | |
| def _lm_report() -> dict: | |
| return { | |
| "action_type": "report_feasibility", | |
| "feasible": True, | |
| "budget_ok": True, | |
| "equipment_ok": True, | |
| "reagents_ok": True, | |
| "schedule_ok": True, | |
| "staff_ok": True, | |
| "suggested_technique": "", | |
| "suggested_sample_size": 0, | |
| "suggested_controls": [], | |
| "explanation": "Feasible as proposed.", | |
| } | |
# ===================================================================
# ScientistAction — edge cases
# ===================================================================
class TestScientistActionEdgeCases:
    """Edge-case validation behaviour of ScientistAction for every action type."""

    def test_accept_valid_minimal(self) -> None:
        act = ScientistAction.model_validate(_minimal_accept())
        assert act.action_type is ScientistActionType.ACCEPT

    def test_accept_rejects_questions(self) -> None:
        data = {**_minimal_accept(), "questions": ["Why?"]}
        with pytest.raises(ValidationError, match="questions must be empty for accept"):
            ScientistAction.model_validate(data)

    def test_accept_rejects_protocol_payload(self) -> None:
        data = {**_minimal_accept(), "sample_size": 10}
        with pytest.raises(ValidationError, match="accept cannot include protocol"):
            ScientistAction.model_validate(data)

    def test_revise_protocol_valid(self) -> None:
        data = {**_minimal_propose(), "action_type": "revise_protocol"}
        act = ScientistAction.model_validate(data)
        assert act.action_type is ScientistActionType.REVISE_PROTOCOL

    def test_revise_rejects_zero_sample(self) -> None:
        data = {**_minimal_propose(), "action_type": "revise_protocol", "sample_size": 0}
        with pytest.raises(ValidationError, match="sample_size must be >= 1"):
            ScientistAction.model_validate(data)

    def test_propose_rejects_empty_technique(self) -> None:
        data = {**_minimal_propose(), "technique": ""}
        with pytest.raises(ValidationError, match="technique is required"):
            ScientistAction.model_validate(data)

    def test_propose_rejects_empty_rationale(self) -> None:
        data = {**_minimal_propose(), "rationale": ""}
        with pytest.raises(ValidationError, match="rationale is required"):
            ScientistAction.model_validate(data)

    def test_propose_rejects_questions(self) -> None:
        data = {**_minimal_propose(), "questions": ["Why?"]}
        with pytest.raises(ValidationError, match="questions must be empty"):
            ScientistAction.model_validate(data)

    def test_request_info_valid(self) -> None:
        act = ScientistAction.model_validate(_minimal_request_info())
        assert act.action_type is ScientistActionType.REQUEST_INFO

    def test_whitespace_stripping_in_lists(self) -> None:
        data = {**_minimal_propose(), "controls": [" baseline ", " positive "]}
        act = ScientistAction.model_validate(data)
        assert act.controls == ["baseline", "positive"]

    def test_empty_string_in_list_rejects(self) -> None:
        data = {**_minimal_propose(), "controls": ["baseline", ""]}
        with pytest.raises(ValidationError, match="non-empty"):
            ScientistAction.model_validate(data)

    def test_whitespace_only_in_list_rejects(self) -> None:
        data = {**_minimal_propose(), "required_equipment": ["compute", " "]}
        with pytest.raises(ValidationError, match="non-empty"):
            ScientistAction.model_validate(data)

    def test_negative_sample_size_rejects(self) -> None:
        data = {**_minimal_propose(), "sample_size": -1}
        with pytest.raises(ValidationError):
            ScientistAction.model_validate(data)

    def test_negative_duration_days_rejects(self) -> None:
        data = {**_minimal_propose(), "duration_days": -5}
        with pytest.raises(ValidationError):
            ScientistAction.model_validate(data)

    def test_enum_value_access(self) -> None:
        assert ScientistActionType.PROPOSE_PROTOCOL.value == "propose_protocol"
        assert ScientistActionType.REVISE_PROTOCOL.value == "revise_protocol"
        assert ScientistActionType.REQUEST_INFO.value == "request_info"
        assert ScientistActionType.ACCEPT.value == "accept"
# ===================================================================
# LabManagerAction — edge cases
# ===================================================================
class TestLabManagerActionEdgeCases:
    """Edge-case validation behaviour of LabManagerAction."""

    def test_accept_valid(self) -> None:
        act = LabManagerAction.model_validate(_lm_accept())
        assert act.action_type is LabManagerActionType.ACCEPT
        assert act.feasible is True

    def test_accept_rejects_infeasible(self) -> None:
        data = {**_lm_accept(), "feasible": False, "equipment_ok": False}
        with pytest.raises(ValidationError, match="accept requires feasible=true"):
            LabManagerAction.model_validate(data)

    def test_reject_valid(self) -> None:
        act = LabManagerAction.model_validate(_lm_reject())
        assert act.action_type is LabManagerActionType.REJECT
        assert act.feasible is False

    def test_reject_rejects_feasible(self) -> None:
        data = {**_lm_reject(), "feasible": True, "equipment_ok": True}
        with pytest.raises(ValidationError, match="reject requires feasible=false"):
            LabManagerAction.model_validate(data)

    def test_report_feasibility_valid(self) -> None:
        act = LabManagerAction.model_validate(_lm_report())
        assert act.action_type is LabManagerActionType.REPORT_FEASIBILITY

    def test_report_rejects_suggestion_fields(self) -> None:
        data = {**_lm_report(), "suggested_technique": "alternative_method"}
        with pytest.raises(ValidationError, match="suggestion fields are only allowed"):
            LabManagerAction.model_validate(data)

    def test_suggest_alternative_rejects_feasible(self) -> None:
        data = {
            **_lm_reject(),
            "action_type": "suggest_alternative",
            "feasible": True,
            "equipment_ok": True,
            "suggested_technique": "alt",
        }
        with pytest.raises(ValidationError, match="suggest_alternative requires feasible=false"):
            LabManagerAction.model_validate(data)

    def test_empty_explanation_rejects(self) -> None:
        data = {**_lm_accept(), "explanation": ""}
        with pytest.raises(ValidationError, match="explanation is required"):
            LabManagerAction.model_validate(data)

    def test_extra_fields_rejected(self) -> None:
        data = {**_lm_accept(), "extra": "nope"}
        with pytest.raises(ValidationError, match="Extra inputs are not permitted"):
            LabManagerAction.model_validate(data)

    def test_feasible_flag_mismatch_single_false(self) -> None:
        data = {**_lm_accept(), "schedule_ok": False}
        with pytest.raises(ValidationError, match="feasible must equal the logical AND"):
            LabManagerAction.model_validate(data)

    def test_enum_value_access(self) -> None:
        assert LabManagerActionType.REPORT_FEASIBILITY.value == "report_feasibility"
        assert LabManagerActionType.SUGGEST_ALTERNATIVE.value == "suggest_alternative"
        assert LabManagerActionType.REJECT.value == "reject"
        assert LabManagerActionType.ACCEPT.value == "accept"
# ===================================================================
# Protocol — edge cases
# ===================================================================
class TestProtocolEdgeCases:
    """Edge-case validation behaviour of the Protocol model."""

    @staticmethod
    def _kwargs(**overrides: object) -> dict:
        # Minimal valid constructor arguments; individual tests override fields.
        base: dict = dict(
            sample_size=1,
            controls=[],
            technique="method",
            duration_days=1,
            required_equipment=[],
            required_reagents=[],
            rationale="Reason.",
        )
        base.update(overrides)
        return base

    def test_valid_minimal(self) -> None:
        assert Protocol(**self._kwargs()).sample_size == 1

    def test_zero_sample_size_allowed(self) -> None:
        assert Protocol(**self._kwargs(sample_size=0)).sample_size == 0

    def test_empty_technique_rejects(self) -> None:
        with pytest.raises(ValidationError, match="non-empty"):
            Protocol(**self._kwargs(technique=""))

    def test_empty_rationale_rejects(self) -> None:
        with pytest.raises(ValidationError, match="non-empty"):
            Protocol(**self._kwargs(rationale=""))

    def test_negative_sample_size_rejects(self) -> None:
        with pytest.raises(ValidationError):
            Protocol(**self._kwargs(sample_size=-1))

    def test_negative_duration_rejects(self) -> None:
        with pytest.raises(ValidationError):
            Protocol(**self._kwargs(duration_days=-1))

    def test_whitespace_stripping(self) -> None:
        p = Protocol(
            **self._kwargs(
                controls=[" ctrl "],
                technique=" method ",
                required_equipment=[" equip "],
                required_reagents=[" reagent "],
                rationale=" reason ",
            )
        )
        assert p.controls == ["ctrl"]
        assert p.technique == "method"
        assert p.required_equipment == ["equip"]
        assert p.required_reagents == ["reagent"]
        assert p.rationale == "reason"

    def test_empty_string_in_controls_rejects(self) -> None:
        with pytest.raises(ValidationError, match="non-empty"):
            Protocol(**self._kwargs(controls=["good", ""]))

    def test_extra_fields_rejected(self) -> None:
        with pytest.raises(ValidationError, match="Extra inputs are not permitted"):
            Protocol(**self._kwargs(extra_field="bad"))

    def test_json_round_trip(self) -> None:
        p = Protocol(
            **self._kwargs(
                sample_size=10,
                controls=["baseline", "positive"],
                technique="grid_search",
                duration_days=5,
                required_equipment=["compute"],
                required_reagents=["data"],
                rationale="Full plan.",
            )
        )
        assert Protocol.model_validate_json(p.model_dump_json()) == p
# ===================================================================
# ConversationEntry — edge cases
# ===================================================================
class TestConversationEntryEdgeCases:
    """Edge-case validation behaviour of ConversationEntry."""

    @staticmethod
    def _kwargs(**overrides: object) -> dict:
        # Minimal valid constructor arguments; individual tests override fields.
        base: dict = dict(
            role="scientist",
            message="Hello",
            round_number=0,
            action_type=None,
        )
        base.update(overrides)
        return base

    def test_null_action_type_valid(self) -> None:
        assert ConversationEntry(**self._kwargs()).action_type is None

    def test_empty_string_action_type_rejects(self) -> None:
        with pytest.raises(ValidationError, match="action_type must be null or a non-empty"):
            ConversationEntry(**self._kwargs(action_type=""))

    def test_empty_message_rejects(self) -> None:
        with pytest.raises(ValidationError, match="message is required"):
            ConversationEntry(**self._kwargs(message=""))

    def test_system_role_valid(self) -> None:
        entry = ConversationEntry(**self._kwargs(role="system", message="Round started."))
        assert entry.role == "system"

    def test_invalid_role_rejects(self) -> None:
        with pytest.raises(ValidationError):
            ConversationEntry(**self._kwargs(role="judge", message="Verdict."))

    def test_negative_round_number_rejects(self) -> None:
        with pytest.raises(ValidationError):
            ConversationEntry(**self._kwargs(round_number=-1))

    def test_extra_fields_rejected(self) -> None:
        with pytest.raises(ValidationError, match="Extra inputs are not permitted"):
            ConversationEntry(**self._kwargs(extra="bad"))
# ===================================================================
# RewardBreakdown — edge cases
# ===================================================================
class TestRewardBreakdownEdgeCases:
    """Edge-case validation behaviour of RewardBreakdown."""

    @staticmethod
    def _expect_invalid(**kwargs: float) -> None:
        # Shared assertion: constructing with these kwargs must fail validation.
        with pytest.raises(ValidationError):
            RewardBreakdown(**kwargs)

    def test_default_values(self) -> None:
        breakdown = RewardBreakdown()
        assert breakdown.rigor == 0.0
        assert breakdown.feasibility == 0.0
        assert breakdown.fidelity == 0.0
        assert breakdown.parsimony == 1.0
        assert breakdown.efficiency_bonus == 0.0
        assert breakdown.communication_bonus == 0.0
        assert breakdown.penalties == {}

    def test_boundary_values_valid(self) -> None:
        breakdown = RewardBreakdown(rigor=0.0, feasibility=1.0, fidelity=0.5, parsimony=0.0)
        assert breakdown.rigor == 0.0
        assert breakdown.feasibility == 1.0
        assert breakdown.parsimony == 0.0

    def test_rigor_above_one_rejects(self) -> None:
        self._expect_invalid(rigor=1.1)

    def test_rigor_below_zero_rejects(self) -> None:
        self._expect_invalid(rigor=-0.1)

    def test_feasibility_above_one_rejects(self) -> None:
        self._expect_invalid(feasibility=1.5)

    def test_fidelity_below_zero_rejects(self) -> None:
        self._expect_invalid(fidelity=-0.01)

    def test_parsimony_above_one_rejects(self) -> None:
        self._expect_invalid(parsimony=2.0)

    def test_penalties_dict_preserved(self) -> None:
        breakdown = RewardBreakdown(penalties={"timeout": 0.2, "stalling": 0.05})
        assert breakdown.penalties["timeout"] == 0.2
        assert breakdown.penalties["stalling"] == 0.05

    def test_json_round_trip(self) -> None:
        breakdown = RewardBreakdown(
            rigor=0.7,
            feasibility=0.8,
            fidelity=0.6,
            parsimony=0.9,
            efficiency_bonus=0.3,
            penalties={"invalid_tool_use": 0.1},
        )
        restored = RewardBreakdown.model_validate_json(breakdown.model_dump_json())
        assert restored == breakdown
# ===================================================================
# Observation — edge cases
# ===================================================================
class TestObservationEdgeCases:
    """Edge-case behaviour of the composite Observation model."""

    @staticmethod
    def _scientist_view() -> ScientistObservation:
        # Minimal valid scientist-side observation.
        return ScientistObservation(
            paper_title="T",
            paper_hypothesis="H",
            paper_method="M",
            paper_key_finding="F",
            experiment_goal="G",
            conversation_history=[],
            current_protocol=None,
            round_number=0,
            max_rounds=6,
        )

    @staticmethod
    def _manager_view() -> LabManagerObservation:
        # Minimal valid lab-manager-side observation.
        return LabManagerObservation(
            budget_total=1000.0,
            budget_remaining=800.0,
            equipment_available=["compute"],
            equipment_booked=[],
            reagents_in_stock=["data"],
            reagents_out_of_stock=[],
            staff_count=2,
            time_limit_days=7,
            safety_restrictions=[],
            conversation_history=[],
            current_protocol=None,
            round_number=0,
            max_rounds=6,
        )

    def test_both_none_valid(self) -> None:
        obs = Observation(scientist=None, lab_manager=None)
        assert obs.scientist is None
        assert obs.lab_manager is None

    def test_scientist_only_valid(self) -> None:
        obs = Observation(scientist=self._scientist_view(), lab_manager=None)
        assert obs.scientist is not None
        assert obs.lab_manager is None

    def test_lab_manager_only_valid(self) -> None:
        obs = Observation(scientist=None, lab_manager=self._manager_view())
        assert obs.scientist is None
        assert obs.lab_manager is not None

    def test_extra_fields_rejected(self) -> None:
        with pytest.raises(ValidationError, match="Extra inputs are not permitted"):
            Observation(scientist=None, lab_manager=None, judge=None)
# ===================================================================
# LabManagerObservation — edge cases
# ===================================================================
class TestLabManagerObservationEdgeCases:
    """Edge-case validation behaviour of LabManagerObservation."""

    @staticmethod
    def _kwargs(**overrides: object) -> dict:
        # Minimal valid constructor arguments; individual tests override fields.
        base: dict = dict(
            budget_total=1000.0,
            budget_remaining=800.0,
            equipment_available=[],
            equipment_booked=[],
            reagents_in_stock=[],
            reagents_out_of_stock=[],
            staff_count=2,
            time_limit_days=7,
            safety_restrictions=[],
            conversation_history=[],
            current_protocol=None,
            round_number=0,
            max_rounds=6,
        )
        base.update(overrides)
        return base

    def test_negative_staff_count_rejects(self) -> None:
        with pytest.raises(ValidationError):
            LabManagerObservation(**self._kwargs(staff_count=-1))

    def test_empty_string_in_equipment_rejects(self) -> None:
        with pytest.raises(ValidationError, match="non-empty"):
            LabManagerObservation(**self._kwargs(equipment_available=["compute", ""]))

    def test_whitespace_stripping_in_inventory(self) -> None:
        obs = LabManagerObservation(
            **self._kwargs(
                equipment_available=[" compute "],
                equipment_booked=[" scope "],
                reagents_in_stock=[" data "],
                reagents_out_of_stock=[" unobtainium "],
                safety_restrictions=[" no_fire "],
            )
        )
        assert obs.equipment_available == ["compute"]
        assert obs.equipment_booked == ["scope"]
        assert obs.reagents_in_stock == ["data"]
        assert obs.reagents_out_of_stock == ["unobtainium"]
        assert obs.safety_restrictions == ["no_fire"]
# ===================================================================
# StepInfo — edge cases
# ===================================================================
class TestStepInfoEdgeCases:
    """Defaults, extra-field passthrough, and serialization of StepInfo."""

    def test_defaults(self) -> None:
        si = StepInfo()
        assert si.agreement_reached is False
        assert si.error is None
        assert si.reward_breakdown is None
        assert si.judge_notes is None
        assert si.verdict is None
        assert si.top_failure_reasons == []

    def test_extra_fields_allowed(self) -> None:
        si = StepInfo(custom_key="value", debug_round=3)
        assert si.custom_key == "value"  # type: ignore[attr-defined]
        assert si.debug_round == 3  # type: ignore[attr-defined]

    def test_json_round_trip_with_extras(self) -> None:
        original = StepInfo(
            agreement_reached=True,
            reward_breakdown=RewardBreakdown(rigor=0.9),
            judge_notes="Good.",
            verdict="accept",
            extra_metric=42,
        )
        restored = StepInfo.model_validate_json(original.model_dump_json())
        assert restored.agreement_reached is True
        assert restored.reward_breakdown.rigor == 0.9
        assert restored.model_extra.get("extra_metric") == 42
# ===================================================================
# StepResult — edge cases
# ===================================================================
class TestStepResultEdgeCases:
    """Defaults and serialization of StepResult."""

    def test_defaults(self) -> None:
        res = StepResult()
        assert res.observation is None
        assert res.reward == 0.0
        assert res.done is False
        assert isinstance(res.info, StepInfo)

    def test_with_observation(self) -> None:
        res = StepResult(
            observation=Observation(scientist=None, lab_manager=None),
            reward=3.5,
            done=True,
        )
        assert res.reward == 3.5
        assert res.done is True

    def test_json_round_trip(self) -> None:
        res = StepResult(
            reward=5.0,
            done=True,
            info=StepInfo(agreement_reached=True, verdict="accept"),
        )
        restored = StepResult.model_validate_json(res.model_dump_json())
        assert restored.reward == 5.0
        assert restored.info.verdict == "accept"
# ===================================================================
# EpisodeState — edge cases
# ===================================================================
class TestEpisodeStateEdgeCases:
    """Defaults and list handling of EpisodeState."""

    def test_defaults(self) -> None:
        st = EpisodeState()
        assert st.seed == 0
        assert st.scenario_template == ""
        assert st.done is False
        assert st.current_protocol is None
        assert st.conversation_history == []
        assert st.top_failure_reasons == []

    def test_top_failure_reasons_preserved(self) -> None:
        reasons = ["Low feasibility.", "Timeout applied."]
        st = EpisodeState(top_failure_reasons=reasons)
        assert len(st.top_failure_reasons) == 2
        assert "Low feasibility." in st.top_failure_reasons
# ===================================================================
# EpisodeLog — edge cases
# ===================================================================
class TestEpisodeLogEdgeCases:
    """Defaults, serialization, and dump-key coverage of EpisodeLog."""

    def test_defaults(self) -> None:
        episode_log = EpisodeLog()
        assert episode_log.episode_id == ""
        assert episode_log.seed == 0
        assert episode_log.final_state is None
        assert episode_log.transcript == []
        assert episode_log.reward_breakdown is None
        assert episode_log.top_failure_reasons == []
        assert episode_log.verdict == ""

    def test_top_failure_reasons_in_json_round_trip(self) -> None:
        episode_log = EpisodeLog(
            episode_id="ep-fr",
            top_failure_reasons=["Feasibility too low.", "Timeout."],
            verdict="timeout",
        )
        restored = EpisodeLog.model_validate_json(episode_log.model_dump_json())
        assert restored.top_failure_reasons == ["Feasibility too low.", "Timeout."]
        assert restored.verdict == "timeout"

    def test_model_dump_contains_all_keys(self) -> None:
        dumped = EpisodeLog(episode_id="ep-keys").model_dump()
        expected_keys = {
            "episode_id", "seed", "scenario_template", "difficulty",
            "final_state", "transcript", "reward_breakdown", "total_reward",
            "rounds_used", "agreement_reached", "judge_notes", "verdict",
            "top_failure_reasons",
        }
        assert expected_keys.issubset(dumped.keys())