from __future__ import annotations import json import tempfile import unittest from pathlib import Path from analyzer import analyze_trace_file, duration_label from report_renderer import render_report from view_model import build_view_model class AnalyzerTests(unittest.TestCase): def write_codex_trace(self, messages: list[str]) -> Path: handle = tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".jsonl", delete=False) with handle: handle.write( json.dumps( { "timestamp": "2026-06-07T00:00:00Z", "type": "session_meta", "payload": {"originator": "codex_cli"}, } ) + "\n" ) for index, text in enumerate(messages, start=1): handle.write( json.dumps( { "timestamp": f"2026-06-07T00:0{index}:00Z", "type": "response_item", "payload": { "type": "message", "role": "assistant", "content": [{"type": "output_text", "text": text}], }, } ) + "\n" ) return Path(handle.name) def test_sample_trace_produces_structured_episode_and_redactions(self) -> None: result, narrative = analyze_trace_file(Path("examples/sample_trace_redacted.jsonl")) self.assertEqual(result.agent_type_guess, "codex") self.assertGreaterEqual(len(result.episodes), 1) self.assertGreater(result.redaction_count, 0) self.assertIn("[REDACTED_EMAIL]", narrative) self.assertIn("episodes", result.to_dict()) report = render_report(result) self.assertIn("Executive Summary", report) self.assertIn("Journey Timeline", report) self.assertIn("Outcome Claim Audit", report) def test_codebook_flags_premature_success_after_unresolved_workaround(self) -> None: path = self.write_codex_trace( [ "I will fix the auth timeout and run the login flow tests.", "The auth test still fails with a timeout and there is a risk that the retry loop hides the actual bug.", "I changed the timeout constant and skipped the flaky assertion as a workaround.", "Done, fixed, and complete; it should work now.", ] ) result, narrative = analyze_trace_file(path) episode = result.episodes[0] verdict = build_view_model(result, narrative)["verdict"] self.assertIn("still fails", episode.reported_difficulty) self.assertEqual(episode.detour_type, "premature_closure") self.assertEqual(episode.outcome_claim, "premature_success_claim") self.assertEqual(episode.recovery_pattern, "overconfident_recovery") self.assertEqual(verdict["honesty"], "overclaimed") def test_codebook_prefers_requirement_uncertainty_for_ambiguous_scope(self) -> None: path = self.write_codex_trace( [ "I need to clarify the export goal before touching shared behavior, then I will split the work into parser, renderer, and UI checks.", "The requirement is ambiguous: better could mean smaller files, richer metadata, or a different markdown layout, and compatibility conflicts with removing old keys.", "I will decompose this and narrow scope to a metadata-only export improvement, leaving the markdown layout unchanged.", "The metadata export is implemented and partially verified; the broader layout request remains out of scope.", ] ) result, _ = analyze_trace_file(path) episode = result.episodes[0] self.assertEqual(episode.difficulty_type, "requirement_uncertainty") self.assertIn("requirement is ambiguous", episode.reported_difficulty) self.assertIn("narrow scope", episode.strategy_after) def test_codebook_does_not_treat_initial_verification_plan_as_difficulty(self) -> None: path = self.write_codex_trace( [ "I will inspect the database migration and verify the rollback path.", "The migration has a compatibility risk because the old worker still reads the legacy column.", "Instead of dropping the column now, I will add the new column and keep both writes until the worker is updated.", "The safer migration is implemented and verified with forward and rollback checks.", ] ) result, _ = analyze_trace_file(path) episode = result.episodes[0] self.assertEqual(episode.difficulty_type, "compatibility_risk") self.assertIn("compatibility risk", episode.reported_difficulty) self.assertNotEqual(episode.detour_type, "rollback_or_reversal") def test_codebook_does_not_match_ci_inside_other_words(self) -> None: path = self.write_codex_trace( [ "I will trace the report rendering path and verify the empty-state behavior.", "The issue is that the empty report is not a parser failure; it is an expected no-episode state.", "Instead of forcing a fake episode, I will keep the empty state and make the copy explain the limitation.", "Implemented with a caveat: this only clarifies the report, it does not infer hidden reasoning.", ] ) result, _ = analyze_trace_file(path) episode = result.episodes[0] self.assertNotEqual(episode.difficulty_type, "environment_blocker") def test_duration_label_handles_iso_timestamps(self) -> None: self.assertEqual( duration_label("2026-06-06T10:00:00Z", "2026-06-06T10:03:12Z"), "3m 12s", ) if __name__ == "__main__": unittest.main()