"""Tests for the load-test reporting helpers.""" from scripts.load_test import ( _build_results, _evaluate_response_quality, _select_headline_metric, _summarize_phase, ) class TestEvaluateResponseQuality: def test_grounded_success_requires_grounded_verified_recommendations(self): payload = { "recommendations": [ { "explanation": '"Battery lasts all day" [review_1]', "citations_verified": True, "confidence": {"is_grounded": True}, } ] } result = _evaluate_response_quality(payload, explain=True) assert result["grounded_success"] is True assert result["refusal_aware_success"] is True def test_refusal_counts_only_in_refusal_aware_mode(self): payload = { "recommendations": [ { "explanation": "I cannot recommend this product based on the available review evidence.", "citations_verified": None, "confidence": {"is_grounded": False}, } ] } result = _evaluate_response_quality(payload, explain=True) assert result["grounded_success"] is False assert result["refusal_aware_success"] is True class TestSummarizePhase: def test_summarizes_cache_and_quality_rates(self): samples = [ { "status": 200, "client_ms": 90.0, "server_ms": 60.0, "network_overhead_ms": 30.0, "server_timing_header_present": True, "cache_result_header_present": True, "cache_result": "exact", "grounded_success": True, "refusal_aware_success": True, }, { "status": 200, "client_ms": 220.0, "server_ms": 180.0, "network_overhead_ms": 40.0, "server_timing_header_present": True, "cache_result_header_present": True, "cache_result": "miss", "grounded_success": False, "refusal_aware_success": True, }, { "status": 503, "client_ms": None, "server_ms": None, "network_overhead_ms": None, "cache_result": "unknown", "grounded_success": None, "refusal_aware_success": None, }, ] summary = _summarize_phase(samples, explain=True) assert summary["successful"] == 2 assert summary["errors"] == 1 assert summary["cache_results"] == {"exact": 1, "miss": 1} assert summary["cache_hit_rate"] == 0.5 assert summary["header_presence"]["x_response_time_ms"] == { "present": 2, "missing": 0, } assert summary["header_presence"]["x_cache_result"] == { "present": 2, "missing": 0, } assert summary["cache_observability"]["available"] is True assert summary["api_quality"]["evaluated_requests"] == 2 assert summary["api_quality"]["grounded_success_rate"] == 0.5 assert summary["api_quality"]["refusal_aware_success_rate"] == 1.0 def test_reports_missing_cache_header_observability_explicitly(self): samples = [ { "status": 200, "client_ms": 80.0, "server_ms": 20.0, "network_overhead_ms": 60.0, "server_timing_header_present": True, "cache_result_header_present": False, "cache_result": "unknown", "grounded_success": True, "refusal_aware_success": True, } ] summary = _summarize_phase(samples, explain=True) assert summary["cache_results"] == {} assert summary["cache_hit_rate"] is None assert summary["header_presence"]["x_cache_result"] == { "present": 0, "missing": 1, } assert summary["cache_observability"] == { "available": False, "successful_responses": 1, "header_present_responses": 0, "missing_header_responses": 1, "reason": "X-Cache-Result header absent on successful responses", } class TestSelectHeadlineMetric: def test_prefers_server_p95_when_available(self): measured_summary = { "server_latency_ms": {"p95": 180.0}, "client_latency_ms": {"p95": 260.0}, } headline = _select_headline_metric(measured_summary, target_ms=500.0) assert headline["name"] == "steady_state_server_p95_ms" assert headline["value_ms"] == 180.0 assert headline["pass"] is True def test_falls_back_to_client_p95_without_server_metric(self): measured_summary = { "server_latency_ms": None, "client_latency_ms": {"p95": 260.0}, } headline = _select_headline_metric(measured_summary, target_ms=200.0) assert headline["name"] == "steady_state_client_p95_ms" assert headline["value_ms"] == 260.0 assert headline["pass"] is False class TestBuildResults: def test_cache_hits_is_none_when_cache_metadata_is_absent(self): measured_samples = [ { "status": 200, "client_ms": 80.0, "server_ms": 20.0, "network_overhead_ms": 60.0, "server_timing_header_present": True, "cache_result_header_present": False, "cache_result": "unknown", "grounded_success": True, "refusal_aware_success": True, } ] results = _build_results( warmup_samples=[], measured_samples=measured_samples, config={"explain": True}, target_ms=500.0, ) assert results["cache_hits"] is None assert results["cache_hit_rate"] is None assert results["measured"]["cache_observability"]["available"] is False