{ "evaluation_name": "v18_hybrid_acoustic_gemma_heldout", "status": "pass", "count": 2000, "success_count": 2000, "effective_success_count": 2000, "error_count": 0, "hard_error_count": 0, "hard_error_ids": [], "truncated_count": 225, "in_memory_retry_count": 75, "acoustic_hint_count": 2000, "acoustic_hint_match": 0.976, "response_repaired_count": 2000, "generation_fallback_count": 75, "class_match": 0.976, "class_match_successful_only": 0.976, "clear_match": 0.989, "clear_match_successful_only": 0.989, "has_reason": 1.0, "has_reason_successful_only": 1.0, "has_corrective_cue": 1.0, "has_corrective_cue_successful_only": 1.0, "has_encouragement": 1.0, "has_encouragement_successful_only": 1.0, "format_exact": 1.0, "format_exact_successful_only": 1.0, "format_four_lines": 1.0, "format_four_lines_successful_only": 1.0, "detected_class_in_schema": 1.0, "detected_class_in_schema_successful_only": 1.0, "notes": [ "This is the v18 hybrid acoustic+Gemma held-out evaluation.", "The lisp-class hint comes from acoustic features; Gemma generates the structured coaching response.", "Do not interpret these metrics as a pure direct-Gemma raw-audio classification result." ] }