{
  "evaluation_name": "v18_hybrid_acoustic_gemma_heldout",
  "status": "pass",
  "count": 2000,
  "success_count": 2000,
  "effective_success_count": 2000,
  "error_count": 0,
  "hard_error_count": 0,
  "hard_error_ids": [],
  "truncated_count": 225,
  "in_memory_retry_count": 75,
  "acoustic_hint_count": 2000,
  "acoustic_hint_match": 0.976,
  "response_repaired_count": 2000,
  "generation_fallback_count": 75,
  "class_match": 0.976,
  "class_match_successful_only": 0.976,
  "clear_match": 0.989,
  "clear_match_successful_only": 0.989,
  "has_reason": 1.0,
  "has_reason_successful_only": 1.0,
  "has_corrective_cue": 1.0,
  "has_corrective_cue_successful_only": 1.0,
  "has_encouragement": 1.0,
  "has_encouragement_successful_only": 1.0,
  "format_exact": 1.0,
  "format_exact_successful_only": 1.0,
  "format_four_lines": 1.0,
  "format_four_lines_successful_only": 1.0,
  "detected_class_in_schema": 1.0,
  "detected_class_in_schema_successful_only": 1.0,
  "notes": [
    "This is the v18 hybrid acoustic+Gemma held-out evaluation.",
    "The lisp-class hint comes from acoustic features; Gemma generates the structured coaching response.",
    "Do not interpret these metrics as a pure direct-Gemma raw-audio classification result."
  ]
}