lisper-gemma4-e2b-audio-full / eval_summary.json
thomasjvu's picture
Publish v18 hybrid eval summary
9cb59fc verified
{
"evaluation_name": "v18_hybrid_acoustic_gemma_heldout",
"status": "pass",
"count": 2000,
"success_count": 2000,
"effective_success_count": 2000,
"error_count": 0,
"hard_error_count": 0,
"hard_error_ids": [],
"truncated_count": 225,
"in_memory_retry_count": 75,
"acoustic_hint_count": 2000,
"acoustic_hint_match": 0.976,
"response_repaired_count": 2000,
"generation_fallback_count": 75,
"class_match": 0.976,
"class_match_successful_only": 0.976,
"clear_match": 0.989,
"clear_match_successful_only": 0.989,
"has_reason": 1.0,
"has_reason_successful_only": 1.0,
"has_corrective_cue": 1.0,
"has_corrective_cue_successful_only": 1.0,
"has_encouragement": 1.0,
"has_encouragement_successful_only": 1.0,
"format_exact": 1.0,
"format_exact_successful_only": 1.0,
"format_four_lines": 1.0,
"format_four_lines_successful_only": 1.0,
"detected_class_in_schema": 1.0,
"detected_class_in_schema_successful_only": 1.0,
"notes": [
"This is the v18 hybrid acoustic+Gemma held-out evaluation.",
"The lisp-class hint comes from acoustic features; Gemma generates the structured coaching response.",
"Do not interpret these metrics as a pure direct-Gemma raw-audio classification result."
]
}