thomasjvu committed on
Commit
9cb59fc
·
verified ·
1 Parent(s): 879ed7a

Publish v18 hybrid eval summary

Browse files
Files changed (1) hide show
  1. eval_summary.json +37 -0
eval_summary.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "evaluation_name": "v18_hybrid_acoustic_gemma_heldout",
3
+ "status": "pass",
4
+ "count": 2000,
5
+ "success_count": 2000,
6
+ "effective_success_count": 2000,
7
+ "error_count": 0,
8
+ "hard_error_count": 0,
9
+ "hard_error_ids": [],
10
+ "truncated_count": 225,
11
+ "in_memory_retry_count": 75,
12
+ "acoustic_hint_count": 2000,
13
+ "acoustic_hint_match": 0.976,
14
+ "response_repaired_count": 2000,
15
+ "generation_fallback_count": 75,
16
+ "class_match": 0.976,
17
+ "class_match_successful_only": 0.976,
18
+ "clear_match": 0.989,
19
+ "clear_match_successful_only": 0.989,
20
+ "has_reason": 1.0,
21
+ "has_reason_successful_only": 1.0,
22
+ "has_corrective_cue": 1.0,
23
+ "has_corrective_cue_successful_only": 1.0,
24
+ "has_encouragement": 1.0,
25
+ "has_encouragement_successful_only": 1.0,
26
+ "format_exact": 1.0,
27
+ "format_exact_successful_only": 1.0,
28
+ "format_four_lines": 1.0,
29
+ "format_four_lines_successful_only": 1.0,
30
+ "detected_class_in_schema": 1.0,
31
+ "detected_class_in_schema_successful_only": 1.0,
32
+ "notes": [
33
+ "This is the v18 hybrid acoustic+Gemma held-out evaluation.",
34
+ "The lisp-class hint comes from acoustic features; Gemma generates the structured coaching response.",
35
+ "Do not interpret these metrics as a pure direct-Gemma raw-audio classification result."
36
+ ]
37
+ }