JorgeAV committed on
Commit
9eb985d
·
verified ·
1 Parent(s): 4786946

Phase 4 results - SmolLM2 decoder, 7 epochs

Browse files
Files changed (1) hide show
  1. results/phase4_smollm2.json +91 -0
results/phase4_smollm2.json ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_name": "phase4_smollm2",
3
+ "phase": "4",
4
+ "decoder": "SmolLM2-135M-Instruct",
5
+ "bridge": "LLaVA-1.5 MLP (768->576->576)",
6
+ "backbone": "dinov3",
7
+ "K": 3,
8
+ "stage1_epochs": 3,
9
+ "stage2_epochs_completed": 4,
10
+ "stage2_epochs_target": 7,
11
+ "bridge_lr": 0.001,
12
+ "lm_lr": 2e-05,
13
+ "core_lr": 5e-05,
14
+ "label_smoothing": 0.1,
15
+ "num_evidence_tokens": 8,
16
+ "gen_weight": 2.0,
17
+ "eval_history": [
18
+ {
19
+ "epoch": "S1E0",
20
+ "mc": 58.5,
21
+ "docvqa_anls": 0.0,
22
+ "chartqa_ra": 0.0,
23
+ "textvqa_vqa": 0.0,
24
+ "composite": 14.6
25
+ },
26
+ {
27
+ "epoch": "S1E1",
28
+ "mc": 58.0,
29
+ "docvqa_anls": 0.0,
30
+ "chartqa_ra": 0.0,
31
+ "textvqa_vqa": 0.0,
32
+ "composite": 14.5
33
+ },
34
+ {
35
+ "epoch": "S1E2",
36
+ "mc": 58.0,
37
+ "docvqa_anls": 0.0,
38
+ "chartqa_ra": 0.0,
39
+ "textvqa_vqa": 0.0,
40
+ "composite": 14.5
41
+ },
42
+ {
43
+ "epoch": "S2E0",
44
+ "mc": 58.0,
45
+ "docvqa_anls": 0.0,
46
+ "chartqa_ra": 0.0,
47
+ "textvqa_vqa": 0.0,
48
+ "composite": 14.5
49
+ },
50
+ {
51
+ "epoch": "S2E1",
52
+ "mc": 55.0,
53
+ "docvqa_anls": 0.0,
54
+ "chartqa_ra": 0.0,
55
+ "textvqa_vqa": 0.0,
56
+ "composite": 13.8
57
+ },
58
+ {
59
+ "epoch": "S2E2",
60
+ "mc": 56.0,
61
+ "docvqa_anls": 0.0,
62
+ "chartqa_ra": 0.0,
63
+ "textvqa_vqa": 0.0,
64
+ "composite": 14.0
65
+ },
66
+ {
67
+ "epoch": "S2E3",
68
+ "mc": 55.0,
69
+ "docvqa_anls": 0.0,
70
+ "chartqa_ra": 0.0,
71
+ "textvqa_vqa": 0.0,
72
+ "composite": 13.8
73
+ }
74
+ ],
75
+ "key_findings": [
76
+ "SmolLM2 pre-trained decoder generates real English text (vs degenerate repetition in Phase 3.x)",
77
+ "Gen losses dropped dramatically: ChartQA 4.98->3.40, DocVQA 7.53->4.52, TextVQA 10.33->3.97",
78
+ "Predictions are diverse and topically relevant but too verbose for exact-match metrics",
79
+ "Bridge MLP successfully projects JEPA 768d -> SmolLM2 576d space",
80
+ "MC accuracy holds at 55-58% (slight catastrophic forgetting during Stage 2)",
81
+ "Next steps: post-processing, shorter generation, greedy decoding"
82
+ ],
83
+ "improvements_over_phase3": [
84
+ "Replaced random-init 4-layer decoder (7M params) with SmolLM2-135M (135M params)",
85
+ "LLaVA-1.5 two-layer MLP bridge for nonlinear alignment",
86
+ "Label smoothing (0.1) for smoother gradient signal",
87
+ "Nucleus sampling + repetition penalty in eval",
88
+ "Two-stage training: bridge-only then full fine-tuning",
89
+ "SmolLM2 tokenizer (49K vocab) vs Qwen3 (152K vocab)"
90
+ ]
91
+ }