andyjzhao commited on
Commit
8a87f50
·
verified ·
1 Parent(s): ac5f15e

Upload folder using huggingface_hub

Browse files
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73e0bebd95a2e6a37ad397ea656dc2d8e18c70e39c9590d7d105ea57d54f2f98
3
+ size 558664971
checkpoint-1000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac6fa33968d4ed26b5e442c2cc888a6a86a98b5325a529d3bfea288be0093c5b
3
+ size 279336283
checkpoint-1000/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b093dfe59b41efeb45cc3d628d3360abaa2303bbaa489081411faf431e52941d
3
+ size 16389
checkpoint-1000/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:450a0ac1645503c0b14fe9c37d77060cc76b1c9942dcfdd0e779cd526b2e98d9
3
+ size 16389
checkpoint-1000/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:938b37918eac9a4cbef3805f7d2abdcef094a334f848e73ac19fcdc39d38663a
3
+ size 16389
checkpoint-1000/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8b27a54988f134299ab296b95e8c1e63d476dffdba7c6f120f2076e8688f355
3
+ size 16389
checkpoint-1000/rng_state_4.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d95f73d920296d5d9558e47894c5a2c0d649d7cb10a3b07a013d6bfbd3b8cf90
3
+ size 16389
checkpoint-1000/rng_state_5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70b945bb634c9daf4a00433296ecc5245b34a2b5f09017993b5f5f03b84dabea
3
+ size 16389
checkpoint-1000/rng_state_6.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfdd1fca0dace16a59c8592c531a70661218184bb0249c5862bbfb5ab0844fc9
3
+ size 16389
checkpoint-1000/rng_state_7.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d106363f9f1b0ff898c86d083a097bf22fd84de35e5670aa299504abcc99752a
3
+ size 16389
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f10ac0b70778fb49c17b30c87ede08deb3a2bbd486b8970ed9515f2cf51c5eb
3
+ size 1529
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,2012 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1000,
3
+ "best_metric": 3.2542863563067255,
4
+ "best_model_checkpoint": "/gpfs/scratch/guoh/DNAFM/output/gencode_human_12.8k_12800/GeneZip-4R_12.8K-100B/checkpoint-1000",
5
+ "epoch": 0.28368794326241137,
6
+ "eval_steps": 500,
7
+ "global_step": 1000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "comp/rl_weight": 0.03,
14
+ "comp/strictness": 1.0,
15
+ "epoch": 0.0028368794326241137,
16
+ "grad_norm": 1631.21875,
17
+ "loss": 88.5514,
18
+ "loss_ce": 61.01824188232422,
19
+ "loss_region": 0.08141374588012695,
20
+ "loss_total": 61.09965515136719,
21
+ "lr": 2.20454076850486e-05,
22
+ "router/avg_deficit_s0": 0.0,
23
+ "router/avg_deficit_s1": 0.0,
24
+ "router/selected_tokens_s0": 829.25,
25
+ "router/selected_tokens_s1": 174.53125,
26
+ "router/trigger_rate_s0": 0.0,
27
+ "router/trigger_rate_s1": 0.0,
28
+ "step": 10,
29
+ "tokens_trained": 0.03276544
30
+ },
31
+ {
32
+ "comp/rl_weight": 0.03,
33
+ "comp/strictness": 1.0,
34
+ "epoch": 0.005673758865248227,
35
+ "grad_norm": 434.8985595703125,
36
+ "loss": 27.284,
37
+ "loss_ce": 15.031281471252441,
38
+ "loss_region": 0.13326193392276764,
39
+ "loss_total": 15.164543151855469,
40
+ "lr": 4.654030511288038e-05,
41
+ "router/avg_deficit_s0": 7.0,
42
+ "router/avg_deficit_s1": 7.0,
43
+ "router/selected_tokens_s0": 1.0,
44
+ "router/selected_tokens_s1": 1.0,
45
+ "router/trigger_rate_s0": 1.0,
46
+ "router/trigger_rate_s1": 1.0,
47
+ "step": 20,
48
+ "tokens_trained": 0.06553088
49
+ },
50
+ {
51
+ "comp/rl_weight": 0.03,
52
+ "comp/strictness": 1.0,
53
+ "epoch": 0.00851063829787234,
54
+ "grad_norm": 583.2597045898438,
55
+ "loss": 10.2036,
56
+ "loss_ce": 7.8456501960754395,
57
+ "loss_region": 0.06420987844467163,
58
+ "loss_total": 7.909860134124756,
59
+ "lr": 7.103520254071216e-05,
60
+ "router/avg_deficit_s0": 0.0,
61
+ "router/avg_deficit_s1": 7.0,
62
+ "router/selected_tokens_s0": 99.875,
63
+ "router/selected_tokens_s1": 1.0,
64
+ "router/trigger_rate_s0": 0.0,
65
+ "router/trigger_rate_s1": 1.0,
66
+ "step": 30,
67
+ "tokens_trained": 0.09829632
68
+ },
69
+ {
70
+ "comp/rl_weight": 0.03,
71
+ "comp/strictness": 1.0,
72
+ "epoch": 0.011347517730496455,
73
+ "grad_norm": 476.65606689453125,
74
+ "loss": 14.0147,
75
+ "loss_ce": 18.72601318359375,
76
+ "loss_region": 0.13252411782741547,
77
+ "loss_total": 18.858537673950195,
78
+ "lr": 9.553009996854394e-05,
79
+ "router/avg_deficit_s0": 7.0,
80
+ "router/avg_deficit_s1": 7.0,
81
+ "router/selected_tokens_s0": 1.0,
82
+ "router/selected_tokens_s1": 1.0,
83
+ "router/trigger_rate_s0": 1.0,
84
+ "router/trigger_rate_s1": 1.0,
85
+ "step": 40,
86
+ "tokens_trained": 0.13106176
87
+ },
88
+ {
89
+ "comp/rl_weight": 0.03,
90
+ "comp/strictness": 1.0,
91
+ "epoch": 0.014184397163120567,
92
+ "grad_norm": 574.13232421875,
93
+ "loss": 11.397,
94
+ "loss_ce": 12.435722351074219,
95
+ "loss_region": 0.13453753292560577,
96
+ "loss_total": 12.570260047912598,
97
+ "lr": 0.00012002499739637572,
98
+ "router/avg_deficit_s0": 7.0,
99
+ "router/avg_deficit_s1": 7.0,
100
+ "router/selected_tokens_s0": 1.0,
101
+ "router/selected_tokens_s1": 1.0,
102
+ "router/trigger_rate_s0": 1.0,
103
+ "router/trigger_rate_s1": 1.0,
104
+ "step": 50,
105
+ "tokens_trained": 0.1638272
106
+ },
107
+ {
108
+ "comp/rl_weight": 0.03,
109
+ "comp/strictness": 1.0,
110
+ "epoch": 0.01702127659574468,
111
+ "grad_norm": 572.3484497070312,
112
+ "loss": 11.4654,
113
+ "loss_ce": 15.20344066619873,
114
+ "loss_region": 0.1364777684211731,
115
+ "loss_total": 15.33991813659668,
116
+ "lr": 0.00014451989482420748,
117
+ "router/avg_deficit_s0": 7.0,
118
+ "router/avg_deficit_s1": 7.0,
119
+ "router/selected_tokens_s0": 1.0,
120
+ "router/selected_tokens_s1": 1.0,
121
+ "router/trigger_rate_s0": 1.0,
122
+ "router/trigger_rate_s1": 1.0,
123
+ "step": 60,
124
+ "tokens_trained": 0.19659264
125
+ },
126
+ {
127
+ "comp/rl_weight": 0.03,
128
+ "comp/strictness": 1.0,
129
+ "epoch": 0.019858156028368795,
130
+ "grad_norm": 506.12725830078125,
131
+ "loss": 10.0282,
132
+ "loss_ce": 7.821432590484619,
133
+ "loss_region": 0.12491649389266968,
134
+ "loss_total": 7.946349143981934,
135
+ "lr": 0.00016901479225203927,
136
+ "router/avg_deficit_s0": 7.0,
137
+ "router/avg_deficit_s1": 7.0,
138
+ "router/selected_tokens_s0": 1.0,
139
+ "router/selected_tokens_s1": 1.0,
140
+ "router/trigger_rate_s0": 1.0,
141
+ "router/trigger_rate_s1": 1.0,
142
+ "step": 70,
143
+ "tokens_trained": 0.22935808
144
+ },
145
+ {
146
+ "comp/rl_weight": 0.03,
147
+ "comp/strictness": 1.0,
148
+ "epoch": 0.02269503546099291,
149
+ "grad_norm": 327.83026123046875,
150
+ "loss": 9.0494,
151
+ "loss_ce": 13.648162841796875,
152
+ "loss_region": 0.1402742862701416,
153
+ "loss_total": 13.788436889648438,
154
+ "lr": 0.00019350968967987104,
155
+ "router/avg_deficit_s0": 7.0,
156
+ "router/avg_deficit_s1": 7.0,
157
+ "router/selected_tokens_s0": 1.0,
158
+ "router/selected_tokens_s1": 1.0,
159
+ "router/trigger_rate_s0": 1.0,
160
+ "router/trigger_rate_s1": 1.0,
161
+ "step": 80,
162
+ "tokens_trained": 0.26212352
163
+ },
164
+ {
165
+ "comp/rl_weight": 0.03,
166
+ "comp/strictness": 1.0,
167
+ "epoch": 0.02553191489361702,
168
+ "grad_norm": 533.38037109375,
169
+ "loss": 10.6353,
170
+ "loss_ce": 11.381054878234863,
171
+ "loss_region": 0.12512199580669403,
172
+ "loss_total": 11.506176948547363,
173
+ "lr": 0.0002180045871077028,
174
+ "router/avg_deficit_s0": 7.0,
175
+ "router/avg_deficit_s1": 7.0,
176
+ "router/selected_tokens_s0": 1.0,
177
+ "router/selected_tokens_s1": 1.0,
178
+ "router/trigger_rate_s0": 1.0,
179
+ "router/trigger_rate_s1": 1.0,
180
+ "step": 90,
181
+ "tokens_trained": 0.29488896
182
+ },
183
+ {
184
+ "comp/rl_weight": 0.03,
185
+ "comp/strictness": 1.0,
186
+ "epoch": 0.028368794326241134,
187
+ "grad_norm": 491.7203063964844,
188
+ "loss": 10.7556,
189
+ "loss_ce": 14.666193962097168,
190
+ "loss_region": 0.13100658357143402,
191
+ "loss_total": 14.797200202941895,
192
+ "lr": 0.00024249948453553463,
193
+ "router/avg_deficit_s0": 7.0,
194
+ "router/avg_deficit_s1": 7.0,
195
+ "router/selected_tokens_s0": 1.0,
196
+ "router/selected_tokens_s1": 1.0,
197
+ "router/trigger_rate_s0": 1.0,
198
+ "router/trigger_rate_s1": 1.0,
199
+ "step": 100,
200
+ "tokens_trained": 0.3276544
201
+ },
202
+ {
203
+ "comp/rl_weight": 0.03,
204
+ "comp/strictness": 1.0,
205
+ "epoch": 0.031205673758865248,
206
+ "grad_norm": 412.38726806640625,
207
+ "loss": 8.1796,
208
+ "loss_ce": 7.042880535125732,
209
+ "loss_region": 0.12635233998298645,
210
+ "loss_total": 7.1692328453063965,
211
+ "lr": 0.00026699438196336637,
212
+ "router/avg_deficit_s0": 7.0,
213
+ "router/avg_deficit_s1": 7.0,
214
+ "router/selected_tokens_s0": 1.0,
215
+ "router/selected_tokens_s1": 1.0,
216
+ "router/trigger_rate_s0": 1.0,
217
+ "router/trigger_rate_s1": 1.0,
218
+ "step": 110,
219
+ "tokens_trained": 0.36041984
220
+ },
221
+ {
222
+ "comp/rl_weight": 0.03,
223
+ "comp/strictness": 1.0,
224
+ "epoch": 0.03404255319148936,
225
+ "grad_norm": 380.94964599609375,
226
+ "loss": 10.906,
227
+ "loss_ce": 11.391271591186523,
228
+ "loss_region": 0.13119234144687653,
229
+ "loss_total": 11.52246379852295,
230
+ "lr": 0.00029148927939119814,
231
+ "router/avg_deficit_s0": 6.96875,
232
+ "router/avg_deficit_s1": 7.0,
233
+ "router/selected_tokens_s0": 1.03125,
234
+ "router/selected_tokens_s1": 1.0,
235
+ "router/trigger_rate_s0": 1.0,
236
+ "router/trigger_rate_s1": 1.0,
237
+ "step": 120,
238
+ "tokens_trained": 0.39318448
239
+ },
240
+ {
241
+ "comp/rl_weight": 0.03,
242
+ "comp/strictness": 1.0,
243
+ "epoch": 0.03687943262411347,
244
+ "grad_norm": 199.84938049316406,
245
+ "loss": 9.1563,
246
+ "loss_ce": 3.2591464519500732,
247
+ "loss_region": 0.06539206206798553,
248
+ "loss_total": 3.324538469314575,
249
+ "lr": 0.00031598417681902996,
250
+ "router/avg_deficit_s0": 5.0,
251
+ "router/avg_deficit_s1": 7.0,
252
+ "router/selected_tokens_s0": 47.1875,
253
+ "router/selected_tokens_s1": 1.0,
254
+ "router/trigger_rate_s0": 0.03125,
255
+ "router/trigger_rate_s1": 1.0,
256
+ "step": 130,
257
+ "tokens_trained": 0.42594992
258
+ },
259
+ {
260
+ "comp/rl_weight": 0.03,
261
+ "comp/strictness": 1.0,
262
+ "epoch": 0.03971631205673759,
263
+ "grad_norm": 296.4170227050781,
264
+ "loss": 12.2809,
265
+ "loss_ce": 11.650270462036133,
266
+ "loss_region": 0.09621138125658035,
267
+ "loss_total": 11.746481895446777,
268
+ "lr": 0.00034047907424686173,
269
+ "router/avg_deficit_s0": 0.0,
270
+ "router/avg_deficit_s1": 7.0,
271
+ "router/selected_tokens_s0": 2378.96875,
272
+ "router/selected_tokens_s1": 1.0,
273
+ "router/trigger_rate_s0": 0.0,
274
+ "router/trigger_rate_s1": 1.0,
275
+ "step": 140,
276
+ "tokens_trained": 0.45871536
277
+ },
278
+ {
279
+ "comp/rl_weight": 0.03,
280
+ "comp/strictness": 1.0,
281
+ "epoch": 0.0425531914893617,
282
+ "grad_norm": 286.05914306640625,
283
+ "loss": 9.714,
284
+ "loss_ce": 13.918790817260742,
285
+ "loss_region": 0.1489456295967102,
286
+ "loss_total": 14.067736625671387,
287
+ "lr": 0.0003649739716746935,
288
+ "router/avg_deficit_s0": 0.0,
289
+ "router/avg_deficit_s1": 7.0,
290
+ "router/selected_tokens_s0": 4390.6875,
291
+ "router/selected_tokens_s1": 1.0,
292
+ "router/trigger_rate_s0": 0.0,
293
+ "router/trigger_rate_s1": 1.0,
294
+ "step": 150,
295
+ "tokens_trained": 0.4914808
296
+ },
297
+ {
298
+ "comp/rl_weight": 0.03,
299
+ "comp/strictness": 1.0,
300
+ "epoch": 0.04539007092198582,
301
+ "grad_norm": 256.81048583984375,
302
+ "loss": 7.6435,
303
+ "loss_ce": 7.968850135803223,
304
+ "loss_region": 0.1997712403535843,
305
+ "loss_total": 8.168621063232422,
306
+ "lr": 0.00038946886910252526,
307
+ "router/avg_deficit_s0": 0.0,
308
+ "router/avg_deficit_s1": 7.0,
309
+ "router/selected_tokens_s0": 6185.8125,
310
+ "router/selected_tokens_s1": 1.0,
311
+ "router/trigger_rate_s0": 0.0,
312
+ "router/trigger_rate_s1": 1.0,
313
+ "step": 160,
314
+ "tokens_trained": 0.52424544
315
+ },
316
+ {
317
+ "comp/rl_weight": 0.03,
318
+ "comp/strictness": 1.0,
319
+ "epoch": 0.04822695035460993,
320
+ "grad_norm": 223.51954650878906,
321
+ "loss": 5.6721,
322
+ "loss_ce": 7.768090724945068,
323
+ "loss_region": 0.10872498154640198,
324
+ "loss_total": 7.8768157958984375,
325
+ "lr": 0.0004139637665303571,
326
+ "router/avg_deficit_s0": 0.0,
327
+ "router/avg_deficit_s1": 7.0,
328
+ "router/selected_tokens_s0": 2846.78125,
329
+ "router/selected_tokens_s1": 1.0,
330
+ "router/trigger_rate_s0": 0.0,
331
+ "router/trigger_rate_s1": 1.0,
332
+ "step": 170,
333
+ "tokens_trained": 0.55701008
334
+ },
335
+ {
336
+ "comp/rl_weight": 0.03,
337
+ "comp/strictness": 1.0,
338
+ "epoch": 0.05106382978723404,
339
+ "grad_norm": 218.51199340820312,
340
+ "loss": 5.0097,
341
+ "loss_ce": 4.9551897048950195,
342
+ "loss_region": 0.11287109553813934,
343
+ "loss_total": 5.068060874938965,
344
+ "lr": 0.0004384586639581888,
345
+ "router/avg_deficit_s0": 0.0,
346
+ "router/avg_deficit_s1": 7.0,
347
+ "router/selected_tokens_s0": 3095.65625,
348
+ "router/selected_tokens_s1": 1.0,
349
+ "router/trigger_rate_s0": 0.0,
350
+ "router/trigger_rate_s1": 1.0,
351
+ "step": 180,
352
+ "tokens_trained": 0.58977552
353
+ },
354
+ {
355
+ "comp/rl_weight": 0.03,
356
+ "comp/strictness": 1.0,
357
+ "epoch": 0.05390070921985816,
358
+ "grad_norm": 111.65990447998047,
359
+ "loss": 4.0707,
360
+ "loss_ce": 1.8088655471801758,
361
+ "loss_region": 0.1163250282406807,
362
+ "loss_total": 1.9251905679702759,
363
+ "lr": 0.0004629535613860206,
364
+ "router/avg_deficit_s0": 0.0,
365
+ "router/avg_deficit_s1": 7.0,
366
+ "router/selected_tokens_s0": 3243.25,
367
+ "router/selected_tokens_s1": 1.0,
368
+ "router/trigger_rate_s0": 0.0,
369
+ "router/trigger_rate_s1": 1.0,
370
+ "step": 190,
371
+ "tokens_trained": 0.62254016
372
+ },
373
+ {
374
+ "comp/rl_weight": 0.03,
375
+ "comp/strictness": 1.0,
376
+ "epoch": 0.05673758865248227,
377
+ "grad_norm": 202.0291748046875,
378
+ "loss": 3.8831,
379
+ "loss_ce": 4.9208855628967285,
380
+ "loss_region": 0.15217137336730957,
381
+ "loss_total": 5.073057174682617,
382
+ "lr": 0.00048744845881385244,
383
+ "router/avg_deficit_s0": 0.0,
384
+ "router/avg_deficit_s1": 7.0,
385
+ "router/selected_tokens_s0": 4667.375,
386
+ "router/selected_tokens_s1": 1.0,
387
+ "router/trigger_rate_s0": 0.0,
388
+ "router/trigger_rate_s1": 1.0,
389
+ "step": 200,
390
+ "tokens_trained": 0.6553056
391
+ },
392
+ {
393
+ "comp/rl_weight": 0.03,
394
+ "comp/strictness": 1.0,
395
+ "epoch": 0.059574468085106386,
396
+ "grad_norm": 148.11932373046875,
397
+ "loss": 5.9535,
398
+ "loss_ce": 6.731753349304199,
399
+ "loss_region": 0.1624327301979065,
400
+ "loss_total": 6.894186019897461,
401
+ "lr": 0.0005119433562416841,
402
+ "router/avg_deficit_s0": 0.0,
403
+ "router/avg_deficit_s1": 7.0,
404
+ "router/selected_tokens_s0": 5666.625,
405
+ "router/selected_tokens_s1": 1.0,
406
+ "router/trigger_rate_s0": 0.0,
407
+ "router/trigger_rate_s1": 1.0,
408
+ "step": 210,
409
+ "tokens_trained": 0.68807104
410
+ },
411
+ {
412
+ "comp/rl_weight": 0.03,
413
+ "comp/strictness": 1.0,
414
+ "epoch": 0.062411347517730496,
415
+ "grad_norm": 170.54896545410156,
416
+ "loss": 6.0952,
417
+ "loss_ce": 6.673608303070068,
418
+ "loss_region": 0.05599266663193703,
419
+ "loss_total": 6.72960090637207,
420
+ "lr": 0.0005364382536695159,
421
+ "router/avg_deficit_s0": 0.0,
422
+ "router/avg_deficit_s1": 7.0,
423
+ "router/selected_tokens_s0": 173.8125,
424
+ "router/selected_tokens_s1": 1.0,
425
+ "router/trigger_rate_s0": 0.0,
426
+ "router/trigger_rate_s1": 1.0,
427
+ "step": 220,
428
+ "tokens_trained": 0.72083648
429
+ },
430
+ {
431
+ "comp/rl_weight": 0.03,
432
+ "comp/strictness": 1.0,
433
+ "epoch": 0.06524822695035461,
434
+ "grad_norm": 97.6951904296875,
435
+ "loss": 5.0635,
436
+ "loss_ce": 4.121210098266602,
437
+ "loss_region": 0.20329658687114716,
438
+ "loss_total": 4.324506759643555,
439
+ "lr": 0.0005609331510973477,
440
+ "router/avg_deficit_s0": 0.0,
441
+ "router/avg_deficit_s1": 7.0,
442
+ "router/selected_tokens_s0": 6635.03125,
443
+ "router/selected_tokens_s1": 1.0,
444
+ "router/trigger_rate_s0": 0.0,
445
+ "router/trigger_rate_s1": 1.0,
446
+ "step": 230,
447
+ "tokens_trained": 0.75360192
448
+ },
449
+ {
450
+ "comp/rl_weight": 0.03,
451
+ "comp/strictness": 1.0,
452
+ "epoch": 0.06808510638297872,
453
+ "grad_norm": 138.48391723632812,
454
+ "loss": 3.7867,
455
+ "loss_ce": 4.167834281921387,
456
+ "loss_region": 0.06501111388206482,
457
+ "loss_total": 4.232845306396484,
458
+ "lr": 0.0005854280485251795,
459
+ "router/avg_deficit_s0": 0.0,
460
+ "router/avg_deficit_s1": 7.0,
461
+ "router/selected_tokens_s0": 1004.15625,
462
+ "router/selected_tokens_s1": 1.0,
463
+ "router/trigger_rate_s0": 0.0,
464
+ "router/trigger_rate_s1": 1.0,
465
+ "step": 240,
466
+ "tokens_trained": 0.78636736
467
+ },
468
+ {
469
+ "comp/rl_weight": 0.03,
470
+ "comp/strictness": 1.0,
471
+ "epoch": 0.07092198581560284,
472
+ "grad_norm": 132.49085998535156,
473
+ "loss": 3.4818,
474
+ "loss_ce": 4.135341167449951,
475
+ "loss_region": 0.059110887348651886,
476
+ "loss_total": 4.194452285766602,
477
+ "lr": 0.0006099229459530113,
478
+ "router/avg_deficit_s0": 0.0,
479
+ "router/avg_deficit_s1": 7.0,
480
+ "router/selected_tokens_s0": 102.625,
481
+ "router/selected_tokens_s1": 1.0,
482
+ "router/trigger_rate_s0": 0.0,
483
+ "router/trigger_rate_s1": 1.0,
484
+ "step": 250,
485
+ "tokens_trained": 0.8191328
486
+ },
487
+ {
488
+ "comp/rl_weight": 0.03,
489
+ "comp/strictness": 1.0,
490
+ "epoch": 0.07375886524822695,
491
+ "grad_norm": 114.51081848144531,
492
+ "loss": 3.9975,
493
+ "loss_ce": 4.087859630584717,
494
+ "loss_region": 0.06149622052907944,
495
+ "loss_total": 4.149355888366699,
496
+ "lr": 0.0006344178433808431,
497
+ "router/avg_deficit_s0": 0.0,
498
+ "router/avg_deficit_s1": 7.0,
499
+ "router/selected_tokens_s0": 696.5625,
500
+ "router/selected_tokens_s1": 1.0,
501
+ "router/trigger_rate_s0": 0.0,
502
+ "router/trigger_rate_s1": 1.0,
503
+ "step": 260,
504
+ "tokens_trained": 0.85189824
505
+ },
506
+ {
507
+ "comp/rl_weight": 0.03,
508
+ "comp/strictness": 1.0,
509
+ "epoch": 0.07659574468085106,
510
+ "grad_norm": 97.07891845703125,
511
+ "loss": 4.1583,
512
+ "loss_ce": 5.67254638671875,
513
+ "loss_region": 0.08243336528539658,
514
+ "loss_total": 5.754979610443115,
515
+ "lr": 0.0006589127408086749,
516
+ "router/avg_deficit_s0": 0.0,
517
+ "router/avg_deficit_s1": 7.0,
518
+ "router/selected_tokens_s0": 1881.78125,
519
+ "router/selected_tokens_s1": 1.0,
520
+ "router/trigger_rate_s0": 0.0,
521
+ "router/trigger_rate_s1": 1.0,
522
+ "step": 270,
523
+ "tokens_trained": 0.88466288
524
+ },
525
+ {
526
+ "comp/rl_weight": 0.03,
527
+ "comp/strictness": 1.0,
528
+ "epoch": 0.07943262411347518,
529
+ "grad_norm": 117.09831237792969,
530
+ "loss": 4.3864,
531
+ "loss_ce": 3.5589284896850586,
532
+ "loss_region": 0.08375530689954758,
533
+ "loss_total": 3.642683744430542,
534
+ "lr": 0.0006834076382365066,
535
+ "router/avg_deficit_s0": 0.0,
536
+ "router/avg_deficit_s1": 7.0,
537
+ "router/selected_tokens_s0": 2048.4375,
538
+ "router/selected_tokens_s1": 1.0,
539
+ "router/trigger_rate_s0": 0.0,
540
+ "router/trigger_rate_s1": 1.0,
541
+ "step": 280,
542
+ "tokens_trained": 0.91742832
543
+ },
544
+ {
545
+ "comp/rl_weight": 0.03,
546
+ "comp/strictness": 1.0,
547
+ "epoch": 0.08226950354609928,
548
+ "grad_norm": 50.94511413574219,
549
+ "loss": 2.6175,
550
+ "loss_ce": 1.673315167427063,
551
+ "loss_region": 0.05678440257906914,
552
+ "loss_total": 1.7300995588302612,
553
+ "lr": 0.0007079025356643384,
554
+ "router/avg_deficit_s0": 0.0,
555
+ "router/avg_deficit_s1": 7.0,
556
+ "router/selected_tokens_s0": 916.25,
557
+ "router/selected_tokens_s1": 1.0,
558
+ "router/trigger_rate_s0": 0.0,
559
+ "router/trigger_rate_s1": 1.0,
560
+ "step": 290,
561
+ "tokens_trained": 0.95019376
562
+ },
563
+ {
564
+ "comp/rl_weight": 0.03,
565
+ "comp/strictness": 1.0,
566
+ "epoch": 0.0851063829787234,
567
+ "grad_norm": 101.19383239746094,
568
+ "loss": 2.602,
569
+ "loss_ce": 2.7257542610168457,
570
+ "loss_region": 0.06887104362249374,
571
+ "loss_total": 2.7946252822875977,
572
+ "lr": 0.0007323974330921702,
573
+ "router/avg_deficit_s0": 0.0,
574
+ "router/avg_deficit_s1": 7.0,
575
+ "router/selected_tokens_s0": 1558.4375,
576
+ "router/selected_tokens_s1": 1.0,
577
+ "router/trigger_rate_s0": 0.0,
578
+ "router/trigger_rate_s1": 1.0,
579
+ "step": 300,
580
+ "tokens_trained": 0.9829592
581
+ },
582
+ {
583
+ "comp/rl_weight": 0.03,
584
+ "comp/strictness": 1.0,
585
+ "epoch": 0.08794326241134752,
586
+ "grad_norm": 103.9866943359375,
587
+ "loss": 2.7451,
588
+ "loss_ce": 4.545335292816162,
589
+ "loss_region": 0.06630731374025345,
590
+ "loss_total": 4.611642837524414,
591
+ "lr": 0.000756892330520002,
592
+ "router/avg_deficit_s0": 0.0,
593
+ "router/avg_deficit_s1": 7.0,
594
+ "router/selected_tokens_s0": 1316.53125,
595
+ "router/selected_tokens_s1": 1.0,
596
+ "router/trigger_rate_s0": 0.0,
597
+ "router/trigger_rate_s1": 1.0,
598
+ "step": 310,
599
+ "tokens_trained": 1.01572464
600
+ },
601
+ {
602
+ "comp/rl_weight": 0.03,
603
+ "comp/strictness": 1.0,
604
+ "epoch": 0.09078014184397164,
605
+ "grad_norm": 68.10435485839844,
606
+ "loss": 3.1077,
607
+ "loss_ce": 3.9576547145843506,
608
+ "loss_region": 0.06686295568943024,
609
+ "loss_total": 4.02451753616333,
610
+ "lr": 0.0007813872279478337,
611
+ "router/avg_deficit_s0": 0.0,
612
+ "router/avg_deficit_s1": 7.0,
613
+ "router/selected_tokens_s0": 1619.4375,
614
+ "router/selected_tokens_s1": 1.0,
615
+ "router/trigger_rate_s0": 0.0,
616
+ "router/trigger_rate_s1": 1.0,
617
+ "step": 320,
618
+ "tokens_trained": 1.04848992
619
+ },
620
+ {
621
+ "comp/rl_weight": 0.03,
622
+ "comp/strictness": 1.0,
623
+ "epoch": 0.09361702127659574,
624
+ "grad_norm": 66.0996322631836,
625
+ "loss": 3.6268,
626
+ "loss_ce": 2.3814711570739746,
627
+ "loss_region": 0.06678226590156555,
628
+ "loss_total": 2.4482533931732178,
629
+ "lr": 0.0008058821253756655,
630
+ "router/avg_deficit_s0": 0.0,
631
+ "router/avg_deficit_s1": 7.0,
632
+ "router/selected_tokens_s0": 1653.875,
633
+ "router/selected_tokens_s1": 1.0,
634
+ "router/trigger_rate_s0": 0.0,
635
+ "router/trigger_rate_s1": 1.0,
636
+ "step": 330,
637
+ "tokens_trained": 1.08125456
638
+ },
639
+ {
640
+ "comp/rl_weight": 0.03,
641
+ "comp/strictness": 1.0,
642
+ "epoch": 0.09645390070921986,
643
+ "grad_norm": 85.41226959228516,
644
+ "loss": 2.2121,
645
+ "loss_ce": 2.704575300216675,
646
+ "loss_region": 0.06229915842413902,
647
+ "loss_total": 2.7668745517730713,
648
+ "lr": 0.0008303770228034974,
649
+ "router/avg_deficit_s0": 0.0,
650
+ "router/avg_deficit_s1": 7.0,
651
+ "router/selected_tokens_s0": 1233.90625,
652
+ "router/selected_tokens_s1": 1.0,
653
+ "router/trigger_rate_s0": 0.0,
654
+ "router/trigger_rate_s1": 1.0,
655
+ "step": 340,
656
+ "tokens_trained": 1.11402
657
+ },
658
+ {
659
+ "comp/rl_weight": 0.03,
660
+ "comp/strictness": 1.0,
661
+ "epoch": 0.09929078014184398,
662
+ "grad_norm": 65.33210754394531,
663
+ "loss": 3.0251,
664
+ "loss_ce": 2.7039618492126465,
665
+ "loss_region": 0.06328069418668747,
666
+ "loss_total": 2.767242431640625,
667
+ "lr": 0.0008548719202313291,
668
+ "router/avg_deficit_s0": 0.0,
669
+ "router/avg_deficit_s1": 7.0,
670
+ "router/selected_tokens_s0": 1259.8125,
671
+ "router/selected_tokens_s1": 1.0,
672
+ "router/trigger_rate_s0": 0.0,
673
+ "router/trigger_rate_s1": 1.0,
674
+ "step": 350,
675
+ "tokens_trained": 1.14678544
676
+ },
677
+ {
678
+ "comp/rl_weight": 0.03,
679
+ "comp/strictness": 1.0,
680
+ "epoch": 0.10212765957446808,
681
+ "grad_norm": 90.26396179199219,
682
+ "loss": 3.2803,
683
+ "loss_ce": 3.2153475284576416,
684
+ "loss_region": 0.09488862007856369,
685
+ "loss_total": 3.3102362155914307,
686
+ "lr": 0.0008793668176591608,
687
+ "router/avg_deficit_s0": 0.0,
688
+ "router/avg_deficit_s1": 7.0,
689
+ "router/selected_tokens_s0": 2469.34375,
690
+ "router/selected_tokens_s1": 1.0,
691
+ "router/trigger_rate_s0": 0.0,
692
+ "router/trigger_rate_s1": 1.0,
693
+ "step": 360,
694
+ "tokens_trained": 1.17955088
695
+ },
696
+ {
697
+ "comp/rl_weight": 0.03,
698
+ "comp/strictness": 1.0,
699
+ "epoch": 0.1049645390070922,
700
+ "grad_norm": 54.07102584838867,
701
+ "loss": 2.0596,
702
+ "loss_ce": 2.0937747955322266,
703
+ "loss_region": 0.06421175599098206,
704
+ "loss_total": 2.157986640930176,
705
+ "lr": 0.0009038617150869926,
706
+ "router/avg_deficit_s0": 0.0,
707
+ "router/avg_deficit_s1": 7.0,
708
+ "router/selected_tokens_s0": 1472.28125,
709
+ "router/selected_tokens_s1": 1.0,
710
+ "router/trigger_rate_s0": 0.0,
711
+ "router/trigger_rate_s1": 1.0,
712
+ "step": 370,
713
+ "tokens_trained": 1.21231632
714
+ },
715
+ {
716
+ "comp/rl_weight": 0.03,
717
+ "comp/strictness": 1.0,
718
+ "epoch": 0.10780141843971631,
719
+ "grad_norm": 48.10196304321289,
720
+ "loss": 2.1087,
721
+ "loss_ce": 2.1116416454315186,
722
+ "loss_region": 0.05196397379040718,
723
+ "loss_total": 2.1636056900024414,
724
+ "lr": 0.0009283566125148244,
725
+ "router/avg_deficit_s0": 0.0,
726
+ "router/avg_deficit_s1": 7.0,
727
+ "router/selected_tokens_s0": 449.125,
728
+ "router/selected_tokens_s1": 1.0,
729
+ "router/trigger_rate_s0": 0.0,
730
+ "router/trigger_rate_s1": 1.0,
731
+ "step": 380,
732
+ "tokens_trained": 1.24508176
733
+ },
734
+ {
735
+ "comp/rl_weight": 0.03,
736
+ "comp/strictness": 1.0,
737
+ "epoch": 0.11063829787234042,
738
+ "grad_norm": 54.023807525634766,
739
+ "loss": 2.2794,
740
+ "loss_ce": 2.8213136196136475,
741
+ "loss_region": 0.059752676635980606,
742
+ "loss_total": 2.88106632232666,
743
+ "lr": 0.0009528515099426562,
744
+ "router/avg_deficit_s0": 0.0,
745
+ "router/avg_deficit_s1": 7.0,
746
+ "router/selected_tokens_s0": 1112.1875,
747
+ "router/selected_tokens_s1": 1.0,
748
+ "router/trigger_rate_s0": 0.0,
749
+ "router/trigger_rate_s1": 1.0,
750
+ "step": 390,
751
+ "tokens_trained": 1.2778448
752
+ },
753
+ {
754
+ "comp/rl_weight": 0.03,
755
+ "comp/strictness": 1.0,
756
+ "epoch": 0.11347517730496454,
757
+ "grad_norm": 89.19528198242188,
758
+ "loss": 2.4597,
759
+ "loss_ce": 3.5597033500671387,
760
+ "loss_region": 0.2661536931991577,
761
+ "loss_total": 3.825857162475586,
762
+ "lr": 0.000977346407370488,
763
+ "router/avg_deficit_s0": 7.0,
764
+ "router/avg_deficit_s1": 7.0,
765
+ "router/selected_tokens_s0": 1.0,
766
+ "router/selected_tokens_s1": 1.0,
767
+ "router/trigger_rate_s0": 1.0,
768
+ "router/trigger_rate_s1": 1.0,
769
+ "step": 400,
770
+ "tokens_trained": 1.31061024
771
+ },
772
+ {
773
+ "comp/rl_weight": 0.03,
774
+ "comp/strictness": 1.0,
775
+ "epoch": 0.11631205673758865,
776
+ "grad_norm": 37.59690856933594,
777
+ "loss": 2.0656,
778
+ "loss_ce": 1.5812817811965942,
779
+ "loss_region": 0.07297738641500473,
780
+ "loss_total": 1.654259204864502,
781
+ "lr": 0.0010018413047983197,
782
+ "router/avg_deficit_s0": 0.0,
783
+ "router/avg_deficit_s1": 7.0,
784
+ "router/selected_tokens_s0": 1352.25,
785
+ "router/selected_tokens_s1": 1.0,
786
+ "router/trigger_rate_s0": 0.0,
787
+ "router/trigger_rate_s1": 1.0,
788
+ "step": 410,
789
+ "tokens_trained": 1.34337568
790
+ },
791
+ {
792
+ "comp/rl_weight": 0.03,
793
+ "comp/strictness": 1.0,
794
+ "epoch": 0.11914893617021277,
795
+ "grad_norm": 31.402502059936523,
796
+ "loss": 1.7999,
797
+ "loss_ce": 1.4559017419815063,
798
+ "loss_region": 0.07083795219659805,
799
+ "loss_total": 1.5267397165298462,
800
+ "lr": 0.0010263362022261515,
801
+ "router/avg_deficit_s0": 0.0,
802
+ "router/avg_deficit_s1": 7.0,
803
+ "router/selected_tokens_s0": 1392.46875,
804
+ "router/selected_tokens_s1": 1.0,
805
+ "router/trigger_rate_s0": 0.0,
806
+ "router/trigger_rate_s1": 1.0,
807
+ "step": 420,
808
+ "tokens_trained": 1.376141104
809
+ },
810
+ {
811
+ "comp/rl_weight": 0.03,
812
+ "comp/strictness": 1.0,
813
+ "epoch": 0.12198581560283688,
814
+ "grad_norm": 32.30119705200195,
815
+ "loss": 1.7606,
816
+ "loss_ce": 1.619798183441162,
817
+ "loss_region": 0.05854247137904167,
818
+ "loss_total": 1.6783406734466553,
819
+ "lr": 0.0010508310996539833,
820
+ "router/avg_deficit_s0": 0.0,
821
+ "router/avg_deficit_s1": 7.0,
822
+ "router/selected_tokens_s0": 107.46875,
823
+ "router/selected_tokens_s1": 1.0,
824
+ "router/trigger_rate_s0": 0.0,
825
+ "router/trigger_rate_s1": 1.0,
826
+ "step": 430,
827
+ "tokens_trained": 1.408906544
828
+ },
829
+ {
830
+ "comp/rl_weight": 0.03,
831
+ "comp/strictness": 1.0,
832
+ "epoch": 0.12482269503546099,
833
+ "grad_norm": 46.65761947631836,
834
+ "loss": 1.9924,
835
+ "loss_ce": 2.4070842266082764,
836
+ "loss_region": 0.06459856033325195,
837
+ "loss_total": 2.4716827869415283,
838
+ "lr": 0.0010753259970818151,
839
+ "router/avg_deficit_s0": 0.0,
840
+ "router/avg_deficit_s1": 7.0,
841
+ "router/selected_tokens_s0": 1131.90625,
842
+ "router/selected_tokens_s1": 1.0,
843
+ "router/trigger_rate_s0": 0.0,
844
+ "router/trigger_rate_s1": 1.0,
845
+ "step": 440,
846
+ "tokens_trained": 1.441671984
847
+ },
848
+ {
849
+ "comp/rl_weight": 0.03,
850
+ "comp/strictness": 1.0,
851
+ "epoch": 0.1276595744680851,
852
+ "grad_norm": 30.38531494140625,
853
+ "loss": 1.8704,
854
+ "loss_ce": 1.4392523765563965,
855
+ "loss_region": 0.055338773876428604,
856
+ "loss_total": 1.4945911169052124,
857
+ "lr": 0.001099820894509647,
858
+ "router/avg_deficit_s0": 0.0,
859
+ "router/avg_deficit_s1": 7.0,
860
+ "router/selected_tokens_s0": 631.5625,
861
+ "router/selected_tokens_s1": 1.0,
862
+ "router/trigger_rate_s0": 0.0,
863
+ "router/trigger_rate_s1": 1.0,
864
+ "step": 450,
865
+ "tokens_trained": 1.474437424
866
+ },
867
+ {
868
+ "comp/rl_weight": 0.03,
869
+ "comp/strictness": 1.0,
870
+ "epoch": 0.13049645390070921,
871
+ "grad_norm": 37.19720458984375,
872
+ "loss": 1.7504,
873
+ "loss_ce": 1.5982601642608643,
874
+ "loss_region": 0.07102015614509583,
875
+ "loss_total": 1.6692802906036377,
876
+ "lr": 0.0011243157919374788,
877
+ "router/avg_deficit_s0": 0.0,
878
+ "router/avg_deficit_s1": 0.0,
879
+ "router/selected_tokens_s0": 929.3125,
880
+ "router/selected_tokens_s1": 48.28125,
881
+ "router/trigger_rate_s0": 0.0,
882
+ "router/trigger_rate_s1": 0.0,
883
+ "step": 460,
884
+ "tokens_trained": 1.507202864
885
+ },
886
+ {
887
+ "comp/rl_weight": 0.03,
888
+ "comp/strictness": 1.0,
889
+ "epoch": 0.13333333333333333,
890
+ "grad_norm": 30.36027717590332,
891
+ "loss": 1.7281,
892
+ "loss_ce": 1.4914231300354004,
893
+ "loss_region": 0.05361942574381828,
894
+ "loss_total": 1.5450425148010254,
895
+ "lr": 0.0011488106893653104,
896
+ "router/avg_deficit_s0": 0.0,
897
+ "router/avg_deficit_s1": 7.0,
898
+ "router/selected_tokens_s0": 377.4375,
899
+ "router/selected_tokens_s1": 1.0,
900
+ "router/trigger_rate_s0": 0.0,
901
+ "router/trigger_rate_s1": 1.0,
902
+ "step": 470,
903
+ "tokens_trained": 1.539968304
904
+ },
905
+ {
906
+ "comp/rl_weight": 0.03,
907
+ "comp/strictness": 1.0,
908
+ "epoch": 0.13617021276595745,
909
+ "grad_norm": 12.751656532287598,
910
+ "loss": 1.502,
911
+ "loss_ce": 1.3392599821090698,
912
+ "loss_region": 0.05824059993028641,
913
+ "loss_total": 1.3975006341934204,
914
+ "lr": 0.0011733055867931422,
915
+ "router/avg_deficit_s0": 0.0,
916
+ "router/avg_deficit_s1": 7.0,
917
+ "router/selected_tokens_s0": 779.8125,
918
+ "router/selected_tokens_s1": 1.0,
919
+ "router/trigger_rate_s0": 0.0,
920
+ "router/trigger_rate_s1": 1.0,
921
+ "step": 480,
922
+ "tokens_trained": 1.572733744
923
+ },
924
+ {
925
+ "comp/rl_weight": 0.03,
926
+ "comp/strictness": 1.0,
927
+ "epoch": 0.13900709219858157,
928
+ "grad_norm": 30.712886810302734,
929
+ "loss": 1.5265,
930
+ "loss_ce": 1.441983699798584,
931
+ "loss_region": 0.05336770787835121,
932
+ "loss_total": 1.4953514337539673,
933
+ "lr": 0.001197800484220974,
934
+ "router/avg_deficit_s0": 0.0,
935
+ "router/avg_deficit_s1": 7.0,
936
+ "router/selected_tokens_s0": 630.71875,
937
+ "router/selected_tokens_s1": 1.0,
938
+ "router/trigger_rate_s0": 0.0,
939
+ "router/trigger_rate_s1": 1.0,
940
+ "step": 490,
941
+ "tokens_trained": 1.605499184
942
+ },
943
+ {
944
+ "comp/rl_weight": 0.03,
945
+ "comp/strictness": 1.0,
946
+ "epoch": 0.14184397163120568,
947
+ "grad_norm": 23.531269073486328,
948
+ "loss": 1.6006,
949
+ "loss_ce": 1.3859856128692627,
950
+ "loss_region": 0.05309389531612396,
951
+ "loss_total": 1.4390795230865479,
952
+ "lr": 0.0012222953816488059,
953
+ "router/avg_deficit_s0": 0.0,
954
+ "router/avg_deficit_s1": 7.0,
955
+ "router/selected_tokens_s0": 355.1875,
956
+ "router/selected_tokens_s1": 1.0,
957
+ "router/trigger_rate_s0": 0.0,
958
+ "router/trigger_rate_s1": 1.0,
959
+ "step": 500,
960
+ "tokens_trained": 1.638264624
961
+ },
962
+ {
963
+ "epoch": 0.14184397163120568,
964
+ "eval_ppl": 5.639791369232795,
965
+ "eval_runtime": 0.6154,
966
+ "step": 500,
967
+ "tokens_trained": 1.638264624
968
+ },
969
+ {
970
+ "epoch": 0.14184397163120568,
971
+ "eval_F": 0.0006250488319399953,
972
+ "eval_F_cds": 0.000417908784442649,
973
+ "eval_F_exon": 0.0010869565217391304,
974
+ "eval_F_intron": 0.0006231804525964301,
975
+ "eval_F_nig": 0.0007117661631871394,
976
+ "eval_F_promoter": 0.00046730843692003687,
977
+ "eval_F_utr": 0.0007665184730952016,
978
+ "eval_G": 0.018260951935942262,
979
+ "eval_G_cds": 0.017367320490764216,
980
+ "eval_G_exon": 0.01836432857789855,
981
+ "eval_G_intron": 0.018287128753366702,
982
+ "eval_G_nig": 0.019185528203734217,
983
+ "eval_G_promoter": 0.016835490531997278,
984
+ "eval_G_utr": 0.017702484637692268,
985
+ "eval_avg_bp_per_token": 1599.875,
986
+ "eval_bp_per_token/cds": 2392.866666666667,
987
+ "eval_bp_per_token/exon": 920.0,
988
+ "eval_bp_per_token/intron": 1604.6716417910447,
989
+ "eval_bp_per_token/nig": 1404.9558011049724,
990
+ "eval_bp_per_token/promoter": 2139.9142857142856,
991
+ "eval_bp_per_token/utr": 1304.6,
992
+ "eval_ppl_cds": 6.105901405038029,
993
+ "eval_ppl_exon": 5.801578519769911,
994
+ "eval_ppl_intron": 5.612093447668663,
995
+ "eval_ppl_nig": 5.620627240075904,
996
+ "eval_ppl_promoter": 5.688336002196773,
997
+ "eval_ppl_utr": 5.695504465687492,
998
+ "step": 500,
999
+ "tokens_trained": 1.638264624
1000
+ },
1001
+ {
1002
+ "comp/rl_weight": 0.03,
1003
+ "comp/strictness": 1.0,
1004
+ "epoch": 0.14468085106382977,
1005
+ "grad_norm": 9.532892227172852,
1006
+ "loss": 1.5975,
1007
+ "loss_ce": 1.3092715740203857,
1008
+ "loss_region": 0.05799846723675728,
1009
+ "loss_total": 1.3672699928283691,
1010
+ "lr": 0.0012243786686061229,
1011
+ "router/avg_deficit_s0": 0.0,
1012
+ "router/avg_deficit_s1": 7.0,
1013
+ "router/selected_tokens_s0": 946.25,
1014
+ "router/selected_tokens_s1": 1.0,
1015
+ "router/trigger_rate_s0": 0.0,
1016
+ "router/trigger_rate_s1": 1.0,
1017
+ "step": 510,
1018
+ "tokens_trained": 1.671030064
1019
+ },
1020
+ {
1021
+ "comp/rl_weight": 0.03,
1022
+ "comp/strictness": 1.0,
1023
+ "epoch": 0.1475177304964539,
1024
+ "grad_norm": 18.65001678466797,
1025
+ "loss": 1.6168,
1026
+ "loss_ce": 1.343700885772705,
1027
+ "loss_region": 0.05204938352108002,
1028
+ "loss_total": 1.3957502841949463,
1029
+ "lr": 0.0012239717766222718,
1030
+ "router/avg_deficit_s0": 0.0,
1031
+ "router/avg_deficit_s1": 7.0,
1032
+ "router/selected_tokens_s0": 541.875,
1033
+ "router/selected_tokens_s1": 1.0,
1034
+ "router/trigger_rate_s0": 0.0,
1035
+ "router/trigger_rate_s1": 1.0,
1036
+ "step": 520,
1037
+ "tokens_trained": 1.703795504
1038
+ },
1039
+ {
1040
+ "comp/rl_weight": 0.03,
1041
+ "comp/strictness": 1.0,
1042
+ "epoch": 0.150354609929078,
1043
+ "grad_norm": 34.98784255981445,
1044
+ "loss": 1.6703,
1045
+ "loss_ce": 1.456465721130371,
1046
+ "loss_region": 0.05966515839099884,
1047
+ "loss_total": 1.5161309242248535,
1048
+ "lr": 0.001223564884638421,
1049
+ "router/avg_deficit_s0": 0.0,
1050
+ "router/avg_deficit_s1": 7.0,
1051
+ "router/selected_tokens_s0": 980.71875,
1052
+ "router/selected_tokens_s1": 1.0,
1053
+ "router/trigger_rate_s0": 0.0,
1054
+ "router/trigger_rate_s1": 1.0,
1055
+ "step": 530,
1056
+ "tokens_trained": 1.736560944
1057
+ },
1058
+ {
1059
+ "comp/rl_weight": 0.03,
1060
+ "comp/strictness": 1.0,
1061
+ "epoch": 0.15319148936170213,
1062
+ "grad_norm": 27.90123176574707,
1063
+ "loss": 1.5678,
1064
+ "loss_ce": 1.4636605978012085,
1065
+ "loss_region": 0.055262189358472824,
1066
+ "loss_total": 1.5189228057861328,
1067
+ "lr": 0.00122315799265457,
1068
+ "router/avg_deficit_s0": 0.0,
1069
+ "router/avg_deficit_s1": 7.0,
1070
+ "router/selected_tokens_s0": 515.9375,
1071
+ "router/selected_tokens_s1": 1.0,
1072
+ "router/trigger_rate_s0": 0.0,
1073
+ "router/trigger_rate_s1": 1.0,
1074
+ "step": 540,
1075
+ "tokens_trained": 1.769326384
1076
+ },
1077
+ {
1078
+ "comp/rl_weight": 0.03,
1079
+ "comp/strictness": 1.0,
1080
+ "epoch": 0.15602836879432624,
1081
+ "grad_norm": 36.412662506103516,
1082
+ "loss": 1.617,
1083
+ "loss_ce": 1.481337070465088,
1084
+ "loss_region": 0.057017069309949875,
1085
+ "loss_total": 1.5383541584014893,
1086
+ "lr": 0.001222751100670719,
1087
+ "router/avg_deficit_s0": 0.0,
1088
+ "router/avg_deficit_s1": 7.0,
1089
+ "router/selected_tokens_s0": 653.09375,
1090
+ "router/selected_tokens_s1": 1.0,
1091
+ "router/trigger_rate_s0": 0.0,
1092
+ "router/trigger_rate_s1": 1.0,
1093
+ "step": 550,
1094
+ "tokens_trained": 1.802091024
1095
+ },
1096
+ {
1097
+ "comp/rl_weight": 0.03,
1098
+ "comp/strictness": 1.0,
1099
+ "epoch": 0.15886524822695036,
1100
+ "grad_norm": 33.760154724121094,
1101
+ "loss": 1.6967,
1102
+ "loss_ce": 1.6242401599884033,
1103
+ "loss_region": 0.05576888844370842,
1104
+ "loss_total": 1.6800090074539185,
1105
+ "lr": 0.001222344208686868,
1106
+ "router/avg_deficit_s0": 0.0,
1107
+ "router/avg_deficit_s1": 6.46875,
1108
+ "router/selected_tokens_s0": 594.71875,
1109
+ "router/selected_tokens_s1": 1.53125,
1110
+ "router/trigger_rate_s0": 0.0,
1111
+ "router/trigger_rate_s1": 1.0,
1112
+ "step": 560,
1113
+ "tokens_trained": 1.834856464
1114
+ },
1115
+ {
1116
+ "comp/rl_weight": 0.03,
1117
+ "comp/strictness": 1.0,
1118
+ "epoch": 0.16170212765957448,
1119
+ "grad_norm": 30.55908966064453,
1120
+ "loss": 1.7022,
1121
+ "loss_ce": 1.4764841794967651,
1122
+ "loss_region": 0.053381841629743576,
1123
+ "loss_total": 1.5298659801483154,
1124
+ "lr": 0.0012219373167030169,
1125
+ "router/avg_deficit_s0": 0.0,
1126
+ "router/avg_deficit_s1": 7.0,
1127
+ "router/selected_tokens_s0": 584.40625,
1128
+ "router/selected_tokens_s1": 1.0,
1129
+ "router/trigger_rate_s0": 0.0,
1130
+ "router/trigger_rate_s1": 1.0,
1131
+ "step": 570,
1132
+ "tokens_trained": 1.867621904
1133
+ },
1134
+ {
1135
+ "comp/rl_weight": 0.03,
1136
+ "comp/strictness": 1.0,
1137
+ "epoch": 0.16453900709219857,
1138
+ "grad_norm": 24.999780654907227,
1139
+ "loss": 1.4763,
1140
+ "loss_ce": 1.4031097888946533,
1141
+ "loss_region": 0.05390815809369087,
1142
+ "loss_total": 1.4570178985595703,
1143
+ "lr": 0.0012215304247191658,
1144
+ "router/avg_deficit_s0": 0.0,
1145
+ "router/avg_deficit_s1": 7.0,
1146
+ "router/selected_tokens_s0": 671.75,
1147
+ "router/selected_tokens_s1": 1.0,
1148
+ "router/trigger_rate_s0": 0.0,
1149
+ "router/trigger_rate_s1": 1.0,
1150
+ "step": 580,
1151
+ "tokens_trained": 1.900385744
1152
+ },
1153
+ {
1154
+ "comp/rl_weight": 0.03,
1155
+ "comp/strictness": 1.0,
1156
+ "epoch": 0.1673758865248227,
1157
+ "grad_norm": 34.729034423828125,
1158
+ "loss": 1.4617,
1159
+ "loss_ce": 1.487898349761963,
1160
+ "loss_region": 0.10076552629470825,
1161
+ "loss_total": 1.5886638164520264,
1162
+ "lr": 0.0012211235327353148,
1163
+ "router/avg_deficit_s0": 0.0,
1164
+ "router/avg_deficit_s1": 5.5,
1165
+ "router/selected_tokens_s0": 540.78125,
1166
+ "router/selected_tokens_s1": 105.71875,
1167
+ "router/trigger_rate_s0": 0.0,
1168
+ "router/trigger_rate_s1": 0.125,
1169
+ "step": 590,
1170
+ "tokens_trained": 1.933151184
1171
+ },
1172
+ {
1173
+ "comp/rl_weight": 0.03,
1174
+ "comp/strictness": 1.0,
1175
+ "epoch": 0.1702127659574468,
1176
+ "grad_norm": 23.622325897216797,
1177
+ "loss": 1.4675,
1178
+ "loss_ce": 1.3250795602798462,
1179
+ "loss_region": 0.0658915713429451,
1180
+ "loss_total": 1.3909711837768555,
1181
+ "lr": 0.0012207166407514638,
1182
+ "router/avg_deficit_s0": 0.0,
1183
+ "router/avg_deficit_s1": 5.0,
1184
+ "router/selected_tokens_s0": 539.9375,
1185
+ "router/selected_tokens_s1": 32.5,
1186
+ "router/trigger_rate_s0": 0.0,
1187
+ "router/trigger_rate_s1": 0.3125,
1188
+ "step": 600,
1189
+ "tokens_trained": 1.965916624
1190
+ },
1191
+ {
1192
+ "comp/rl_weight": 0.03,
1193
+ "comp/strictness": 1.0,
1194
+ "epoch": 0.17304964539007092,
1195
+ "grad_norm": 20.5329532623291,
1196
+ "loss": 1.3919,
1197
+ "loss_ce": 1.31636381149292,
1198
+ "loss_region": 0.06174392253160477,
1199
+ "loss_total": 1.3781077861785889,
1200
+ "lr": 0.0012203097487676127,
1201
+ "router/avg_deficit_s0": 0.0,
1202
+ "router/avg_deficit_s1": 6.84375,
1203
+ "router/selected_tokens_s0": 1102.0,
1204
+ "router/selected_tokens_s1": 1.15625,
1205
+ "router/trigger_rate_s0": 0.0,
1206
+ "router/trigger_rate_s1": 1.0,
1207
+ "step": 610,
1208
+ "tokens_trained": 1.998682064
1209
+ },
1210
+ {
1211
+ "comp/rl_weight": 0.03,
1212
+ "comp/strictness": 1.0,
1213
+ "epoch": 0.17588652482269504,
1214
+ "grad_norm": 25.521442413330078,
1215
+ "loss": 1.371,
1216
+ "loss_ce": 1.3530433177947998,
1217
+ "loss_region": 0.055061180144548416,
1218
+ "loss_total": 1.4081045389175415,
1219
+ "lr": 0.0012199028567837617,
1220
+ "router/avg_deficit_s0": 0.0,
1221
+ "router/avg_deficit_s1": 6.96875,
1222
+ "router/selected_tokens_s0": 267.96875,
1223
+ "router/selected_tokens_s1": 1.03125,
1224
+ "router/trigger_rate_s0": 0.0,
1225
+ "router/trigger_rate_s1": 1.0,
1226
+ "step": 620,
1227
+ "tokens_trained": 2.031447504
1228
+ },
1229
+ {
1230
+ "comp/rl_weight": 0.03,
1231
+ "comp/strictness": 1.0,
1232
+ "epoch": 0.17872340425531916,
1233
+ "grad_norm": 19.09522819519043,
1234
+ "loss": 1.3462,
1235
+ "loss_ce": 1.2844027280807495,
1236
+ "loss_region": 0.05663185566663742,
1237
+ "loss_total": 1.3410345315933228,
1238
+ "lr": 0.0012194959647999107,
1239
+ "router/avg_deficit_s0": 0.0,
1240
+ "router/avg_deficit_s1": 5.741935483870968,
1241
+ "router/selected_tokens_s0": 761.0625,
1242
+ "router/selected_tokens_s1": 2.4375,
1243
+ "router/trigger_rate_s0": 0.0,
1244
+ "router/trigger_rate_s1": 0.96875,
1245
+ "step": 630,
1246
+ "tokens_trained": 2.064212944
1247
+ },
1248
+ {
1249
+ "comp/rl_weight": 0.03,
1250
+ "comp/strictness": 1.0,
1251
+ "epoch": 0.18156028368794327,
1252
+ "grad_norm": 13.425186157226562,
1253
+ "loss": 1.3235,
1254
+ "loss_ce": 1.2834771871566772,
1255
+ "loss_region": 0.057796087116003036,
1256
+ "loss_total": 1.341273307800293,
1257
+ "lr": 0.0012190890728160596,
1258
+ "router/avg_deficit_s0": 0.0,
1259
+ "router/avg_deficit_s1": 4.384615384615385,
1260
+ "router/selected_tokens_s0": 976.3125,
1261
+ "router/selected_tokens_s1": 5.75,
1262
+ "router/trigger_rate_s0": 0.0,
1263
+ "router/trigger_rate_s1": 0.8125,
1264
+ "step": 640,
1265
+ "tokens_trained": 2.096975976
1266
+ },
1267
+ {
1268
+ "comp/rl_weight": 0.03,
1269
+ "comp/strictness": 1.0,
1270
+ "epoch": 0.18439716312056736,
1271
+ "grad_norm": 30.06955909729004,
1272
+ "loss": 1.3965,
1273
+ "loss_ce": 1.4326735734939575,
1274
+ "loss_region": 0.051951903849840164,
1275
+ "loss_total": 1.4846254587173462,
1276
+ "lr": 0.0012186821808322086,
1277
+ "router/avg_deficit_s0": 0.0,
1278
+ "router/avg_deficit_s1": 6.96875,
1279
+ "router/selected_tokens_s0": 366.9375,
1280
+ "router/selected_tokens_s1": 1.03125,
1281
+ "router/trigger_rate_s0": 0.0,
1282
+ "router/trigger_rate_s1": 1.0,
1283
+ "step": 650,
1284
+ "tokens_trained": 2.129741416
1285
+ },
1286
+ {
1287
+ "comp/rl_weight": 0.03,
1288
+ "comp/strictness": 1.0,
1289
+ "epoch": 0.18723404255319148,
1290
+ "grad_norm": 26.722410202026367,
1291
+ "loss": 1.3767,
1292
+ "loss_ce": 1.3280962705612183,
1293
+ "loss_region": 0.05729561671614647,
1294
+ "loss_total": 1.3853918313980103,
1295
+ "lr": 0.0012182752888483576,
1296
+ "router/avg_deficit_s0": 0.0,
1297
+ "router/avg_deficit_s1": 7.0,
1298
+ "router/selected_tokens_s0": 938.03125,
1299
+ "router/selected_tokens_s1": 1.0,
1300
+ "router/trigger_rate_s0": 0.0,
1301
+ "router/trigger_rate_s1": 1.0,
1302
+ "step": 660,
1303
+ "tokens_trained": 2.162506856
1304
+ },
1305
+ {
1306
+ "comp/rl_weight": 0.03,
1307
+ "comp/strictness": 1.0,
1308
+ "epoch": 0.1900709219858156,
1309
+ "grad_norm": 13.13896656036377,
1310
+ "loss": 1.3329,
1311
+ "loss_ce": 1.2802703380584717,
1312
+ "loss_region": 0.05178743973374367,
1313
+ "loss_total": 1.3320578336715698,
1314
+ "lr": 0.0012178683968645065,
1315
+ "router/avg_deficit_s0": 0.0,
1316
+ "router/avg_deficit_s1": 7.0,
1317
+ "router/selected_tokens_s0": 558.8125,
1318
+ "router/selected_tokens_s1": 1.0,
1319
+ "router/trigger_rate_s0": 0.0,
1320
+ "router/trigger_rate_s1": 1.0,
1321
+ "step": 670,
1322
+ "tokens_trained": 2.195272296
1323
+ },
1324
+ {
1325
+ "comp/rl_weight": 0.03,
1326
+ "comp/strictness": 1.0,
1327
+ "epoch": 0.19290780141843972,
1328
+ "grad_norm": 13.104703903198242,
1329
+ "loss": 1.3014,
1330
+ "loss_ce": 1.2344958782196045,
1331
+ "loss_region": 0.05391368269920349,
1332
+ "loss_total": 1.2884095907211304,
1333
+ "lr": 0.0012174615048806555,
1334
+ "router/avg_deficit_s0": 0.0,
1335
+ "router/avg_deficit_s1": 7.0,
1336
+ "router/selected_tokens_s0": 328.4375,
1337
+ "router/selected_tokens_s1": 1.0,
1338
+ "router/trigger_rate_s0": 0.0,
1339
+ "router/trigger_rate_s1": 1.0,
1340
+ "step": 680,
1341
+ "tokens_trained": 2.228037736
1342
+ },
1343
+ {
1344
+ "comp/rl_weight": 0.03,
1345
+ "comp/strictness": 1.0,
1346
+ "epoch": 0.19574468085106383,
1347
+ "grad_norm": 15.946929931640625,
1348
+ "loss": 1.2954,
1349
+ "loss_ce": 1.2488086223602295,
1350
+ "loss_region": 0.05314250662922859,
1351
+ "loss_total": 1.3019511699676514,
1352
+ "lr": 0.0012170546128968045,
1353
+ "router/avg_deficit_s0": 0.0,
1354
+ "router/avg_deficit_s1": 7.0,
1355
+ "router/selected_tokens_s0": 583.71875,
1356
+ "router/selected_tokens_s1": 1.0,
1357
+ "router/trigger_rate_s0": 0.0,
1358
+ "router/trigger_rate_s1": 1.0,
1359
+ "step": 690,
1360
+ "tokens_trained": 2.260803176
1361
+ },
1362
+ {
1363
+ "comp/rl_weight": 0.03,
1364
+ "comp/strictness": 1.0,
1365
+ "epoch": 0.19858156028368795,
1366
+ "grad_norm": 36.70180130004883,
1367
+ "loss": 1.3394,
1368
+ "loss_ce": 1.3971507549285889,
1369
+ "loss_region": 0.06877385824918747,
1370
+ "loss_total": 1.465924620628357,
1371
+ "lr": 0.0012166477209129534,
1372
+ "router/avg_deficit_s0": 0.0,
1373
+ "router/avg_deficit_s1": 6.96875,
1374
+ "router/selected_tokens_s0": 1593.3125,
1375
+ "router/selected_tokens_s1": 1.03125,
1376
+ "router/trigger_rate_s0": 0.0,
1377
+ "router/trigger_rate_s1": 1.0,
1378
+ "step": 700,
1379
+ "tokens_trained": 2.293568616
1380
+ },
1381
+ {
1382
+ "comp/rl_weight": 0.03,
1383
+ "comp/strictness": 1.0,
1384
+ "epoch": 0.20141843971631207,
1385
+ "grad_norm": 9.902652740478516,
1386
+ "loss": 1.4144,
1387
+ "loss_ce": 1.231865644454956,
1388
+ "loss_region": 0.23015622794628143,
1389
+ "loss_total": 1.462021827697754,
1390
+ "lr": 0.0012162408289291026,
1391
+ "router/avg_deficit_s0": 6.333333333333333,
1392
+ "router/avg_deficit_s1": 7.0,
1393
+ "router/selected_tokens_s0": 36.34375,
1394
+ "router/selected_tokens_s1": 1.0,
1395
+ "router/trigger_rate_s0": 0.1875,
1396
+ "router/trigger_rate_s1": 1.0,
1397
+ "step": 710,
1398
+ "tokens_trained": 2.326334056
1399
+ },
1400
+ {
1401
+ "comp/rl_weight": 0.03,
1402
+ "comp/strictness": 1.0,
1403
+ "epoch": 0.20425531914893616,
1404
+ "grad_norm": 16.028406143188477,
1405
+ "loss": 1.3595,
1406
+ "loss_ce": 1.2959296703338623,
1407
+ "loss_region": 0.05357291176915169,
1408
+ "loss_total": 1.3495025634765625,
1409
+ "lr": 0.0012158339369452516,
1410
+ "router/avg_deficit_s0": 0.0,
1411
+ "router/avg_deficit_s1": 6.9375,
1412
+ "router/selected_tokens_s0": 248.96875,
1413
+ "router/selected_tokens_s1": 1.0625,
1414
+ "router/trigger_rate_s0": 0.0,
1415
+ "router/trigger_rate_s1": 1.0,
1416
+ "step": 720,
1417
+ "tokens_trained": 2.359099496
1418
+ },
1419
+ {
1420
+ "comp/rl_weight": 0.03,
1421
+ "comp/strictness": 1.0,
1422
+ "epoch": 0.20709219858156028,
1423
+ "grad_norm": 12.643508911132812,
1424
+ "loss": 1.347,
1425
+ "loss_ce": 1.2560234069824219,
1426
+ "loss_region": 0.05382990837097168,
1427
+ "loss_total": 1.3098533153533936,
1428
+ "lr": 0.0012154270449614005,
1429
+ "router/avg_deficit_s0": 0.0,
1430
+ "router/avg_deficit_s1": 7.0,
1431
+ "router/selected_tokens_s0": 212.34375,
1432
+ "router/selected_tokens_s1": 1.0,
1433
+ "router/trigger_rate_s0": 0.0,
1434
+ "router/trigger_rate_s1": 1.0,
1435
+ "step": 730,
1436
+ "tokens_trained": 2.391864136
1437
+ },
1438
+ {
1439
+ "comp/rl_weight": 0.03,
1440
+ "comp/strictness": 1.0,
1441
+ "epoch": 0.2099290780141844,
1442
+ "grad_norm": 6.794911861419678,
1443
+ "loss": 1.2899,
1444
+ "loss_ce": 1.2041139602661133,
1445
+ "loss_region": 0.05598007142543793,
1446
+ "loss_total": 1.2600940465927124,
1447
+ "lr": 0.0012150201529775495,
1448
+ "router/avg_deficit_s0": 0.0,
1449
+ "router/avg_deficit_s1": 7.0,
1450
+ "router/selected_tokens_s0": 436.8125,
1451
+ "router/selected_tokens_s1": 1.0,
1452
+ "router/trigger_rate_s0": 0.0,
1453
+ "router/trigger_rate_s1": 1.0,
1454
+ "step": 740,
1455
+ "tokens_trained": 2.424628776
1456
+ },
1457
+ {
1458
+ "comp/rl_weight": 0.03,
1459
+ "comp/strictness": 1.0,
1460
+ "epoch": 0.2127659574468085,
1461
+ "grad_norm": 8.187490463256836,
1462
+ "loss": 1.2837,
1463
+ "loss_ce": 1.21809720993042,
1464
+ "loss_region": 0.05316738411784172,
1465
+ "loss_total": 1.2712645530700684,
1466
+ "lr": 0.0012146132609936982,
1467
+ "router/avg_deficit_s0": 0.0,
1468
+ "router/avg_deficit_s1": 7.0,
1469
+ "router/selected_tokens_s0": 324.8125,
1470
+ "router/selected_tokens_s1": 1.0,
1471
+ "router/trigger_rate_s0": 0.0,
1472
+ "router/trigger_rate_s1": 1.0,
1473
+ "step": 750,
1474
+ "tokens_trained": 2.457394216
1475
+ },
1476
+ {
1477
+ "comp/rl_weight": 0.03,
1478
+ "comp/strictness": 1.0,
1479
+ "epoch": 0.21560283687943263,
1480
+ "grad_norm": 6.42191743850708,
1481
+ "loss": 1.2935,
1482
+ "loss_ce": 1.232576847076416,
1483
+ "loss_region": 0.053723808377981186,
1484
+ "loss_total": 1.2863006591796875,
1485
+ "lr": 0.0012142063690098472,
1486
+ "router/avg_deficit_s0": 0.0,
1487
+ "router/avg_deficit_s1": 6.5625,
1488
+ "router/selected_tokens_s0": 596.46875,
1489
+ "router/selected_tokens_s1": 1.4375,
1490
+ "router/trigger_rate_s0": 0.0,
1491
+ "router/trigger_rate_s1": 1.0,
1492
+ "step": 760,
1493
+ "tokens_trained": 2.490159656
1494
+ },
1495
+ {
1496
+ "comp/rl_weight": 0.03,
1497
+ "comp/strictness": 1.0,
1498
+ "epoch": 0.21843971631205675,
1499
+ "grad_norm": 18.290868759155273,
1500
+ "loss": 1.303,
1501
+ "loss_ce": 1.2971560955047607,
1502
+ "loss_region": 0.0527898408472538,
1503
+ "loss_total": 1.3499459028244019,
1504
+ "lr": 0.0012137994770259962,
1505
+ "router/avg_deficit_s0": 0.0,
1506
+ "router/avg_deficit_s1": 7.0,
1507
+ "router/selected_tokens_s0": 454.40625,
1508
+ "router/selected_tokens_s1": 1.0,
1509
+ "router/trigger_rate_s0": 0.0,
1510
+ "router/trigger_rate_s1": 1.0,
1511
+ "step": 770,
1512
+ "tokens_trained": 2.522925096
1513
+ },
1514
+ {
1515
+ "comp/rl_weight": 0.03,
1516
+ "comp/strictness": 1.0,
1517
+ "epoch": 0.22127659574468084,
1518
+ "grad_norm": 15.430254936218262,
1519
+ "loss": 1.313,
1520
+ "loss_ce": 1.2602447271347046,
1521
+ "loss_region": 0.052465811371803284,
1522
+ "loss_total": 1.3127105236053467,
1523
+ "lr": 0.0012133925850421454,
1524
+ "router/avg_deficit_s0": 0.0,
1525
+ "router/avg_deficit_s1": 7.0,
1526
+ "router/selected_tokens_s0": 236.28125,
1527
+ "router/selected_tokens_s1": 1.0,
1528
+ "router/trigger_rate_s0": 0.0,
1529
+ "router/trigger_rate_s1": 1.0,
1530
+ "step": 780,
1531
+ "tokens_trained": 2.555690536
1532
+ },
1533
+ {
1534
+ "comp/rl_weight": 0.03,
1535
+ "comp/strictness": 1.0,
1536
+ "epoch": 0.22411347517730495,
1537
+ "grad_norm": 7.031615734100342,
1538
+ "loss": 1.301,
1539
+ "loss_ce": 1.185632586479187,
1540
+ "loss_region": 0.06820279359817505,
1541
+ "loss_total": 1.2538354396820068,
1542
+ "lr": 0.0012129856930582943,
1543
+ "router/avg_deficit_s0": 0.0,
1544
+ "router/avg_deficit_s1": 7.0,
1545
+ "router/selected_tokens_s0": 1199.375,
1546
+ "router/selected_tokens_s1": 1.0,
1547
+ "router/trigger_rate_s0": 0.0,
1548
+ "router/trigger_rate_s1": 1.0,
1549
+ "step": 790,
1550
+ "tokens_trained": 2.588455976
1551
+ },
1552
+ {
1553
+ "comp/rl_weight": 0.03,
1554
+ "comp/strictness": 1.0,
1555
+ "epoch": 0.22695035460992907,
1556
+ "grad_norm": 5.123201847076416,
1557
+ "loss": 1.2839,
1558
+ "loss_ce": 1.2030867338180542,
1559
+ "loss_region": 0.06425300985574722,
1560
+ "loss_total": 1.2673397064208984,
1561
+ "lr": 0.0012125788010744433,
1562
+ "router/avg_deficit_s0": 0.0,
1563
+ "router/avg_deficit_s1": 7.0,
1564
+ "router/selected_tokens_s0": 1046.25,
1565
+ "router/selected_tokens_s1": 1.0,
1566
+ "router/trigger_rate_s0": 0.0,
1567
+ "router/trigger_rate_s1": 1.0,
1568
+ "step": 800,
1569
+ "tokens_trained": 2.621221416
1570
+ },
1571
+ {
1572
+ "comp/rl_weight": 0.03,
1573
+ "comp/strictness": 1.0,
1574
+ "epoch": 0.2297872340425532,
1575
+ "grad_norm": 8.874433517456055,
1576
+ "loss": 1.282,
1577
+ "loss_ce": 1.2380104064941406,
1578
+ "loss_region": 0.05459459125995636,
1579
+ "loss_total": 1.2926050424575806,
1580
+ "lr": 0.0012121719090905923,
1581
+ "router/avg_deficit_s0": 0.0,
1582
+ "router/avg_deficit_s1": 5.448275862068965,
1583
+ "router/selected_tokens_s0": 606.28125,
1584
+ "router/selected_tokens_s1": 3.125,
1585
+ "router/trigger_rate_s0": 0.0,
1586
+ "router/trigger_rate_s1": 0.90625,
1587
+ "step": 810,
1588
+ "tokens_trained": 2.653986856
1589
+ },
1590
+ {
1591
+ "comp/rl_weight": 0.03,
1592
+ "comp/strictness": 1.0,
1593
+ "epoch": 0.2326241134751773,
1594
+ "grad_norm": 3.3697080612182617,
1595
+ "loss": 1.279,
1596
+ "loss_ce": 1.2127931118011475,
1597
+ "loss_region": 0.05367187783122063,
1598
+ "loss_total": 1.2664649486541748,
1599
+ "lr": 0.0012117650171067412,
1600
+ "router/avg_deficit_s0": 0.0,
1601
+ "router/avg_deficit_s1": 5.387096774193548,
1602
+ "router/selected_tokens_s0": 550.875,
1603
+ "router/selected_tokens_s1": 2.78125,
1604
+ "router/trigger_rate_s0": 0.0,
1605
+ "router/trigger_rate_s1": 0.96875,
1606
+ "step": 820,
1607
+ "tokens_trained": 2.686752296
1608
+ },
1609
+ {
1610
+ "comp/rl_weight": 0.03,
1611
+ "comp/strictness": 1.0,
1612
+ "epoch": 0.23546099290780143,
1613
+ "grad_norm": 18.332046508789062,
1614
+ "loss": 1.3407,
1615
+ "loss_ce": 1.366118311882019,
1616
+ "loss_region": 0.0635012835264206,
1617
+ "loss_total": 1.429619550704956,
1618
+ "lr": 0.0012113581251228902,
1619
+ "router/avg_deficit_s0": 0.0,
1620
+ "router/avg_deficit_s1": 6.9375,
1621
+ "router/selected_tokens_s0": 1186.375,
1622
+ "router/selected_tokens_s1": 1.0625,
1623
+ "router/trigger_rate_s0": 0.0,
1624
+ "router/trigger_rate_s1": 1.0,
1625
+ "step": 830,
1626
+ "tokens_trained": 2.719517736
1627
+ },
1628
+ {
1629
+ "comp/rl_weight": 0.03,
1630
+ "comp/strictness": 1.0,
1631
+ "epoch": 0.23829787234042554,
1632
+ "grad_norm": 9.937188148498535,
1633
+ "loss": 1.3486,
1634
+ "loss_ce": 1.243201494216919,
1635
+ "loss_region": 0.09887048602104187,
1636
+ "loss_total": 1.3420720100402832,
1637
+ "lr": 0.0012109512331390391,
1638
+ "router/avg_deficit_s0": 0.0,
1639
+ "router/avg_deficit_s1": 6.96875,
1640
+ "router/selected_tokens_s0": 67.71875,
1641
+ "router/selected_tokens_s1": 1.03125,
1642
+ "router/trigger_rate_s0": 0.0,
1643
+ "router/trigger_rate_s1": 1.0,
1644
+ "step": 840,
1645
+ "tokens_trained": 2.752283176
1646
+ },
1647
+ {
1648
+ "comp/rl_weight": 0.03,
1649
+ "comp/strictness": 1.0,
1650
+ "epoch": 0.24113475177304963,
1651
+ "grad_norm": 6.917110443115234,
1652
+ "loss": 1.311,
1653
+ "loss_ce": 1.2267051935195923,
1654
+ "loss_region": 0.0637051984667778,
1655
+ "loss_total": 1.2904103994369507,
1656
+ "lr": 0.0012105443411551881,
1657
+ "router/avg_deficit_s0": 0.0,
1658
+ "router/avg_deficit_s1": 6.28125,
1659
+ "router/selected_tokens_s0": 1052.40625,
1660
+ "router/selected_tokens_s1": 1.71875,
1661
+ "router/trigger_rate_s0": 0.0,
1662
+ "router/trigger_rate_s1": 1.0,
1663
+ "step": 850,
1664
+ "tokens_trained": 2.7850478
1665
+ },
1666
+ {
1667
+ "comp/rl_weight": 0.03,
1668
+ "comp/strictness": 1.0,
1669
+ "epoch": 0.24397163120567375,
1670
+ "grad_norm": 6.469630718231201,
1671
+ "loss": 1.269,
1672
+ "loss_ce": 1.1876814365386963,
1673
+ "loss_region": 0.0656404048204422,
1674
+ "loss_total": 1.253321886062622,
1675
+ "lr": 0.001210137449171337,
1676
+ "router/avg_deficit_s0": 0.0,
1677
+ "router/avg_deficit_s1": 6.516129032258065,
1678
+ "router/selected_tokens_s0": 1256.46875,
1679
+ "router/selected_tokens_s1": 3.09375,
1680
+ "router/trigger_rate_s0": 0.0,
1681
+ "router/trigger_rate_s1": 0.96875,
1682
+ "step": 860,
1683
+ "tokens_trained": 2.817809728
1684
+ },
1685
+ {
1686
+ "comp/rl_weight": 0.03,
1687
+ "comp/strictness": 1.0,
1688
+ "epoch": 0.24680851063829787,
1689
+ "grad_norm": 9.715760231018066,
1690
+ "loss": 1.2708,
1691
+ "loss_ce": 1.2158164978027344,
1692
+ "loss_region": 0.12369807064533234,
1693
+ "loss_total": 1.3395146131515503,
1694
+ "lr": 0.001209730557187486,
1695
+ "router/avg_deficit_s0": 0.0,
1696
+ "router/avg_deficit_s1": 3.6666666666666665,
1697
+ "router/selected_tokens_s0": 780.3125,
1698
+ "router/selected_tokens_s1": 226.90625,
1699
+ "router/trigger_rate_s0": 0.0,
1700
+ "router/trigger_rate_s1": 0.09375,
1701
+ "step": 870,
1702
+ "tokens_trained": 2.850575168
1703
+ },
1704
+ {
1705
+ "comp/rl_weight": 0.03,
1706
+ "comp/strictness": 1.0,
1707
+ "epoch": 0.24964539007092199,
1708
+ "grad_norm": 5.817880630493164,
1709
+ "loss": 1.2945,
1710
+ "loss_ce": 1.2212210893630981,
1711
+ "loss_region": 0.0879848301410675,
1712
+ "loss_total": 1.3092058897018433,
1713
+ "lr": 0.001209323665203635,
1714
+ "router/avg_deficit_s0": 0.0,
1715
+ "router/avg_deficit_s1": 0.0,
1716
+ "router/selected_tokens_s0": 245.15625,
1717
+ "router/selected_tokens_s1": 37.03125,
1718
+ "router/trigger_rate_s0": 0.0,
1719
+ "router/trigger_rate_s1": 0.0,
1720
+ "step": 880,
1721
+ "tokens_trained": 2.883340608
1722
+ },
1723
+ {
1724
+ "comp/rl_weight": 0.03,
1725
+ "comp/strictness": 1.0,
1726
+ "epoch": 0.2524822695035461,
1727
+ "grad_norm": 13.84002685546875,
1728
+ "loss": 1.3021,
1729
+ "loss_ce": 1.2078100442886353,
1730
+ "loss_region": 0.055929187685251236,
1731
+ "loss_total": 1.2637392282485962,
1732
+ "lr": 0.001208916773219784,
1733
+ "router/avg_deficit_s0": 0.0,
1734
+ "router/avg_deficit_s1": 6.9375,
1735
+ "router/selected_tokens_s0": 808.375,
1736
+ "router/selected_tokens_s1": 1.0625,
1737
+ "router/trigger_rate_s0": 0.0,
1738
+ "router/trigger_rate_s1": 1.0,
1739
+ "step": 890,
1740
+ "tokens_trained": 2.916106048
1741
+ },
1742
+ {
1743
+ "comp/rl_weight": 0.03,
1744
+ "comp/strictness": 1.0,
1745
+ "epoch": 0.2553191489361702,
1746
+ "grad_norm": 12.068603515625,
1747
+ "loss": 1.2995,
1748
+ "loss_ce": 1.2349709272384644,
1749
+ "loss_region": 0.07071184366941452,
1750
+ "loss_total": 1.3056827783584595,
1751
+ "lr": 0.001208509881235933,
1752
+ "router/avg_deficit_s0": 0.0,
1753
+ "router/avg_deficit_s1": 6.71875,
1754
+ "router/selected_tokens_s0": 1462.46875,
1755
+ "router/selected_tokens_s1": 1.28125,
1756
+ "router/trigger_rate_s0": 0.0,
1757
+ "router/trigger_rate_s1": 1.0,
1758
+ "step": 900,
1759
+ "tokens_trained": 2.948871488
1760
+ },
1761
+ {
1762
+ "comp/rl_weight": 0.03,
1763
+ "comp/strictness": 1.0,
1764
+ "epoch": 0.2581560283687943,
1765
+ "grad_norm": 3.185922384262085,
1766
+ "loss": 1.2773,
1767
+ "loss_ce": 1.2066916227340698,
1768
+ "loss_region": 0.05171738564968109,
1769
+ "loss_total": 1.258409023284912,
1770
+ "lr": 0.001208102989252082,
1771
+ "router/avg_deficit_s0": 0.0,
1772
+ "router/avg_deficit_s1": 6.9375,
1773
+ "router/selected_tokens_s0": 612.25,
1774
+ "router/selected_tokens_s1": 1.0625,
1775
+ "router/trigger_rate_s0": 0.0,
1776
+ "router/trigger_rate_s1": 1.0,
1777
+ "step": 910,
1778
+ "tokens_trained": 2.981630496
1779
+ },
1780
+ {
1781
+ "comp/rl_weight": 0.03,
1782
+ "comp/strictness": 1.0,
1783
+ "epoch": 0.26099290780141843,
1784
+ "grad_norm": 13.117000579833984,
1785
+ "loss": 1.3082,
1786
+ "loss_ce": 1.2591320276260376,
1787
+ "loss_region": 0.057609450072050095,
1788
+ "loss_total": 1.3167414665222168,
1789
+ "lr": 0.0012076960972682309,
1790
+ "router/avg_deficit_s0": 0.0,
1791
+ "router/avg_deficit_s1": 7.0,
1792
+ "router/selected_tokens_s0": 873.21875,
1793
+ "router/selected_tokens_s1": 1.0,
1794
+ "router/trigger_rate_s0": 0.0,
1795
+ "router/trigger_rate_s1": 1.0,
1796
+ "step": 920,
1797
+ "tokens_trained": 3.014395936
1798
+ },
1799
+ {
1800
+ "comp/rl_weight": 0.03,
1801
+ "comp/strictness": 1.0,
1802
+ "epoch": 0.26382978723404255,
1803
+ "grad_norm": 3.454926013946533,
1804
+ "loss": 1.2552,
1805
+ "loss_ce": 1.192643165588379,
1806
+ "loss_region": 0.05184994637966156,
1807
+ "loss_total": 1.2444931268692017,
1808
+ "lr": 0.0012072892052843798,
1809
+ "router/avg_deficit_s0": 0.0,
1810
+ "router/avg_deficit_s1": 7.0,
1811
+ "router/selected_tokens_s0": 660.6875,
1812
+ "router/selected_tokens_s1": 1.0,
1813
+ "router/trigger_rate_s0": 0.0,
1814
+ "router/trigger_rate_s1": 1.0,
1815
+ "step": 930,
1816
+ "tokens_trained": 3.047160576
1817
+ },
1818
+ {
1819
+ "comp/rl_weight": 0.03,
1820
+ "comp/strictness": 1.0,
1821
+ "epoch": 0.26666666666666666,
1822
+ "grad_norm": 3.770582437515259,
1823
+ "loss": 1.256,
1824
+ "loss_ce": 1.1592071056365967,
1825
+ "loss_region": 0.049208104610443115,
1826
+ "loss_total": 1.2084152698516846,
1827
+ "lr": 0.0012068823133005288,
1828
+ "router/avg_deficit_s0": 0.0,
1829
+ "router/avg_deficit_s1": 6.8125,
1830
+ "router/selected_tokens_s0": 356.875,
1831
+ "router/selected_tokens_s1": 1.1875,
1832
+ "router/trigger_rate_s0": 0.0,
1833
+ "router/trigger_rate_s1": 1.0,
1834
+ "step": 940,
1835
+ "tokens_trained": 3.079925216
1836
+ },
1837
+ {
1838
+ "comp/rl_weight": 0.03,
1839
+ "comp/strictness": 1.0,
1840
+ "epoch": 0.2695035460992908,
1841
+ "grad_norm": 7.1153244972229,
1842
+ "loss": 1.2572,
1843
+ "loss_ce": 1.1901915073394775,
1844
+ "loss_region": 0.050450198352336884,
1845
+ "loss_total": 1.240641713142395,
1846
+ "lr": 0.0012064754213166778,
1847
+ "router/avg_deficit_s0": 0.0,
1848
+ "router/avg_deficit_s1": 6.935483870967742,
1849
+ "router/selected_tokens_s0": 375.375,
1850
+ "router/selected_tokens_s1": 1.3125,
1851
+ "router/trigger_rate_s0": 0.0,
1852
+ "router/trigger_rate_s1": 0.96875,
1853
+ "step": 950,
1854
+ "tokens_trained": 3.112689856
1855
+ },
1856
+ {
1857
+ "comp/rl_weight": 0.03,
1858
+ "comp/strictness": 1.0,
1859
+ "epoch": 0.2723404255319149,
1860
+ "grad_norm": 9.772027015686035,
1861
+ "loss": 1.2898,
1862
+ "loss_ce": 1.2428486347198486,
1863
+ "loss_region": 0.04990971088409424,
1864
+ "loss_total": 1.2927583456039429,
1865
+ "lr": 0.001206068529332827,
1866
+ "router/avg_deficit_s0": 0.0,
1867
+ "router/avg_deficit_s1": 6.9375,
1868
+ "router/selected_tokens_s0": 411.8125,
1869
+ "router/selected_tokens_s1": 1.0625,
1870
+ "router/trigger_rate_s0": 0.0,
1871
+ "router/trigger_rate_s1": 1.0,
1872
+ "step": 960,
1873
+ "tokens_trained": 3.145454496
1874
+ },
1875
+ {
1876
+ "comp/rl_weight": 0.03,
1877
+ "comp/strictness": 1.0,
1878
+ "epoch": 0.275177304964539,
1879
+ "grad_norm": 11.92085075378418,
1880
+ "loss": 1.2779,
1881
+ "loss_ce": 1.224082350730896,
1882
+ "loss_region": 0.061600904911756516,
1883
+ "loss_total": 1.285683274269104,
1884
+ "lr": 0.001205661637348976,
1885
+ "router/avg_deficit_s0": 0.0,
1886
+ "router/avg_deficit_s1": 6.9375,
1887
+ "router/selected_tokens_s0": 1274.90625,
1888
+ "router/selected_tokens_s1": 1.0625,
1889
+ "router/trigger_rate_s0": 0.0,
1890
+ "router/trigger_rate_s1": 1.0,
1891
+ "step": 970,
1892
+ "tokens_trained": 3.178219936
1893
+ },
1894
+ {
1895
+ "comp/rl_weight": 0.03,
1896
+ "comp/strictness": 1.0,
1897
+ "epoch": 0.27801418439716313,
1898
+ "grad_norm": 6.2715935707092285,
1899
+ "loss": 1.256,
1900
+ "loss_ce": 1.2057243585586548,
1901
+ "loss_region": 0.05960024148225784,
1902
+ "loss_total": 1.265324592590332,
1903
+ "lr": 0.0012052547453651249,
1904
+ "router/avg_deficit_s0": 0.0,
1905
+ "router/avg_deficit_s1": 6.9375,
1906
+ "router/selected_tokens_s0": 1017.5,
1907
+ "router/selected_tokens_s1": 1.0625,
1908
+ "router/trigger_rate_s0": 0.0,
1909
+ "router/trigger_rate_s1": 1.0,
1910
+ "step": 980,
1911
+ "tokens_trained": 3.210985376
1912
+ },
1913
+ {
1914
+ "comp/rl_weight": 0.03,
1915
+ "comp/strictness": 1.0,
1916
+ "epoch": 0.28085106382978725,
1917
+ "grad_norm": 4.448913097381592,
1918
+ "loss": 1.2509,
1919
+ "loss_ce": 1.2143372297286987,
1920
+ "loss_region": 0.056969594210386276,
1921
+ "loss_total": 1.2713068723678589,
1922
+ "lr": 0.0012048478533812738,
1923
+ "router/avg_deficit_s0": 0.0,
1924
+ "router/avg_deficit_s1": 7.0,
1925
+ "router/selected_tokens_s0": 965.90625,
1926
+ "router/selected_tokens_s1": 1.0,
1927
+ "router/trigger_rate_s0": 0.0,
1928
+ "router/trigger_rate_s1": 1.0,
1929
+ "step": 990,
1930
+ "tokens_trained": 3.243750816
1931
+ },
1932
+ {
1933
+ "comp/rl_weight": 0.03,
1934
+ "comp/strictness": 1.0,
1935
+ "epoch": 0.28368794326241137,
1936
+ "grad_norm": 5.426684856414795,
1937
+ "loss": 1.2402,
1938
+ "loss_ce": 1.16555655002594,
1939
+ "loss_region": 0.05230846256017685,
1940
+ "loss_total": 1.217864990234375,
1941
+ "lr": 0.0012044409613974226,
1942
+ "router/avg_deficit_s0": 0.0,
1943
+ "router/avg_deficit_s1": 7.0,
1944
+ "router/selected_tokens_s0": 619.40625,
1945
+ "router/selected_tokens_s1": 1.0,
1946
+ "router/trigger_rate_s0": 0.0,
1947
+ "router/trigger_rate_s1": 1.0,
1948
+ "step": 1000,
1949
+ "tokens_trained": 3.276516256
1950
+ },
1951
+ {
1952
+ "epoch": 0.28368794326241137,
1953
+ "eval_ppl": 3.2542863563067255,
1954
+ "eval_runtime": 0.578,
1955
+ "step": 1000,
1956
+ "tokens_trained": 3.276516256
1957
+ },
1958
+ {
1959
+ "epoch": 0.28368794326241137,
1960
+ "eval_F": 0.0006250488319399953,
1961
+ "eval_F_cds": 0.0011144234251803972,
1962
+ "eval_F_exon": 0.00042270531400966186,
1963
+ "eval_F_intron": 0.000556743304131992,
1964
+ "eval_F_nig": 0.0006567124268080237,
1965
+ "eval_F_promoter": 0.000801100177577206,
1966
+ "eval_F_utr": 0.0007665184730952016,
1967
+ "eval_G": 0.02674509616687583,
1968
+ "eval_G_cds": 0.03768764539743125,
1969
+ "eval_G_exon": 0.030816443991545893,
1970
+ "eval_G_intron": 0.02521758477546237,
1971
+ "eval_G_nig": 0.02268251573209672,
1972
+ "eval_G_promoter": 0.037520551661782184,
1973
+ "eval_G_utr": 0.032285099359957076,
1974
+ "eval_avg_bp_per_token": 1599.875,
1975
+ "eval_bp_per_token/cds": 897.325,
1976
+ "eval_bp_per_token/exon": 2365.714285714286,
1977
+ "eval_bp_per_token/intron": 1796.1599045346063,
1978
+ "eval_bp_per_token/nig": 1522.7365269461077,
1979
+ "eval_bp_per_token/promoter": 1248.2833333333333,
1980
+ "eval_bp_per_token/utr": 1304.6,
1981
+ "eval_ppl_cds": 3.858434678372507,
1982
+ "eval_ppl_exon": 3.4414248334927393,
1983
+ "eval_ppl_intron": 3.301873903034357,
1984
+ "eval_ppl_nig": 3.1578400796505584,
1985
+ "eval_ppl_promoter": 3.4346062040854677,
1986
+ "eval_ppl_utr": 3.5520807511116885,
1987
+ "step": 1000,
1988
+ "tokens_trained": 3.276516256
1989
+ }
1990
+ ],
1991
+ "logging_steps": 10,
1992
+ "max_steps": 30600,
1993
+ "num_input_tokens_seen": 0,
1994
+ "num_train_epochs": 9,
1995
+ "save_steps": 1000,
1996
+ "stateful_callbacks": {
1997
+ "TrainerControl": {
1998
+ "args": {
1999
+ "should_epoch_stop": false,
2000
+ "should_evaluate": false,
2001
+ "should_log": false,
2002
+ "should_save": true,
2003
+ "should_training_stop": false
2004
+ },
2005
+ "attributes": {}
2006
+ }
2007
+ },
2008
+ "total_flos": 0.0,
2009
+ "train_batch_size": 32,
2010
+ "trial_name": null,
2011
+ "trial_params": null
2012
+ }
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7cb4518b673fe0a40ad5d569c2df0284b1ddc1094eaecb8b6f4b2a826f0f109
3
+ size 5969