akseljoonas HF Staff committed on
Commit
6af558e
·
verified ·
1 Parent(s): 8a40947

Model save

Browse files
README.md ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen3-4B
3
+ library_name: transformers
4
+ model_name: Agentic-Qwen3-4B-e2-lr2-b8
5
+ tags:
6
+ - generated_from_trainer
7
+ - trl
8
+ - sft
9
+ license: apache-2.0
10
+ ---
11
+
12
+ # Model Card for Agentic-Qwen3-4B-e2-lr2-b8
13
+
14
+ This model is a fine-tuned version of [Qwen/Qwen3-4B](https://huggingface.co/Qwen/Qwen3-4B).
15
+ It has been trained using [TRL](https://github.com/huggingface/trl).
16
+
17
+ ## Quick start
18
+
19
+ ```python
20
+ from transformers import pipeline
21
+
22
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
23
+ generator = pipeline("text-generation", model="akseljoonas/Agentic-Qwen3-4B-e2-lr2-b8", device="cuda")
24
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
25
+ print(output["generated_text"])
26
+ ```
27
+
28
+ ## Training procedure
29
+
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/akseljoonas-university-of-groningen/huggingface/runs/ux6f3h75)
31
+
32
+
33
+ This model was trained with supervised fine-tuning (SFT).
34
+
35
+ ### Framework versions
36
+
37
+ - TRL: 0.18.1
38
+ - Transformers: 4.52.4
39
+ - PyTorch: 2.6.0
40
+ - Datasets: 3.6.0
41
+ - Tokenizers: 0.21.1
42
+
43
+ ## Citations
44
+
45
+
46
+
47
+ Cite TRL as:
48
+
49
+ ```bibtex
50
+ @misc{vonwerra2022trl,
51
+ title = {{TRL: Transformer Reinforcement Learning}},
52
+ author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
53
+ year = 2020,
54
+ journal = {GitHub repository},
55
+ publisher = {GitHub},
56
+ howpublished = {\url{https://github.com/huggingface/trl}}
57
+ }
58
+ ```
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 60200843018240.0,
3
+ "train_loss": 0.6736609485914122,
4
+ "train_runtime": 1231.7269,
5
+ "train_samples": 9217,
6
+ "train_samples_per_second": 2.062,
7
+ "train_steps_per_second": 0.258
8
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.52.4"
13
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "total_flos": 60200843018240.0,
3
+ "train_loss": 0.6736609485914122,
4
+ "train_runtime": 1231.7269,
5
+ "train_samples": 9217,
6
+ "train_samples_per_second": 2.062,
7
+ "train_steps_per_second": 0.258
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,612 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 318,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.031446540880503145,
14
+ "grad_norm": 13.03271333270673,
15
+ "learning_rate": 2.5e-06,
16
+ "loss": 1.3775,
17
+ "mean_token_accuracy": 0.7547178506851197,
18
+ "num_tokens": 818536.0,
19
+ "step": 5
20
+ },
21
+ {
22
+ "epoch": 0.06289308176100629,
23
+ "grad_norm": 3.142081262767518,
24
+ "learning_rate": 5.625e-06,
25
+ "loss": 1.1561,
26
+ "mean_token_accuracy": 0.7710366725921631,
27
+ "num_tokens": 1596086.0,
28
+ "step": 10
29
+ },
30
+ {
31
+ "epoch": 0.09433962264150944,
32
+ "grad_norm": 0.9204549726929935,
33
+ "learning_rate": 8.750000000000001e-06,
34
+ "loss": 1.0427,
35
+ "mean_token_accuracy": 0.7695102214813232,
36
+ "num_tokens": 2415286.0,
37
+ "step": 15
38
+ },
39
+ {
40
+ "epoch": 0.12578616352201258,
41
+ "grad_norm": 0.7085856910992004,
42
+ "learning_rate": 1.1875e-05,
43
+ "loss": 1.0061,
44
+ "mean_token_accuracy": 0.7758254289627076,
45
+ "num_tokens": 3218052.0,
46
+ "step": 20
47
+ },
48
+ {
49
+ "epoch": 0.15723270440251572,
50
+ "grad_norm": 0.547258781879994,
51
+ "learning_rate": 1.5000000000000002e-05,
52
+ "loss": 0.8875,
53
+ "mean_token_accuracy": 0.7997570395469665,
54
+ "num_tokens": 4004194.0,
55
+ "step": 25
56
+ },
57
+ {
58
+ "epoch": 0.18867924528301888,
59
+ "grad_norm": 1.0222820759392208,
60
+ "learning_rate": 1.8125e-05,
61
+ "loss": 0.9002,
62
+ "mean_token_accuracy": 0.7947627782821656,
63
+ "num_tokens": 4817215.0,
64
+ "step": 30
65
+ },
66
+ {
67
+ "epoch": 0.22012578616352202,
68
+ "grad_norm": 3.585439572305339,
69
+ "learning_rate": 1.986013986013986e-05,
70
+ "loss": 0.9059,
71
+ "mean_token_accuracy": 0.7968738198280334,
72
+ "num_tokens": 5631876.0,
73
+ "step": 35
74
+ },
75
+ {
76
+ "epoch": 0.25157232704402516,
77
+ "grad_norm": 0.6067914037174289,
78
+ "learning_rate": 1.951048951048951e-05,
79
+ "loss": 0.9672,
80
+ "mean_token_accuracy": 0.7905117273330688,
81
+ "num_tokens": 6431930.0,
82
+ "step": 40
83
+ },
84
+ {
85
+ "epoch": 0.2830188679245283,
86
+ "grad_norm": 0.619187902035828,
87
+ "learning_rate": 1.916083916083916e-05,
88
+ "loss": 0.8712,
89
+ "mean_token_accuracy": 0.7971467971801758,
90
+ "num_tokens": 7248262.0,
91
+ "step": 45
92
+ },
93
+ {
94
+ "epoch": 0.31446540880503143,
95
+ "grad_norm": 1.0922125142356272,
96
+ "learning_rate": 1.881118881118881e-05,
97
+ "loss": 0.9262,
98
+ "mean_token_accuracy": 0.7934189438819885,
99
+ "num_tokens": 8024263.0,
100
+ "step": 50
101
+ },
102
+ {
103
+ "epoch": 0.34591194968553457,
104
+ "grad_norm": 1.8833162758210453,
105
+ "learning_rate": 1.8461538461538465e-05,
106
+ "loss": 0.8009,
107
+ "mean_token_accuracy": 0.8181156992912293,
108
+ "num_tokens": 8843409.0,
109
+ "step": 55
110
+ },
111
+ {
112
+ "epoch": 0.37735849056603776,
113
+ "grad_norm": 0.5294123548152969,
114
+ "learning_rate": 1.8111888111888115e-05,
115
+ "loss": 0.7584,
116
+ "mean_token_accuracy": 0.8229650974273681,
117
+ "num_tokens": 9650085.0,
118
+ "step": 60
119
+ },
120
+ {
121
+ "epoch": 0.4088050314465409,
122
+ "grad_norm": 0.4800791810392324,
123
+ "learning_rate": 1.7762237762237765e-05,
124
+ "loss": 0.8147,
125
+ "mean_token_accuracy": 0.8108610510826111,
126
+ "num_tokens": 10453111.0,
127
+ "step": 65
128
+ },
129
+ {
130
+ "epoch": 0.44025157232704404,
131
+ "grad_norm": 0.47044090988814274,
132
+ "learning_rate": 1.7412587412587415e-05,
133
+ "loss": 0.8538,
134
+ "mean_token_accuracy": 0.8061554193496704,
135
+ "num_tokens": 11269645.0,
136
+ "step": 70
137
+ },
138
+ {
139
+ "epoch": 0.4716981132075472,
140
+ "grad_norm": 0.36384702985188494,
141
+ "learning_rate": 1.7062937062937065e-05,
142
+ "loss": 0.879,
143
+ "mean_token_accuracy": 0.8073269724845886,
144
+ "num_tokens": 12062908.0,
145
+ "step": 75
146
+ },
147
+ {
148
+ "epoch": 0.5031446540880503,
149
+ "grad_norm": 0.5787507645142339,
150
+ "learning_rate": 1.6713286713286712e-05,
151
+ "loss": 0.916,
152
+ "mean_token_accuracy": 0.7904855847358704,
153
+ "num_tokens": 12852544.0,
154
+ "step": 80
155
+ },
156
+ {
157
+ "epoch": 0.5345911949685535,
158
+ "grad_norm": 0.8612764231795952,
159
+ "learning_rate": 1.6363636363636366e-05,
160
+ "loss": 0.7602,
161
+ "mean_token_accuracy": 0.8291459321975708,
162
+ "num_tokens": 13622852.0,
163
+ "step": 85
164
+ },
165
+ {
166
+ "epoch": 0.5660377358490566,
167
+ "grad_norm": 0.5764345054487913,
168
+ "learning_rate": 1.6013986013986016e-05,
169
+ "loss": 0.7738,
170
+ "mean_token_accuracy": 0.8199570298194885,
171
+ "num_tokens": 14402536.0,
172
+ "step": 90
173
+ },
174
+ {
175
+ "epoch": 0.5974842767295597,
176
+ "grad_norm": 0.5860043968041379,
177
+ "learning_rate": 1.5664335664335666e-05,
178
+ "loss": 0.5879,
179
+ "mean_token_accuracy": 0.8609241962432861,
180
+ "num_tokens": 15201407.0,
181
+ "step": 95
182
+ },
183
+ {
184
+ "epoch": 0.6289308176100629,
185
+ "grad_norm": 0.4612587590718709,
186
+ "learning_rate": 1.5314685314685317e-05,
187
+ "loss": 0.6411,
188
+ "mean_token_accuracy": 0.8491015195846557,
189
+ "num_tokens": 16020607.0,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 0.660377358490566,
194
+ "grad_norm": 0.7302185701182936,
195
+ "learning_rate": 1.4965034965034965e-05,
196
+ "loss": 0.668,
197
+ "mean_token_accuracy": 0.8352020621299744,
198
+ "num_tokens": 16838236.0,
199
+ "step": 105
200
+ },
201
+ {
202
+ "epoch": 0.6918238993710691,
203
+ "grad_norm": 0.45981371746934846,
204
+ "learning_rate": 1.4615384615384615e-05,
205
+ "loss": 0.5761,
206
+ "mean_token_accuracy": 0.8600066065788269,
207
+ "num_tokens": 17657436.0,
208
+ "step": 110
209
+ },
210
+ {
211
+ "epoch": 0.7232704402515723,
212
+ "grad_norm": 0.4990834329499322,
213
+ "learning_rate": 1.4265734265734267e-05,
214
+ "loss": 0.5672,
215
+ "mean_token_accuracy": 0.863051176071167,
216
+ "num_tokens": 18476636.0,
217
+ "step": 115
218
+ },
219
+ {
220
+ "epoch": 0.7547169811320755,
221
+ "grad_norm": 0.5062785400963731,
222
+ "learning_rate": 1.3916083916083917e-05,
223
+ "loss": 0.7395,
224
+ "mean_token_accuracy": 0.8357039093971252,
225
+ "num_tokens": 19273991.0,
226
+ "step": 120
227
+ },
228
+ {
229
+ "epoch": 0.7861635220125787,
230
+ "grad_norm": 0.47628852410912564,
231
+ "learning_rate": 1.3566433566433568e-05,
232
+ "loss": 0.5779,
233
+ "mean_token_accuracy": 0.8661539793014527,
234
+ "num_tokens": 20073959.0,
235
+ "step": 125
236
+ },
237
+ {
238
+ "epoch": 0.8176100628930818,
239
+ "grad_norm": 0.766898576259439,
240
+ "learning_rate": 1.3216783216783218e-05,
241
+ "loss": 1.0121,
242
+ "mean_token_accuracy": 0.7860404014587402,
243
+ "num_tokens": 20891202.0,
244
+ "step": 130
245
+ },
246
+ {
247
+ "epoch": 0.8490566037735849,
248
+ "grad_norm": 0.8560643966737385,
249
+ "learning_rate": 1.2867132867132868e-05,
250
+ "loss": 0.6729,
251
+ "mean_token_accuracy": 0.846558690071106,
252
+ "num_tokens": 21698529.0,
253
+ "step": 135
254
+ },
255
+ {
256
+ "epoch": 0.8805031446540881,
257
+ "grad_norm": 0.5696461399962188,
258
+ "learning_rate": 1.2517482517482518e-05,
259
+ "loss": 0.6544,
260
+ "mean_token_accuracy": 0.8542250633239746,
261
+ "num_tokens": 22517729.0,
262
+ "step": 140
263
+ },
264
+ {
265
+ "epoch": 0.9119496855345912,
266
+ "grad_norm": 0.4884669558649522,
267
+ "learning_rate": 1.216783216783217e-05,
268
+ "loss": 0.5603,
269
+ "mean_token_accuracy": 0.8656034827232361,
270
+ "num_tokens": 23326880.0,
271
+ "step": 145
272
+ },
273
+ {
274
+ "epoch": 0.9433962264150944,
275
+ "grad_norm": 0.6163292040479019,
276
+ "learning_rate": 1.181818181818182e-05,
277
+ "loss": 0.7203,
278
+ "mean_token_accuracy": 0.835569703578949,
279
+ "num_tokens": 24132107.0,
280
+ "step": 150
281
+ },
282
+ {
283
+ "epoch": 0.9748427672955975,
284
+ "grad_norm": 0.4767257326937619,
285
+ "learning_rate": 1.1468531468531469e-05,
286
+ "loss": 0.5465,
287
+ "mean_token_accuracy": 0.8675103425979614,
288
+ "num_tokens": 24928038.0,
289
+ "step": 155
290
+ },
291
+ {
292
+ "epoch": 1.0062893081761006,
293
+ "grad_norm": 0.9012992028692588,
294
+ "learning_rate": 1.1118881118881119e-05,
295
+ "loss": 0.7252,
296
+ "mean_token_accuracy": 0.8303738117218018,
297
+ "num_tokens": 25706278.0,
298
+ "step": 160
299
+ },
300
+ {
301
+ "epoch": 1.0377358490566038,
302
+ "grad_norm": 0.8111141420851177,
303
+ "learning_rate": 1.076923076923077e-05,
304
+ "loss": 0.5765,
305
+ "mean_token_accuracy": 0.8677368283271789,
306
+ "num_tokens": 26514532.0,
307
+ "step": 165
308
+ },
309
+ {
310
+ "epoch": 1.069182389937107,
311
+ "grad_norm": 0.6267585859027412,
312
+ "learning_rate": 1.041958041958042e-05,
313
+ "loss": 0.4623,
314
+ "mean_token_accuracy": 0.8873383641242981,
315
+ "num_tokens": 27313592.0,
316
+ "step": 170
317
+ },
318
+ {
319
+ "epoch": 1.10062893081761,
320
+ "grad_norm": 0.444789476020434,
321
+ "learning_rate": 1.0069930069930071e-05,
322
+ "loss": 0.5428,
323
+ "mean_token_accuracy": 0.8730736255645752,
324
+ "num_tokens": 28132792.0,
325
+ "step": 175
326
+ },
327
+ {
328
+ "epoch": 1.1320754716981132,
329
+ "grad_norm": 0.6056217287653249,
330
+ "learning_rate": 9.72027972027972e-06,
331
+ "loss": 0.5868,
332
+ "mean_token_accuracy": 0.865373182296753,
333
+ "num_tokens": 28949045.0,
334
+ "step": 180
335
+ },
336
+ {
337
+ "epoch": 1.1635220125786163,
338
+ "grad_norm": 0.4694269418208841,
339
+ "learning_rate": 9.370629370629372e-06,
340
+ "loss": 0.5848,
341
+ "mean_token_accuracy": 0.868312668800354,
342
+ "num_tokens": 29768245.0,
343
+ "step": 185
344
+ },
345
+ {
346
+ "epoch": 1.1949685534591195,
347
+ "grad_norm": 0.6521728421466072,
348
+ "learning_rate": 9.020979020979022e-06,
349
+ "loss": 0.5126,
350
+ "mean_token_accuracy": 0.8788403034210205,
351
+ "num_tokens": 30543456.0,
352
+ "step": 190
353
+ },
354
+ {
355
+ "epoch": 1.2264150943396226,
356
+ "grad_norm": 0.5283463167203141,
357
+ "learning_rate": 8.671328671328672e-06,
358
+ "loss": 0.6338,
359
+ "mean_token_accuracy": 0.8573248863220215,
360
+ "num_tokens": 31350518.0,
361
+ "step": 195
362
+ },
363
+ {
364
+ "epoch": 1.2578616352201257,
365
+ "grad_norm": 0.46526678539901895,
366
+ "learning_rate": 8.321678321678323e-06,
367
+ "loss": 0.5093,
368
+ "mean_token_accuracy": 0.8769666433334351,
369
+ "num_tokens": 32169718.0,
370
+ "step": 200
371
+ },
372
+ {
373
+ "epoch": 1.2893081761006289,
374
+ "grad_norm": 0.45788290526995284,
375
+ "learning_rate": 7.972027972027973e-06,
376
+ "loss": 0.5234,
377
+ "mean_token_accuracy": 0.878737998008728,
378
+ "num_tokens": 32955823.0,
379
+ "step": 205
380
+ },
381
+ {
382
+ "epoch": 1.320754716981132,
383
+ "grad_norm": 0.5536484023317151,
384
+ "learning_rate": 7.622377622377622e-06,
385
+ "loss": 0.4318,
386
+ "mean_token_accuracy": 0.8946848034858703,
387
+ "num_tokens": 33775023.0,
388
+ "step": 210
389
+ },
390
+ {
391
+ "epoch": 1.3522012578616351,
392
+ "grad_norm": 0.6186022115094232,
393
+ "learning_rate": 7.272727272727273e-06,
394
+ "loss": 0.5947,
395
+ "mean_token_accuracy": 0.8497153162956238,
396
+ "num_tokens": 34546164.0,
397
+ "step": 215
398
+ },
399
+ {
400
+ "epoch": 1.3836477987421385,
401
+ "grad_norm": 0.47987850722159214,
402
+ "learning_rate": 6.923076923076923e-06,
403
+ "loss": 0.5026,
404
+ "mean_token_accuracy": 0.8792301297187806,
405
+ "num_tokens": 35357830.0,
406
+ "step": 220
407
+ },
408
+ {
409
+ "epoch": 1.4150943396226414,
410
+ "grad_norm": 0.43018225407142757,
411
+ "learning_rate": 6.573426573426574e-06,
412
+ "loss": 0.5064,
413
+ "mean_token_accuracy": 0.879835331439972,
414
+ "num_tokens": 36174162.0,
415
+ "step": 225
416
+ },
417
+ {
418
+ "epoch": 1.4465408805031448,
419
+ "grad_norm": 0.46620085974916936,
420
+ "learning_rate": 6.223776223776225e-06,
421
+ "loss": 0.4226,
422
+ "mean_token_accuracy": 0.8939149737358093,
423
+ "num_tokens": 36989817.0,
424
+ "step": 230
425
+ },
426
+ {
427
+ "epoch": 1.4779874213836477,
428
+ "grad_norm": 0.48609974920693577,
429
+ "learning_rate": 5.874125874125874e-06,
430
+ "loss": 0.5202,
431
+ "mean_token_accuracy": 0.8768733620643616,
432
+ "num_tokens": 37777847.0,
433
+ "step": 235
434
+ },
435
+ {
436
+ "epoch": 1.509433962264151,
437
+ "grad_norm": 0.5821105347647334,
438
+ "learning_rate": 5.524475524475524e-06,
439
+ "loss": 0.6143,
440
+ "mean_token_accuracy": 0.8637777328491211,
441
+ "num_tokens": 38562630.0,
442
+ "step": 240
443
+ },
444
+ {
445
+ "epoch": 1.540880503144654,
446
+ "grad_norm": 0.5803085528179021,
447
+ "learning_rate": 5.174825174825175e-06,
448
+ "loss": 0.5018,
449
+ "mean_token_accuracy": 0.8810171365737915,
450
+ "num_tokens": 39381830.0,
451
+ "step": 245
452
+ },
453
+ {
454
+ "epoch": 1.5723270440251573,
455
+ "grad_norm": 0.4396680250939225,
456
+ "learning_rate": 4.8251748251748255e-06,
457
+ "loss": 0.5409,
458
+ "mean_token_accuracy": 0.8760395407676697,
459
+ "num_tokens": 40188597.0,
460
+ "step": 250
461
+ },
462
+ {
463
+ "epoch": 1.6037735849056602,
464
+ "grad_norm": 0.8802693971286873,
465
+ "learning_rate": 4.475524475524476e-06,
466
+ "loss": 0.5175,
467
+ "mean_token_accuracy": 0.8776832342147827,
468
+ "num_tokens": 41007797.0,
469
+ "step": 255
470
+ },
471
+ {
472
+ "epoch": 1.6352201257861636,
473
+ "grad_norm": 0.5614997097724259,
474
+ "learning_rate": 4.125874125874127e-06,
475
+ "loss": 0.5166,
476
+ "mean_token_accuracy": 0.880309796333313,
477
+ "num_tokens": 41803691.0,
478
+ "step": 260
479
+ },
480
+ {
481
+ "epoch": 1.6666666666666665,
482
+ "grad_norm": 0.6969893060316523,
483
+ "learning_rate": 3.776223776223776e-06,
484
+ "loss": 0.4707,
485
+ "mean_token_accuracy": 0.8875695466995239,
486
+ "num_tokens": 42595724.0,
487
+ "step": 265
488
+ },
489
+ {
490
+ "epoch": 1.6981132075471699,
491
+ "grad_norm": 0.48728281199668755,
492
+ "learning_rate": 3.426573426573427e-06,
493
+ "loss": 0.3861,
494
+ "mean_token_accuracy": 0.9031725287437439,
495
+ "num_tokens": 43413353.0,
496
+ "step": 270
497
+ },
498
+ {
499
+ "epoch": 1.7295597484276728,
500
+ "grad_norm": 0.4756501231096391,
501
+ "learning_rate": 3.0769230769230774e-06,
502
+ "loss": 0.429,
503
+ "mean_token_accuracy": 0.8964764833450317,
504
+ "num_tokens": 44211004.0,
505
+ "step": 275
506
+ },
507
+ {
508
+ "epoch": 1.7610062893081762,
509
+ "grad_norm": 0.5778523505124141,
510
+ "learning_rate": 2.7272727272727272e-06,
511
+ "loss": 0.645,
512
+ "mean_token_accuracy": 0.8627704739570617,
513
+ "num_tokens": 45020739.0,
514
+ "step": 280
515
+ },
516
+ {
517
+ "epoch": 1.7924528301886793,
518
+ "grad_norm": 0.5512721198891299,
519
+ "learning_rate": 2.377622377622378e-06,
520
+ "loss": 0.5684,
521
+ "mean_token_accuracy": 0.8708881616592408,
522
+ "num_tokens": 45829341.0,
523
+ "step": 285
524
+ },
525
+ {
526
+ "epoch": 1.8238993710691824,
527
+ "grad_norm": 0.45716516224489046,
528
+ "learning_rate": 2.027972027972028e-06,
529
+ "loss": 0.5288,
530
+ "mean_token_accuracy": 0.8799654126167298,
531
+ "num_tokens": 46647913.0,
532
+ "step": 290
533
+ },
534
+ {
535
+ "epoch": 1.8553459119496856,
536
+ "grad_norm": 1.2212730329024803,
537
+ "learning_rate": 1.6783216783216785e-06,
538
+ "loss": 0.4361,
539
+ "mean_token_accuracy": 0.8925036907196044,
540
+ "num_tokens": 47446784.0,
541
+ "step": 295
542
+ },
543
+ {
544
+ "epoch": 1.8867924528301887,
545
+ "grad_norm": 0.5584280710628016,
546
+ "learning_rate": 1.3286713286713287e-06,
547
+ "loss": 0.5054,
548
+ "mean_token_accuracy": 0.879949152469635,
549
+ "num_tokens": 48248618.0,
550
+ "step": 300
551
+ },
552
+ {
553
+ "epoch": 1.9182389937106918,
554
+ "grad_norm": 0.5396960462166436,
555
+ "learning_rate": 9.790209790209791e-07,
556
+ "loss": 0.4173,
557
+ "mean_token_accuracy": 0.900512409210205,
558
+ "num_tokens": 49061639.0,
559
+ "step": 305
560
+ },
561
+ {
562
+ "epoch": 1.949685534591195,
563
+ "grad_norm": 0.4828928484837873,
564
+ "learning_rate": 6.293706293706295e-07,
565
+ "loss": 0.4519,
566
+ "mean_token_accuracy": 0.8932919859886169,
567
+ "num_tokens": 49861941.0,
568
+ "step": 310
569
+ },
570
+ {
571
+ "epoch": 1.9811320754716981,
572
+ "grad_norm": 0.5299789433457202,
573
+ "learning_rate": 2.7972027972027973e-07,
574
+ "loss": 0.5017,
575
+ "mean_token_accuracy": 0.8787847280502319,
576
+ "num_tokens": 50666035.0,
577
+ "step": 315
578
+ },
579
+ {
580
+ "epoch": 2.0,
581
+ "mean_token_accuracy": 0.8753345211346945,
582
+ "num_tokens": 51084876.0,
583
+ "step": 318,
584
+ "total_flos": 60200843018240.0,
585
+ "train_loss": 0.6736609485914122,
586
+ "train_runtime": 1231.7269,
587
+ "train_samples_per_second": 2.062,
588
+ "train_steps_per_second": 0.258
589
+ }
590
+ ],
591
+ "logging_steps": 5,
592
+ "max_steps": 318,
593
+ "num_input_tokens_seen": 0,
594
+ "num_train_epochs": 2,
595
+ "save_steps": 500,
596
+ "stateful_callbacks": {
597
+ "TrainerControl": {
598
+ "args": {
599
+ "should_epoch_stop": false,
600
+ "should_evaluate": false,
601
+ "should_log": false,
602
+ "should_save": true,
603
+ "should_training_stop": true
604
+ },
605
+ "attributes": {}
606
+ }
607
+ },
608
+ "total_flos": 60200843018240.0,
609
+ "train_batch_size": 1,
610
+ "trial_name": null,
611
+ "trial_params": null
612
+ }