Harley-ml commited on
Commit
02a1a28
·
verified ·
1 Parent(s): 099e596

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -439
trainer_state.json DELETED
@@ -1,439 +0,0 @@
1
- {
2
- "best_global_step": null,
3
- "best_metric": null,
4
- "best_model_checkpoint": null,
5
- "epoch": 2.0,
6
- "eval_steps": 1000,
7
- "global_step": 27070,
8
- "is_hyper_param_search": false,
9
- "is_local_process_zero": true,
10
- "is_world_process_zero": true,
11
- "log_history": [
12
- {
13
- "epoch": 0.07388434636981578,
14
- "grad_norm": 1.2263256311416626,
15
- "learning_rate": 0.000490609710199473,
16
- "loss": 4.0797,
17
- "step": 1000
18
- },
19
- {
20
- "epoch": 0.07388434636981578,
21
- "eval_loss": 3.001145124435425,
22
- "eval_runtime": 1.4766,
23
- "eval_samples_per_second": 662.32,
24
- "eval_steps_per_second": 83.298,
25
- "step": 1000
26
- },
27
- {
28
- "epoch": 0.14776869273963156,
29
- "grad_norm": 1.2999979257583618,
30
- "learning_rate": 0.000471791494166353,
31
- "loss": 2.823,
32
- "step": 2000
33
- },
34
- {
35
- "epoch": 0.14776869273963156,
36
- "eval_loss": 2.6864545345306396,
37
- "eval_runtime": 1.5608,
38
- "eval_samples_per_second": 626.616,
39
- "eval_steps_per_second": 78.808,
40
- "step": 2000
41
- },
42
- {
43
- "epoch": 0.22165303910944734,
44
- "grad_norm": 1.4543421268463135,
45
- "learning_rate": 0.000452973278133233,
46
- "loss": 2.6331,
47
- "step": 3000
48
- },
49
- {
50
- "epoch": 0.22165303910944734,
51
- "eval_loss": 2.5702903270721436,
52
- "eval_runtime": 1.4462,
53
- "eval_samples_per_second": 676.239,
54
- "eval_steps_per_second": 85.048,
55
- "step": 3000
56
- },
57
- {
58
- "epoch": 0.2955373854792631,
59
- "grad_norm": 1.4996088743209839,
60
- "learning_rate": 0.0004341550621001129,
61
- "loss": 2.5457,
62
- "step": 4000
63
- },
64
- {
65
- "epoch": 0.2955373854792631,
66
- "eval_loss": 2.5038747787475586,
67
- "eval_runtime": 1.4384,
68
- "eval_samples_per_second": 679.922,
69
- "eval_steps_per_second": 85.512,
70
- "step": 4000
71
- },
72
- {
73
- "epoch": 0.3694217318490789,
74
- "grad_norm": 1.4575793743133545,
75
- "learning_rate": 0.00041533684606699287,
76
- "loss": 2.4906,
77
- "step": 5000
78
- },
79
- {
80
- "epoch": 0.3694217318490789,
81
- "eval_loss": 2.458561658859253,
82
- "eval_runtime": 1.4575,
83
- "eval_samples_per_second": 671.005,
84
- "eval_steps_per_second": 84.39,
85
- "step": 5000
86
- },
87
- {
88
- "epoch": 0.4433060782188947,
89
- "grad_norm": 1.3582031726837158,
90
- "learning_rate": 0.0003965186300338728,
91
- "loss": 2.4523,
92
- "step": 6000
93
- },
94
- {
95
- "epoch": 0.4433060782188947,
96
- "eval_loss": 2.431638240814209,
97
- "eval_runtime": 1.4677,
98
- "eval_samples_per_second": 666.353,
99
- "eval_steps_per_second": 83.805,
100
- "step": 6000
101
- },
102
- {
103
- "epoch": 0.5171904245887105,
104
- "grad_norm": 1.3987085819244385,
105
- "learning_rate": 0.0003777004140007527,
106
- "loss": 2.4213,
107
- "step": 7000
108
- },
109
- {
110
- "epoch": 0.5171904245887105,
111
- "eval_loss": 2.3989198207855225,
112
- "eval_runtime": 1.4685,
113
- "eval_samples_per_second": 666.003,
114
- "eval_steps_per_second": 83.761,
115
- "step": 7000
116
- },
117
- {
118
- "epoch": 0.5910747709585262,
119
- "grad_norm": 1.4713547229766846,
120
- "learning_rate": 0.00035888219796763267,
121
- "loss": 2.3977,
122
- "step": 8000
123
- },
124
- {
125
- "epoch": 0.5910747709585262,
126
- "eval_loss": 2.374978542327881,
127
- "eval_runtime": 1.4628,
128
- "eval_samples_per_second": 668.602,
129
- "eval_steps_per_second": 84.088,
130
- "step": 8000
131
- },
132
- {
133
- "epoch": 0.664959117328342,
134
- "grad_norm": 1.4406859874725342,
135
- "learning_rate": 0.0003400639819345126,
136
- "loss": 2.37,
137
- "step": 9000
138
- },
139
- {
140
- "epoch": 0.664959117328342,
141
- "eval_loss": 2.3552350997924805,
142
- "eval_runtime": 1.5716,
143
- "eval_samples_per_second": 622.315,
144
- "eval_steps_per_second": 78.267,
145
- "step": 9000
146
- },
147
- {
148
- "epoch": 0.7388434636981578,
149
- "grad_norm": 1.477502703666687,
150
- "learning_rate": 0.00032124576590139255,
151
- "loss": 2.3558,
152
- "step": 10000
153
- },
154
- {
155
- "epoch": 0.7388434636981578,
156
- "eval_loss": 2.3359665870666504,
157
- "eval_runtime": 1.4772,
158
- "eval_samples_per_second": 662.073,
159
- "eval_steps_per_second": 83.267,
160
- "step": 10000
161
- },
162
- {
163
- "epoch": 0.8127278100679736,
164
- "grad_norm": 1.5436781644821167,
165
- "learning_rate": 0.00030242754986827247,
166
- "loss": 2.3375,
167
- "step": 11000
168
- },
169
- {
170
- "epoch": 0.8127278100679736,
171
- "eval_loss": 2.324208974838257,
172
- "eval_runtime": 1.5023,
173
- "eval_samples_per_second": 651.019,
174
- "eval_steps_per_second": 81.877,
175
- "step": 11000
176
- },
177
- {
178
- "epoch": 0.8866121564377893,
179
- "grad_norm": 1.408799171447754,
180
- "learning_rate": 0.00028360933383515243,
181
- "loss": 2.3225,
182
- "step": 12000
183
- },
184
- {
185
- "epoch": 0.8866121564377893,
186
- "eval_loss": 2.310602903366089,
187
- "eval_runtime": 1.4754,
188
- "eval_samples_per_second": 662.856,
189
- "eval_steps_per_second": 83.365,
190
- "step": 12000
191
- },
192
- {
193
- "epoch": 0.9604965028076051,
194
- "grad_norm": 1.4997986555099487,
195
- "learning_rate": 0.00026479111780203235,
196
- "loss": 2.3094,
197
- "step": 13000
198
- },
199
- {
200
- "epoch": 0.9604965028076051,
201
- "eval_loss": 2.2948832511901855,
202
- "eval_runtime": 1.4678,
203
- "eval_samples_per_second": 666.289,
204
- "eval_steps_per_second": 83.797,
205
- "step": 13000
206
- },
207
- {
208
- "epoch": 1.0343562210619643,
209
- "grad_norm": 1.5238574743270874,
210
- "learning_rate": 0.0002459729017689123,
211
- "loss": 2.2898,
212
- "step": 14000
213
- },
214
- {
215
- "epoch": 1.0343562210619643,
216
- "eval_loss": 2.2858312129974365,
217
- "eval_runtime": 1.5046,
218
- "eval_samples_per_second": 650.004,
219
- "eval_steps_per_second": 81.749,
220
- "step": 14000
221
- },
222
- {
223
- "epoch": 1.1082405674317801,
224
- "grad_norm": 1.5487300157546997,
225
- "learning_rate": 0.00022715468573579226,
226
- "loss": 2.272,
227
- "step": 15000
228
- },
229
- {
230
- "epoch": 1.1082405674317801,
231
- "eval_loss": 2.274585723876953,
232
- "eval_runtime": 1.4645,
233
- "eval_samples_per_second": 667.797,
234
- "eval_steps_per_second": 83.987,
235
- "step": 15000
236
- },
237
- {
238
- "epoch": 1.182124913801596,
239
- "grad_norm": 1.4958250522613525,
240
- "learning_rate": 0.0002083364697026722,
241
- "loss": 2.2673,
242
- "step": 16000
243
- },
244
- {
245
- "epoch": 1.182124913801596,
246
- "eval_loss": 2.261155605316162,
247
- "eval_runtime": 1.4807,
248
- "eval_samples_per_second": 660.514,
249
- "eval_steps_per_second": 83.071,
250
- "step": 16000
251
- },
252
- {
253
- "epoch": 1.2560092601714117,
254
- "grad_norm": 1.6432082653045654,
255
- "learning_rate": 0.00018951825366955214,
256
- "loss": 2.2527,
257
- "step": 17000
258
- },
259
- {
260
- "epoch": 1.2560092601714117,
261
- "eval_loss": 2.253293991088867,
262
- "eval_runtime": 1.4847,
263
- "eval_samples_per_second": 658.732,
264
- "eval_steps_per_second": 82.847,
265
- "step": 17000
266
- },
267
- {
268
- "epoch": 1.3298936065412275,
269
- "grad_norm": 1.5579006671905518,
270
- "learning_rate": 0.00017070003763643209,
271
- "loss": 2.2469,
272
- "step": 18000
273
- },
274
- {
275
- "epoch": 1.3298936065412275,
276
- "eval_loss": 2.2448325157165527,
277
- "eval_runtime": 1.4629,
278
- "eval_samples_per_second": 668.546,
279
- "eval_steps_per_second": 84.081,
280
- "step": 18000
281
- },
282
- {
283
- "epoch": 1.4037779529110432,
284
- "grad_norm": 1.6929436922073364,
285
- "learning_rate": 0.00015188182160331203,
286
- "loss": 2.2345,
287
- "step": 19000
288
- },
289
- {
290
- "epoch": 1.4037779529110432,
291
- "eval_loss": 2.236670732498169,
292
- "eval_runtime": 1.4306,
293
- "eval_samples_per_second": 683.609,
294
- "eval_steps_per_second": 85.975,
295
- "step": 19000
296
- },
297
- {
298
- "epoch": 1.477662299280859,
299
- "grad_norm": 1.661308765411377,
300
- "learning_rate": 0.00013306360557019194,
301
- "loss": 2.2312,
302
- "step": 20000
303
- },
304
- {
305
- "epoch": 1.477662299280859,
306
- "eval_loss": 2.2292463779449463,
307
- "eval_runtime": 1.4426,
308
- "eval_samples_per_second": 677.956,
309
- "eval_steps_per_second": 85.264,
310
- "step": 20000
311
- },
312
- {
313
- "epoch": 1.5515466456506748,
314
- "grad_norm": 1.5889065265655518,
315
- "learning_rate": 0.0001142453895370719,
316
- "loss": 2.2239,
317
- "step": 21000
318
- },
319
- {
320
- "epoch": 1.5515466456506748,
321
- "eval_loss": 2.221221923828125,
322
- "eval_runtime": 1.5775,
323
- "eval_samples_per_second": 619.97,
324
- "eval_steps_per_second": 77.972,
325
- "step": 21000
326
- },
327
- {
328
- "epoch": 1.6254309920204906,
329
- "grad_norm": 1.5558372735977173,
330
- "learning_rate": 9.542717350395183e-05,
331
- "loss": 2.2081,
332
- "step": 22000
333
- },
334
- {
335
- "epoch": 1.6254309920204906,
336
- "eval_loss": 2.2122271060943604,
337
- "eval_runtime": 1.5975,
338
- "eval_samples_per_second": 612.189,
339
- "eval_steps_per_second": 76.993,
340
- "step": 22000
341
- },
342
- {
343
- "epoch": 1.6993153383903064,
344
- "grad_norm": 1.7341097593307495,
345
- "learning_rate": 7.660895747083177e-05,
346
- "loss": 2.2043,
347
- "step": 23000
348
- },
349
- {
350
- "epoch": 1.6993153383903064,
351
- "eval_loss": 2.204393148422241,
352
- "eval_runtime": 1.4774,
353
- "eval_samples_per_second": 661.963,
354
- "eval_steps_per_second": 83.253,
355
- "step": 23000
356
- },
357
- {
358
- "epoch": 1.7731996847601221,
359
- "grad_norm": 1.6520919799804688,
360
- "learning_rate": 5.779074143771171e-05,
361
- "loss": 2.1988,
362
- "step": 24000
363
- },
364
- {
365
- "epoch": 1.7731996847601221,
366
- "eval_loss": 2.1986052989959717,
367
- "eval_runtime": 1.4794,
368
- "eval_samples_per_second": 661.067,
369
- "eval_steps_per_second": 83.14,
370
- "step": 24000
371
- },
372
- {
373
- "epoch": 1.847084031129938,
374
- "grad_norm": 1.6864484548568726,
375
- "learning_rate": 3.8972525404591645e-05,
376
- "loss": 2.1885,
377
- "step": 25000
378
- },
379
- {
380
- "epoch": 1.847084031129938,
381
- "eval_loss": 2.1929802894592285,
382
- "eval_runtime": 1.4646,
383
- "eval_samples_per_second": 667.763,
384
- "eval_steps_per_second": 83.982,
385
- "step": 25000
386
- },
387
- {
388
- "epoch": 1.9209683774997537,
389
- "grad_norm": 1.74201500415802,
390
- "learning_rate": 2.0154309371471584e-05,
391
- "loss": 2.1897,
392
- "step": 26000
393
- },
394
- {
395
- "epoch": 1.9209683774997537,
396
- "eval_loss": 2.1882574558258057,
397
- "eval_runtime": 1.5005,
398
- "eval_samples_per_second": 651.777,
399
- "eval_steps_per_second": 81.972,
400
- "step": 26000
401
- },
402
- {
403
- "epoch": 1.9948527238695695,
404
- "grad_norm": 1.7203181982040405,
405
- "learning_rate": 1.3360933383515244e-06,
406
- "loss": 2.1843,
407
- "step": 27000
408
- },
409
- {
410
- "epoch": 1.9948527238695695,
411
- "eval_loss": 2.185438871383667,
412
- "eval_runtime": 1.4469,
413
- "eval_samples_per_second": 675.924,
414
- "eval_steps_per_second": 85.009,
415
- "step": 27000
416
- }
417
- ],
418
- "logging_steps": 1000,
419
- "max_steps": 27070,
420
- "num_input_tokens_seen": 0,
421
- "num_train_epochs": 2,
422
- "save_steps": 1000,
423
- "stateful_callbacks": {
424
- "TrainerControl": {
425
- "args": {
426
- "should_epoch_stop": false,
427
- "should_evaluate": false,
428
- "should_log": false,
429
- "should_save": true,
430
- "should_training_stop": true
431
- },
432
- "attributes": {}
433
- }
434
- },
435
- "total_flos": 147395435692032.0,
436
- "train_batch_size": 6,
437
- "trial_name": null,
438
- "trial_params": null
439
- }