PrachiPatel committed on
Commit
9515192
·
1 Parent(s): aed4ea6

Delete trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +0 -508
trainer_state.json DELETED
@@ -1,508 +0,0 @@
1
- {
2
- "best_metric": 0.7987080812454224,
3
- "best_model_checkpoint": "text3\\checkpoint-256",
4
- "epoch": 2.9970731707317073,
5
- "global_step": 768,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.04,
12
- "learning_rate": 2e-05,
13
- "loss": 1.948,
14
- "step": 10
15
- },
16
- {
17
- "epoch": 0.08,
18
- "learning_rate": 4e-05,
19
- "loss": 1.932,
20
- "step": 20
21
- },
22
- {
23
- "epoch": 0.12,
24
- "learning_rate": 6e-05,
25
- "loss": 1.8378,
26
- "step": 30
27
- },
28
- {
29
- "epoch": 0.16,
30
- "learning_rate": 8e-05,
31
- "loss": 1.6154,
32
- "step": 40
33
- },
34
- {
35
- "epoch": 0.2,
36
- "learning_rate": 0.0001,
37
- "loss": 1.387,
38
- "step": 50
39
- },
40
- {
41
- "epoch": 0.23,
42
- "learning_rate": 0.00012,
43
- "loss": 1.2299,
44
- "step": 60
45
- },
46
- {
47
- "epoch": 0.27,
48
- "learning_rate": 0.00014000000000000001,
49
- "loss": 1.1199,
50
- "step": 70
51
- },
52
- {
53
- "epoch": 0.31,
54
- "learning_rate": 0.00016,
55
- "loss": 1.0253,
56
- "step": 80
57
- },
58
- {
59
- "epoch": 0.35,
60
- "learning_rate": 0.00017999999999999998,
61
- "loss": 1.062,
62
- "step": 90
63
- },
64
- {
65
- "epoch": 0.39,
66
- "learning_rate": 0.0002,
67
- "loss": 0.9251,
68
- "step": 100
69
- },
70
- {
71
- "epoch": 0.43,
72
- "learning_rate": 0.00022,
73
- "loss": 0.9046,
74
- "step": 110
75
- },
76
- {
77
- "epoch": 0.47,
78
- "learning_rate": 0.00024,
79
- "loss": 0.8802,
80
- "step": 120
81
- },
82
- {
83
- "epoch": 0.51,
84
- "learning_rate": 0.00026000000000000003,
85
- "loss": 0.9967,
86
- "step": 130
87
- },
88
- {
89
- "epoch": 0.55,
90
- "learning_rate": 0.00028000000000000003,
91
- "loss": 0.8137,
92
- "step": 140
93
- },
94
- {
95
- "epoch": 0.59,
96
- "learning_rate": 0.0003,
97
- "loss": 0.91,
98
- "step": 150
99
- },
100
- {
101
- "epoch": 0.62,
102
- "learning_rate": 0.00032,
103
- "loss": 0.8306,
104
- "step": 160
105
- },
106
- {
107
- "epoch": 0.66,
108
- "learning_rate": 0.00034,
109
- "loss": 0.8594,
110
- "step": 170
111
- },
112
- {
113
- "epoch": 0.7,
114
- "learning_rate": 0.00035999999999999997,
115
- "loss": 0.928,
116
- "step": 180
117
- },
118
- {
119
- "epoch": 0.74,
120
- "learning_rate": 0.00038,
121
- "loss": 0.8102,
122
- "step": 190
123
- },
124
- {
125
- "epoch": 0.78,
126
- "learning_rate": 0.0004,
127
- "loss": 0.7926,
128
- "step": 200
129
- },
130
- {
131
- "epoch": 0.82,
132
- "learning_rate": 0.00042,
133
- "loss": 0.7343,
134
- "step": 210
135
- },
136
- {
137
- "epoch": 0.86,
138
- "learning_rate": 0.00044,
139
- "loss": 0.8367,
140
- "step": 220
141
- },
142
- {
143
- "epoch": 0.9,
144
- "learning_rate": 0.00046,
145
- "loss": 0.8216,
146
- "step": 230
147
- },
148
- {
149
- "epoch": 0.94,
150
- "learning_rate": 0.00048,
151
- "loss": 0.7632,
152
- "step": 240
153
- },
154
- {
155
- "epoch": 0.98,
156
- "learning_rate": 0.0005,
157
- "loss": 0.818,
158
- "step": 250
159
- },
160
- {
161
- "epoch": 1.0,
162
- "eval_accuracy": 0.7320644216691069,
163
- "eval_f1": 0.7317069643985404,
164
- "eval_loss": 0.7987080812454224,
165
- "eval_precision": 0.7374126435251394,
166
- "eval_recall": 0.7320644216691069,
167
- "eval_runtime": 28.883,
168
- "eval_samples_per_second": 70.941,
169
- "eval_steps_per_second": 4.466,
170
- "step": 256
171
- },
172
- {
173
- "epoch": 1.01,
174
- "learning_rate": 0.0005200000000000001,
175
- "loss": 0.7729,
176
- "step": 260
177
- },
178
- {
179
- "epoch": 1.05,
180
- "learning_rate": 0.00054,
181
- "loss": 0.8005,
182
- "step": 270
183
- },
184
- {
185
- "epoch": 1.09,
186
- "learning_rate": 0.0005600000000000001,
187
- "loss": 0.7229,
188
- "step": 280
189
- },
190
- {
191
- "epoch": 1.13,
192
- "learning_rate": 0.00058,
193
- "loss": 0.7439,
194
- "step": 290
195
- },
196
- {
197
- "epoch": 1.17,
198
- "learning_rate": 0.0006,
199
- "loss": 0.8348,
200
- "step": 300
201
- },
202
- {
203
- "epoch": 1.21,
204
- "learning_rate": 0.00062,
205
- "loss": 0.7075,
206
- "step": 310
207
- },
208
- {
209
- "epoch": 1.25,
210
- "learning_rate": 0.00064,
211
- "loss": 0.7768,
212
- "step": 320
213
- },
214
- {
215
- "epoch": 1.29,
216
- "learning_rate": 0.00066,
217
- "loss": 0.7575,
218
- "step": 330
219
- },
220
- {
221
- "epoch": 1.33,
222
- "learning_rate": 0.00068,
223
- "loss": 0.7574,
224
- "step": 340
225
- },
226
- {
227
- "epoch": 1.37,
228
- "learning_rate": 0.0007,
229
- "loss": 0.8399,
230
- "step": 350
231
- },
232
- {
233
- "epoch": 1.4,
234
- "learning_rate": 0.0007199999999999999,
235
- "loss": 0.8992,
236
- "step": 360
237
- },
238
- {
239
- "epoch": 1.44,
240
- "learning_rate": 0.00074,
241
- "loss": 0.8134,
242
- "step": 370
243
- },
244
- {
245
- "epoch": 1.48,
246
- "learning_rate": 0.00076,
247
- "loss": 0.8662,
248
- "step": 380
249
- },
250
- {
251
- "epoch": 1.52,
252
- "learning_rate": 0.0007800000000000001,
253
- "loss": 0.728,
254
- "step": 390
255
- },
256
- {
257
- "epoch": 1.56,
258
- "learning_rate": 0.0008,
259
- "loss": 0.8599,
260
- "step": 400
261
- },
262
- {
263
- "epoch": 1.6,
264
- "learning_rate": 0.00082,
265
- "loss": 0.8931,
266
- "step": 410
267
- },
268
- {
269
- "epoch": 1.64,
270
- "learning_rate": 0.00084,
271
- "loss": 0.8559,
272
- "step": 420
273
- },
274
- {
275
- "epoch": 1.68,
276
- "learning_rate": 0.00086,
277
- "loss": 0.918,
278
- "step": 430
279
- },
280
- {
281
- "epoch": 1.72,
282
- "learning_rate": 0.00088,
283
- "loss": 0.8785,
284
- "step": 440
285
- },
286
- {
287
- "epoch": 1.76,
288
- "learning_rate": 0.0009000000000000001,
289
- "loss": 0.7987,
290
- "step": 450
291
- },
292
- {
293
- "epoch": 1.8,
294
- "learning_rate": 0.00092,
295
- "loss": 0.7985,
296
- "step": 460
297
- },
298
- {
299
- "epoch": 1.83,
300
- "learning_rate": 0.00094,
301
- "loss": 0.82,
302
- "step": 470
303
- },
304
- {
305
- "epoch": 1.87,
306
- "learning_rate": 0.00096,
307
- "loss": 0.895,
308
- "step": 480
309
- },
310
- {
311
- "epoch": 1.91,
312
- "learning_rate": 0.00098,
313
- "loss": 0.9,
314
- "step": 490
315
- },
316
- {
317
- "epoch": 1.95,
318
- "learning_rate": 0.001,
319
- "loss": 0.7661,
320
- "step": 500
321
- },
322
- {
323
- "epoch": 1.99,
324
- "learning_rate": 0.0009991869918699187,
325
- "loss": 0.8573,
326
- "step": 510
327
- },
328
- {
329
- "epoch": 2.0,
330
- "eval_accuracy": 0.7354807223035628,
331
- "eval_f1": 0.7316570448238782,
332
- "eval_loss": 0.8289155960083008,
333
- "eval_precision": 0.7364917787903197,
334
- "eval_recall": 0.7354807223035628,
335
- "eval_runtime": 28.6407,
336
- "eval_samples_per_second": 71.542,
337
- "eval_steps_per_second": 4.504,
338
- "step": 512
339
- },
340
- {
341
- "epoch": 2.03,
342
- "learning_rate": 0.0009983739837398375,
343
- "loss": 0.7547,
344
- "step": 520
345
- },
346
- {
347
- "epoch": 2.07,
348
- "learning_rate": 0.0009975609756097562,
349
- "loss": 0.7428,
350
- "step": 530
351
- },
352
- {
353
- "epoch": 2.11,
354
- "learning_rate": 0.0009967479674796748,
355
- "loss": 0.7495,
356
- "step": 540
357
- },
358
- {
359
- "epoch": 2.15,
360
- "learning_rate": 0.0009959349593495937,
361
- "loss": 0.8956,
362
- "step": 550
363
- },
364
- {
365
- "epoch": 2.19,
366
- "learning_rate": 0.0009951219512195123,
367
- "loss": 0.7692,
368
- "step": 560
369
- },
370
- {
371
- "epoch": 2.22,
372
- "learning_rate": 0.000994308943089431,
373
- "loss": 0.802,
374
- "step": 570
375
- },
376
- {
377
- "epoch": 2.26,
378
- "learning_rate": 0.0009934959349593496,
379
- "loss": 0.8457,
380
- "step": 580
381
- },
382
- {
383
- "epoch": 2.3,
384
- "learning_rate": 0.0009926829268292682,
385
- "loss": 0.8257,
386
- "step": 590
387
- },
388
- {
389
- "epoch": 2.34,
390
- "learning_rate": 0.0009918699186991869,
391
- "loss": 0.8721,
392
- "step": 600
393
- },
394
- {
395
- "epoch": 2.38,
396
- "learning_rate": 0.0009910569105691057,
397
- "loss": 0.884,
398
- "step": 610
399
- },
400
- {
401
- "epoch": 2.42,
402
- "learning_rate": 0.0009902439024390244,
403
- "loss": 0.859,
404
- "step": 620
405
- },
406
- {
407
- "epoch": 2.46,
408
- "learning_rate": 0.000989430894308943,
409
- "loss": 0.7961,
410
- "step": 630
411
- },
412
- {
413
- "epoch": 2.5,
414
- "learning_rate": 0.0009886178861788619,
415
- "loss": 0.7881,
416
- "step": 640
417
- },
418
- {
419
- "epoch": 2.54,
420
- "learning_rate": 0.0009878048780487805,
421
- "loss": 0.7552,
422
- "step": 650
423
- },
424
- {
425
- "epoch": 2.58,
426
- "learning_rate": 0.0009869918699186991,
427
- "loss": 0.8331,
428
- "step": 660
429
- },
430
- {
431
- "epoch": 2.61,
432
- "learning_rate": 0.000986178861788618,
433
- "loss": 0.8285,
434
- "step": 670
435
- },
436
- {
437
- "epoch": 2.65,
438
- "learning_rate": 0.0009853658536585366,
439
- "loss": 0.8212,
440
- "step": 680
441
- },
442
- {
443
- "epoch": 2.69,
444
- "learning_rate": 0.0009845528455284553,
445
- "loss": 0.8296,
446
- "step": 690
447
- },
448
- {
449
- "epoch": 2.73,
450
- "learning_rate": 0.0009837398373983741,
451
- "loss": 0.8279,
452
- "step": 700
453
- },
454
- {
455
- "epoch": 2.77,
456
- "learning_rate": 0.0009829268292682928,
457
- "loss": 0.7799,
458
- "step": 710
459
- },
460
- {
461
- "epoch": 2.81,
462
- "learning_rate": 0.0009821138211382114,
463
- "loss": 0.7338,
464
- "step": 720
465
- },
466
- {
467
- "epoch": 2.85,
468
- "learning_rate": 0.00098130081300813,
469
- "loss": 0.691,
470
- "step": 730
471
- },
472
- {
473
- "epoch": 2.89,
474
- "learning_rate": 0.0009804878048780487,
475
- "loss": 0.84,
476
- "step": 740
477
- },
478
- {
479
- "epoch": 2.93,
480
- "learning_rate": 0.0009796747967479673,
481
- "loss": 0.7672,
482
- "step": 750
483
- },
484
- {
485
- "epoch": 2.97,
486
- "learning_rate": 0.0009788617886178862,
487
- "loss": 0.7583,
488
- "step": 760
489
- },
490
- {
491
- "epoch": 3.0,
492
- "eval_accuracy": 0.7476817959980478,
493
- "eval_f1": 0.7476581545903426,
494
- "eval_loss": 0.8017462491989136,
495
- "eval_precision": 0.7558306284443703,
496
- "eval_recall": 0.7476817959980478,
497
- "eval_runtime": 28.7319,
498
- "eval_samples_per_second": 71.314,
499
- "eval_steps_per_second": 4.49,
500
- "step": 768
501
- }
502
- ],
503
- "max_steps": 12800,
504
- "num_train_epochs": 50,
505
- "total_flos": 1046595336611346.0,
506
- "trial_name": null,
507
- "trial_params": null
508
- }