bhargav-07-bidkar commited on
Commit
33a06b9
·
verified ·
1 Parent(s): 0d798a1

Upload checkpoint-2913\trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. checkpoint-2913//trainer_state.json +469 -0
checkpoint-2913//trainer_state.json ADDED
@@ -0,0 +1,469 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.7821552723059096,
3
+ "best_model_checkpoint": "./legalbert_finetuned\\checkpoint-2913",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2913,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05149330587023687,
13
+ "grad_norm": 15.42212963104248,
14
+ "learning_rate": 1.9656711294198423e-05,
15
+ "loss": 3.664,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.10298661174047374,
20
+ "grad_norm": 15.640098571777344,
21
+ "learning_rate": 1.9313422588396845e-05,
22
+ "loss": 3.2168,
23
+ "step": 100
24
+ },
25
+ {
26
+ "epoch": 0.15447991761071062,
27
+ "grad_norm": 10.927306175231934,
28
+ "learning_rate": 1.8970133882595267e-05,
29
+ "loss": 2.7426,
30
+ "step": 150
31
+ },
32
+ {
33
+ "epoch": 0.2059732234809475,
34
+ "grad_norm": 11.16470718383789,
35
+ "learning_rate": 1.8626845176793685e-05,
36
+ "loss": 2.2935,
37
+ "step": 200
38
+ },
39
+ {
40
+ "epoch": 0.25746652935118436,
41
+ "grad_norm": 8.693652153015137,
42
+ "learning_rate": 1.8283556470992106e-05,
43
+ "loss": 1.9415,
44
+ "step": 250
45
+ },
46
+ {
47
+ "epoch": 0.30895983522142123,
48
+ "grad_norm": 6.318974494934082,
49
+ "learning_rate": 1.7940267765190528e-05,
50
+ "loss": 1.7223,
51
+ "step": 300
52
+ },
53
+ {
54
+ "epoch": 0.3604531410916581,
55
+ "grad_norm": 9.084778785705566,
56
+ "learning_rate": 1.7596979059388946e-05,
57
+ "loss": 1.5035,
58
+ "step": 350
59
+ },
60
+ {
61
+ "epoch": 0.411946446961895,
62
+ "grad_norm": 7.663175582885742,
63
+ "learning_rate": 1.7253690353587368e-05,
64
+ "loss": 1.4208,
65
+ "step": 400
66
+ },
67
+ {
68
+ "epoch": 0.46343975283213185,
69
+ "grad_norm": 6.044983863830566,
70
+ "learning_rate": 1.691040164778579e-05,
71
+ "loss": 1.2055,
72
+ "step": 450
73
+ },
74
+ {
75
+ "epoch": 0.5149330587023687,
76
+ "grad_norm": 7.2318315505981445,
77
+ "learning_rate": 1.6567112941984208e-05,
78
+ "loss": 1.3188,
79
+ "step": 500
80
+ },
81
+ {
82
+ "epoch": 0.5664263645726055,
83
+ "grad_norm": 8.419646263122559,
84
+ "learning_rate": 1.622382423618263e-05,
85
+ "loss": 1.0725,
86
+ "step": 550
87
+ },
88
+ {
89
+ "epoch": 0.6179196704428425,
90
+ "grad_norm": 5.270600318908691,
91
+ "learning_rate": 1.588053553038105e-05,
92
+ "loss": 1.1664,
93
+ "step": 600
94
+ },
95
+ {
96
+ "epoch": 0.6694129763130793,
97
+ "grad_norm": 5.677024841308594,
98
+ "learning_rate": 1.5537246824579473e-05,
99
+ "loss": 1.0537,
100
+ "step": 650
101
+ },
102
+ {
103
+ "epoch": 0.7209062821833162,
104
+ "grad_norm": 7.414009094238281,
105
+ "learning_rate": 1.5193958118777894e-05,
106
+ "loss": 1.0609,
107
+ "step": 700
108
+ },
109
+ {
110
+ "epoch": 0.772399588053553,
111
+ "grad_norm": 9.209087371826172,
112
+ "learning_rate": 1.4850669412976316e-05,
113
+ "loss": 0.9383,
114
+ "step": 750
115
+ },
116
+ {
117
+ "epoch": 0.82389289392379,
118
+ "grad_norm": 7.801745891571045,
119
+ "learning_rate": 1.4507380707174734e-05,
120
+ "loss": 0.9023,
121
+ "step": 800
122
+ },
123
+ {
124
+ "epoch": 0.8753861997940268,
125
+ "grad_norm": 5.991678714752197,
126
+ "learning_rate": 1.4164092001373156e-05,
127
+ "loss": 0.9126,
128
+ "step": 850
129
+ },
130
+ {
131
+ "epoch": 0.9268795056642637,
132
+ "grad_norm": 10.367544174194336,
133
+ "learning_rate": 1.3820803295571576e-05,
134
+ "loss": 0.9172,
135
+ "step": 900
136
+ },
137
+ {
138
+ "epoch": 0.9783728115345005,
139
+ "grad_norm": 8.197531700134277,
140
+ "learning_rate": 1.3477514589769997e-05,
141
+ "loss": 0.9509,
142
+ "step": 950
143
+ },
144
+ {
145
+ "epoch": 1.0,
146
+ "eval_accuracy": 0.7531865585168018,
147
+ "eval_f1": 0.7043263913078888,
148
+ "eval_loss": 0.8081243634223938,
149
+ "eval_runtime": 234.8912,
150
+ "eval_samples_per_second": 3.674,
151
+ "eval_steps_per_second": 0.46,
152
+ "step": 971
153
+ },
154
+ {
155
+ "epoch": 1.0298661174047374,
156
+ "grad_norm": 13.948481559753418,
157
+ "learning_rate": 1.3134225883968419e-05,
158
+ "loss": 0.8781,
159
+ "step": 1000
160
+ },
161
+ {
162
+ "epoch": 1.0813594232749741,
163
+ "grad_norm": 9.951421737670898,
164
+ "learning_rate": 1.279093717816684e-05,
165
+ "loss": 0.745,
166
+ "step": 1050
167
+ },
168
+ {
169
+ "epoch": 1.132852729145211,
170
+ "grad_norm": 3.6247968673706055,
171
+ "learning_rate": 1.244764847236526e-05,
172
+ "loss": 0.7621,
173
+ "step": 1100
174
+ },
175
+ {
176
+ "epoch": 1.184346035015448,
177
+ "grad_norm": 11.013542175292969,
178
+ "learning_rate": 1.210435976656368e-05,
179
+ "loss": 0.6503,
180
+ "step": 1150
181
+ },
182
+ {
183
+ "epoch": 1.235839340885685,
184
+ "grad_norm": 6.411041259765625,
185
+ "learning_rate": 1.1761071060762102e-05,
186
+ "loss": 0.6835,
187
+ "step": 1200
188
+ },
189
+ {
190
+ "epoch": 1.2873326467559219,
191
+ "grad_norm": 4.373047351837158,
192
+ "learning_rate": 1.1417782354960522e-05,
193
+ "loss": 0.764,
194
+ "step": 1250
195
+ },
196
+ {
197
+ "epoch": 1.3388259526261586,
198
+ "grad_norm": 5.105803966522217,
199
+ "learning_rate": 1.1074493649158944e-05,
200
+ "loss": 0.7505,
201
+ "step": 1300
202
+ },
203
+ {
204
+ "epoch": 1.3903192584963955,
205
+ "grad_norm": 9.5465726852417,
206
+ "learning_rate": 1.0731204943357365e-05,
207
+ "loss": 0.7634,
208
+ "step": 1350
209
+ },
210
+ {
211
+ "epoch": 1.4418125643666324,
212
+ "grad_norm": 6.116573810577393,
213
+ "learning_rate": 1.0387916237555787e-05,
214
+ "loss": 0.6366,
215
+ "step": 1400
216
+ },
217
+ {
218
+ "epoch": 1.4933058702368691,
219
+ "grad_norm": 3.2460689544677734,
220
+ "learning_rate": 1.0044627531754205e-05,
221
+ "loss": 0.5989,
222
+ "step": 1450
223
+ },
224
+ {
225
+ "epoch": 1.544799176107106,
226
+ "grad_norm": 6.590632915496826,
227
+ "learning_rate": 9.701338825952627e-06,
228
+ "loss": 0.6879,
229
+ "step": 1500
230
+ },
231
+ {
232
+ "epoch": 1.596292481977343,
233
+ "grad_norm": 10.803317070007324,
234
+ "learning_rate": 9.358050120151047e-06,
235
+ "loss": 0.6825,
236
+ "step": 1550
237
+ },
238
+ {
239
+ "epoch": 1.64778578784758,
240
+ "grad_norm": 4.515988349914551,
241
+ "learning_rate": 9.014761414349469e-06,
242
+ "loss": 0.669,
243
+ "step": 1600
244
+ },
245
+ {
246
+ "epoch": 1.6992790937178168,
247
+ "grad_norm": 10.735493659973145,
248
+ "learning_rate": 8.67147270854789e-06,
249
+ "loss": 0.7096,
250
+ "step": 1650
251
+ },
252
+ {
253
+ "epoch": 1.7507723995880535,
254
+ "grad_norm": 7.9211039543151855,
255
+ "learning_rate": 8.32818400274631e-06,
256
+ "loss": 0.6438,
257
+ "step": 1700
258
+ },
259
+ {
260
+ "epoch": 1.8022657054582905,
261
+ "grad_norm": 4.6672282218933105,
262
+ "learning_rate": 7.984895296944732e-06,
263
+ "loss": 0.577,
264
+ "step": 1750
265
+ },
266
+ {
267
+ "epoch": 1.8537590113285272,
268
+ "grad_norm": 8.310284614562988,
269
+ "learning_rate": 7.641606591143153e-06,
270
+ "loss": 0.6346,
271
+ "step": 1800
272
+ },
273
+ {
274
+ "epoch": 1.905252317198764,
275
+ "grad_norm": 3.1157386302948,
276
+ "learning_rate": 7.2983178853415724e-06,
277
+ "loss": 0.6966,
278
+ "step": 1850
279
+ },
280
+ {
281
+ "epoch": 1.956745623069001,
282
+ "grad_norm": 6.533708572387695,
283
+ "learning_rate": 6.955029179539994e-06,
284
+ "loss": 0.6416,
285
+ "step": 1900
286
+ },
287
+ {
288
+ "epoch": 2.0,
289
+ "eval_accuracy": 0.779837775202781,
290
+ "eval_f1": 0.7431628971507812,
291
+ "eval_loss": 0.6662706732749939,
292
+ "eval_runtime": 233.4018,
293
+ "eval_samples_per_second": 3.697,
294
+ "eval_steps_per_second": 0.463,
295
+ "step": 1942
296
+ },
297
+ {
298
+ "epoch": 2.008238928939238,
299
+ "grad_norm": 4.492364406585693,
300
+ "learning_rate": 6.611740473738415e-06,
301
+ "loss": 0.5988,
302
+ "step": 1950
303
+ },
304
+ {
305
+ "epoch": 2.059732234809475,
306
+ "grad_norm": 2.9183290004730225,
307
+ "learning_rate": 6.268451767936835e-06,
308
+ "loss": 0.5622,
309
+ "step": 2000
310
+ },
311
+ {
312
+ "epoch": 2.111225540679712,
313
+ "grad_norm": 2.6700491905212402,
314
+ "learning_rate": 5.9251630621352564e-06,
315
+ "loss": 0.568,
316
+ "step": 2050
317
+ },
318
+ {
319
+ "epoch": 2.1627188465499483,
320
+ "grad_norm": 8.388718605041504,
321
+ "learning_rate": 5.581874356333677e-06,
322
+ "loss": 0.5647,
323
+ "step": 2100
324
+ },
325
+ {
326
+ "epoch": 2.214212152420185,
327
+ "grad_norm": 5.001288414001465,
328
+ "learning_rate": 5.238585650532097e-06,
329
+ "loss": 0.5651,
330
+ "step": 2150
331
+ },
332
+ {
333
+ "epoch": 2.265705458290422,
334
+ "grad_norm": 4.762598037719727,
335
+ "learning_rate": 4.895296944730519e-06,
336
+ "loss": 0.5591,
337
+ "step": 2200
338
+ },
339
+ {
340
+ "epoch": 2.317198764160659,
341
+ "grad_norm": 3.356541872024536,
342
+ "learning_rate": 4.5520082389289396e-06,
343
+ "loss": 0.5505,
344
+ "step": 2250
345
+ },
346
+ {
347
+ "epoch": 2.368692070030896,
348
+ "grad_norm": 5.389300346374512,
349
+ "learning_rate": 4.20871953312736e-06,
350
+ "loss": 0.5705,
351
+ "step": 2300
352
+ },
353
+ {
354
+ "epoch": 2.420185375901133,
355
+ "grad_norm": 6.133719444274902,
356
+ "learning_rate": 3.865430827325781e-06,
357
+ "loss": 0.583,
358
+ "step": 2350
359
+ },
360
+ {
361
+ "epoch": 2.47167868177137,
362
+ "grad_norm": 20.499935150146484,
363
+ "learning_rate": 3.522142121524202e-06,
364
+ "loss": 0.5125,
365
+ "step": 2400
366
+ },
367
+ {
368
+ "epoch": 2.5231719876416063,
369
+ "grad_norm": 6.460402011871338,
370
+ "learning_rate": 3.178853415722623e-06,
371
+ "loss": 0.529,
372
+ "step": 2450
373
+ },
374
+ {
375
+ "epoch": 2.5746652935118437,
376
+ "grad_norm": 6.227006912231445,
377
+ "learning_rate": 2.8355647099210435e-06,
378
+ "loss": 0.5187,
379
+ "step": 2500
380
+ },
381
+ {
382
+ "epoch": 2.62615859938208,
383
+ "grad_norm": 11.52637004852295,
384
+ "learning_rate": 2.4922760041194647e-06,
385
+ "loss": 0.4892,
386
+ "step": 2550
387
+ },
388
+ {
389
+ "epoch": 2.677651905252317,
390
+ "grad_norm": 10.004145622253418,
391
+ "learning_rate": 2.1489872983178855e-06,
392
+ "loss": 0.5233,
393
+ "step": 2600
394
+ },
395
+ {
396
+ "epoch": 2.729145211122554,
397
+ "grad_norm": 5.854581832885742,
398
+ "learning_rate": 1.8056985925163065e-06,
399
+ "loss": 0.5266,
400
+ "step": 2650
401
+ },
402
+ {
403
+ "epoch": 2.780638516992791,
404
+ "grad_norm": 5.217226982116699,
405
+ "learning_rate": 1.462409886714727e-06,
406
+ "loss": 0.457,
407
+ "step": 2700
408
+ },
409
+ {
410
+ "epoch": 2.832131822863028,
411
+ "grad_norm": 3.1806907653808594,
412
+ "learning_rate": 1.119121180913148e-06,
413
+ "loss": 0.5234,
414
+ "step": 2750
415
+ },
416
+ {
417
+ "epoch": 2.883625128733265,
418
+ "grad_norm": 3.520676374435425,
419
+ "learning_rate": 7.75832475111569e-07,
420
+ "loss": 0.4662,
421
+ "step": 2800
422
+ },
423
+ {
424
+ "epoch": 2.9351184346035017,
425
+ "grad_norm": 3.366509199142456,
426
+ "learning_rate": 4.3254376930998975e-07,
427
+ "loss": 0.4962,
428
+ "step": 2850
429
+ },
430
+ {
431
+ "epoch": 2.9866117404737382,
432
+ "grad_norm": 6.643517971038818,
433
+ "learning_rate": 8.925506350841058e-08,
434
+ "loss": 0.5214,
435
+ "step": 2900
436
+ },
437
+ {
438
+ "epoch": 3.0,
439
+ "eval_accuracy": 0.7821552723059096,
440
+ "eval_f1": 0.7549173833082047,
441
+ "eval_loss": 0.6564987301826477,
442
+ "eval_runtime": 219.0646,
443
+ "eval_samples_per_second": 3.939,
444
+ "eval_steps_per_second": 0.493,
445
+ "step": 2913
446
+ }
447
+ ],
448
+ "logging_steps": 50,
449
+ "max_steps": 2913,
450
+ "num_input_tokens_seen": 0,
451
+ "num_train_epochs": 3,
452
+ "save_steps": 500,
453
+ "stateful_callbacks": {
454
+ "TrainerControl": {
455
+ "args": {
456
+ "should_epoch_stop": false,
457
+ "should_evaluate": false,
458
+ "should_log": false,
459
+ "should_save": true,
460
+ "should_training_stop": true
461
+ },
462
+ "attributes": {}
463
+ }
464
+ },
465
+ "total_flos": 3065824222963200.0,
466
+ "train_batch_size": 8,
467
+ "trial_name": null,
468
+ "trial_params": null
469
+ }