PEFT
Safetensors
Generated from Trainer
paulrichmond committed on
Commit
81ffb5b
·
verified ·
1 Parent(s): 14bead7

Training in progress, step 41612, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ffe6d423820df80905bdc0d2e0141ea93fec4c98e1080dddba651c9575bab46
3
  size 83945296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa5577fd1b503ec752baeafdf2de06e57a83deb0e1e57a41cda45bf9427c95e9
3
  size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:34b46b4bc8f69e2993bad79cb5d4cb2101a8c7ba0f5b315a917b1803781f769e
3
  size 168150738
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e723c05be219624f96a2ddaa266b6270ac90d0bba2a1e5f85e21a833ea30c99b
3
  size 168150738
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:331fcef64d0fd4d570473f6434565278f7bc1c317d278f1f3c839af5759fbcc3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45deb07b720f6c59d34e29c1d4b4f3b955196381388fc9d375a97ed58b1dc34e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33c07fee6b035b991fac72a5c5f8f0cff6e9a9643aafb454b87dbdd0b8b3f3a6
3
  size 1000
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:733fb082b772b237d84bcc94add0fd700f3286fcadb7278845beeba39bafa7fe
3
  size 1000
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.996058829183889,
5
  "eval_steps": 4619,
6
- "global_step": 41571,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -291184,6 +291184,293 @@
291184
  "eval_test_samples_per_second": 12.589,
291185
  "eval_test_steps_per_second": 0.787,
291186
  "step": 41571
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291187
  }
291188
  ],
291189
  "logging_steps": 1,
@@ -291198,12 +291485,12 @@
291198
  "should_evaluate": false,
291199
  "should_log": false,
291200
  "should_save": true,
291201
- "should_training_stop": false
291202
  },
291203
  "attributes": {}
291204
  }
291205
  },
291206
- "total_flos": 1.0527481641298231e+19,
291207
  "train_batch_size": 16,
291208
  "trial_name": null,
291209
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.0,
5
  "eval_steps": 4619,
6
+ "global_step": 41612,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
291184
  "eval_test_samples_per_second": 12.589,
291185
  "eval_test_steps_per_second": 0.787,
291186
  "step": 41571
291187
+ },
291188
+ {
291189
+ "epoch": 3.996154955301355,
291190
+ "grad_norm": 1.3619403839111328,
291191
+ "learning_rate": 3.00083601127463e-06,
291192
+ "loss": 1.9584,
291193
+ "step": 41572
291194
+ },
291195
+ {
291196
+ "epoch": 3.9962510814188215,
291197
+ "grad_norm": 1.1561639308929443,
291198
+ "learning_rate": 3.0007947332547655e-06,
291199
+ "loss": 1.902,
291200
+ "step": 41573
291201
+ },
291202
+ {
291203
+ "epoch": 3.996347207536288,
291204
+ "grad_norm": 1.2157913446426392,
291205
+ "learning_rate": 3.000754500244385e-06,
291206
+ "loss": 1.8448,
291207
+ "step": 41574
291208
+ },
291209
+ {
291210
+ "epoch": 3.9964433336537537,
291211
+ "grad_norm": 1.290919542312622,
291212
+ "learning_rate": 3.000715312243752e-06,
291213
+ "loss": 2.1262,
291214
+ "step": 41575
291215
+ },
291216
+ {
291217
+ "epoch": 3.9965394597712196,
291218
+ "grad_norm": 1.218934416770935,
291219
+ "learning_rate": 3.00067716925318e-06,
291220
+ "loss": 1.875,
291221
+ "step": 41576
291222
+ },
291223
+ {
291224
+ "epoch": 3.996635585888686,
291225
+ "grad_norm": 1.2158639430999756,
291226
+ "learning_rate": 3.0006400712729e-06,
291227
+ "loss": 1.8464,
291228
+ "step": 41577
291229
+ },
291230
+ {
291231
+ "epoch": 3.9967317120061523,
291232
+ "grad_norm": 1.2294507026672363,
291233
+ "learning_rate": 3.0006040183031917e-06,
291234
+ "loss": 1.8543,
291235
+ "step": 41578
291236
+ },
291237
+ {
291238
+ "epoch": 3.996827838123618,
291239
+ "grad_norm": 1.2768683433532715,
291240
+ "learning_rate": 3.0005690103443025e-06,
291241
+ "loss": 1.9559,
291242
+ "step": 41579
291243
+ },
291244
+ {
291245
+ "epoch": 3.996923964241084,
291246
+ "grad_norm": 1.1993067264556885,
291247
+ "learning_rate": 3.000535047396497e-06,
291248
+ "loss": 1.8008,
291249
+ "step": 41580
291250
+ },
291251
+ {
291252
+ "epoch": 3.9970200903585504,
291253
+ "grad_norm": 1.1862679719924927,
291254
+ "learning_rate": 3.000502129459989e-06,
291255
+ "loss": 1.7558,
291256
+ "step": 41581
291257
+ },
291258
+ {
291259
+ "epoch": 3.997116216476017,
291260
+ "grad_norm": 1.3213326930999756,
291261
+ "learning_rate": 3.000470256535026e-06,
291262
+ "loss": 1.879,
291263
+ "step": 41582
291264
+ },
291265
+ {
291266
+ "epoch": 3.9972123425934827,
291267
+ "grad_norm": 1.3456659317016602,
291268
+ "learning_rate": 3.0004394286218213e-06,
291269
+ "loss": 2.033,
291270
+ "step": 41583
291271
+ },
291272
+ {
291273
+ "epoch": 3.9973084687109486,
291274
+ "grad_norm": 1.140081524848938,
291275
+ "learning_rate": 3.0004096457205907e-06,
291276
+ "loss": 1.8206,
291277
+ "step": 41584
291278
+ },
291279
+ {
291280
+ "epoch": 3.997404594828415,
291281
+ "grad_norm": 1.2168374061584473,
291282
+ "learning_rate": 3.0003809078315644e-06,
291283
+ "loss": 1.7936,
291284
+ "step": 41585
291285
+ },
291286
+ {
291287
+ "epoch": 3.9975007209458813,
291288
+ "grad_norm": 1.3345106840133667,
291289
+ "learning_rate": 3.0003532149549234e-06,
291290
+ "loss": 1.8446,
291291
+ "step": 41586
291292
+ },
291293
+ {
291294
+ "epoch": 3.997596847063347,
291295
+ "grad_norm": 1.368496298789978,
291296
+ "learning_rate": 3.0003265670908657e-06,
291297
+ "loss": 1.8578,
291298
+ "step": 41587
291299
+ },
291300
+ {
291301
+ "epoch": 3.997692973180813,
291302
+ "grad_norm": 1.1256635189056396,
291303
+ "learning_rate": 3.00030096423959e-06,
291304
+ "loss": 1.6587,
291305
+ "step": 41588
291306
+ },
291307
+ {
291308
+ "epoch": 3.9977890992982794,
291309
+ "grad_norm": 1.1468359231948853,
291310
+ "learning_rate": 3.0002764064012764e-06,
291311
+ "loss": 1.8116,
291312
+ "step": 41589
291313
+ },
291314
+ {
291315
+ "epoch": 3.9978852254157453,
291316
+ "grad_norm": 1.3567719459533691,
291317
+ "learning_rate": 3.0002528935760904e-06,
291318
+ "loss": 2.0766,
291319
+ "step": 41590
291320
+ },
291321
+ {
291322
+ "epoch": 3.9979813515332117,
291323
+ "grad_norm": 1.2336657047271729,
291324
+ "learning_rate": 3.0002304257641807e-06,
291325
+ "loss": 1.8646,
291326
+ "step": 41591
291327
+ },
291328
+ {
291329
+ "epoch": 3.9980774776506776,
291330
+ "grad_norm": 1.2067503929138184,
291331
+ "learning_rate": 3.0002090029657445e-06,
291332
+ "loss": 1.7902,
291333
+ "step": 41592
291334
+ },
291335
+ {
291336
+ "epoch": 3.998173603768144,
291337
+ "grad_norm": 1.170020580291748,
291338
+ "learning_rate": 3.0001886251808972e-06,
291339
+ "loss": 1.8235,
291340
+ "step": 41593
291341
+ },
291342
+ {
291343
+ "epoch": 3.99826972988561,
291344
+ "grad_norm": 1.202685832977295,
291345
+ "learning_rate": 3.000169292409788e-06,
291346
+ "loss": 1.8446,
291347
+ "step": 41594
291348
+ },
291349
+ {
291350
+ "epoch": 3.998365856003076,
291351
+ "grad_norm": 1.1868846416473389,
291352
+ "learning_rate": 3.0001510046525807e-06,
291353
+ "loss": 1.9727,
291354
+ "step": 41595
291355
+ },
291356
+ {
291357
+ "epoch": 3.998461982120542,
291358
+ "grad_norm": 1.2645318508148193,
291359
+ "learning_rate": 3.0001337619093587e-06,
291360
+ "loss": 1.8983,
291361
+ "step": 41596
291362
+ },
291363
+ {
291364
+ "epoch": 3.9985581082380084,
291365
+ "grad_norm": 1.2477843761444092,
291366
+ "learning_rate": 3.000117564180286e-06,
291367
+ "loss": 1.9145,
291368
+ "step": 41597
291369
+ },
291370
+ {
291371
+ "epoch": 3.9986542343554743,
291372
+ "grad_norm": 1.0654802322387695,
291373
+ "learning_rate": 3.0001024114654626e-06,
291374
+ "loss": 1.8887,
291375
+ "step": 41598
291376
+ },
291377
+ {
291378
+ "epoch": 3.9987503604729406,
291379
+ "grad_norm": 1.2192732095718384,
291380
+ "learning_rate": 3.000088303764986e-06,
291381
+ "loss": 2.0114,
291382
+ "step": 41599
291383
+ },
291384
+ {
291385
+ "epoch": 3.9988464865904065,
291386
+ "grad_norm": 1.243283987045288,
291387
+ "learning_rate": 3.0000752410789563e-06,
291388
+ "loss": 1.9751,
291389
+ "step": 41600
291390
+ },
291391
+ {
291392
+ "epoch": 3.998942612707873,
291393
+ "grad_norm": 1.2228025197982788,
291394
+ "learning_rate": 3.0000632234074715e-06,
291395
+ "loss": 2.0104,
291396
+ "step": 41601
291397
+ },
291398
+ {
291399
+ "epoch": 3.999038738825339,
291400
+ "grad_norm": 1.0860533714294434,
291401
+ "learning_rate": 3.000052250750632e-06,
291402
+ "loss": 1.5139,
291403
+ "step": 41602
291404
+ },
291405
+ {
291406
+ "epoch": 3.999134864942805,
291407
+ "grad_norm": 1.1760326623916626,
291408
+ "learning_rate": 3.0000423231084854e-06,
291409
+ "loss": 1.7311,
291410
+ "step": 41603
291411
+ },
291412
+ {
291413
+ "epoch": 3.999230991060271,
291414
+ "grad_norm": 1.3011332750320435,
291415
+ "learning_rate": 3.0000334404811154e-06,
291416
+ "loss": 1.8016,
291417
+ "step": 41604
291418
+ },
291419
+ {
291420
+ "epoch": 3.999327117177737,
291421
+ "grad_norm": 1.3964388370513916,
291422
+ "learning_rate": 3.0000256028685704e-06,
291423
+ "loss": 1.9478,
291424
+ "step": 41605
291425
+ },
291426
+ {
291427
+ "epoch": 3.9994232432952033,
291428
+ "grad_norm": 1.4257408380508423,
291429
+ "learning_rate": 3.0000188102709344e-06,
291430
+ "loss": 1.9333,
291431
+ "step": 41606
291432
+ },
291433
+ {
291434
+ "epoch": 3.9995193694126696,
291435
+ "grad_norm": 1.2944358587265015,
291436
+ "learning_rate": 3.0000130626882386e-06,
291437
+ "loss": 1.9648,
291438
+ "step": 41607
291439
+ },
291440
+ {
291441
+ "epoch": 3.9996154955301355,
291442
+ "grad_norm": 1.1968276500701904,
291443
+ "learning_rate": 3.0000083601205175e-06,
291444
+ "loss": 1.7864,
291445
+ "step": 41608
291446
+ },
291447
+ {
291448
+ "epoch": 3.9997116216476014,
291449
+ "grad_norm": 1.2650930881500244,
291450
+ "learning_rate": 3.000004702567804e-06,
291451
+ "loss": 2.0297,
291452
+ "step": 41609
291453
+ },
291454
+ {
291455
+ "epoch": 3.9998077477650678,
291456
+ "grad_norm": 1.3876334428787231,
291457
+ "learning_rate": 3.0000020900301457e-06,
291458
+ "loss": 1.9798,
291459
+ "step": 41610
291460
+ },
291461
+ {
291462
+ "epoch": 3.999903873882534,
291463
+ "grad_norm": 1.294616460800171,
291464
+ "learning_rate": 3.0000005225075443e-06,
291465
+ "loss": 1.8283,
291466
+ "step": 41611
291467
+ },
291468
+ {
291469
+ "epoch": 4.0,
291470
+ "grad_norm": 1.340853214263916,
291471
+ "learning_rate": 2.9999999999999997e-06,
291472
+ "loss": 1.9542,
291473
+ "step": 41612
291474
  }
291475
  ],
291476
  "logging_steps": 1,
 
291485
  "should_evaluate": false,
291486
  "should_log": false,
291487
  "should_save": true,
291488
+ "should_training_stop": true
291489
  },
291490
  "attributes": {}
291491
  }
291492
  },
291493
+ "total_flos": 1.0537773454858715e+19,
291494
  "train_batch_size": 16,
291495
  "trial_name": null,
291496
  "trial_params": null