irishprancer commited on
Commit
d93b081
·
verified ·
1 Parent(s): ef6bc7e

Training in progress, step 4800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a80296700bbec8fac1fb407554306ca32ff6044fcb3bf9450e5d4002d6675b80
3
  size 1482788592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ece60875a5af29008a1db947f37d0c4e41a5f40d67190f9268f23085b8ae7125
3
  size 1482788592
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9140ffcb7a076479a2cdfed6c16b89cae69908eed57b3fe47619040eeed8784
3
  size 2897966842
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b613bbe7e00aa50cc4953564c3dd94412444451a44d0c95d0dfbdaf287ec8a09
3
  size 2897966842
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6ad74da39d39419907bbd72b2c6fb28029f5ea893201638cfb40daf17a380719
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc93c8df3d2c508d95b256c21be191d97f1b117d9c86f242d9f503ffa40419f3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f72d12e08f2981b12196a00ff48fe5cac0ba4d9d1aa54f91464a195ecde87c8
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:716d7ef0d2def98440e32b2cba336f73e613b85c0427aef8f0c8a6789d61bd46
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.6921895742416382,
3
  "best_model_checkpoint": "./output/checkpoint-4500",
4
- "epoch": 0.14539110206455366,
5
  "eval_steps": 150,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3397,6 +3397,232 @@
3397
  "eval_samples_per_second": 10.053,
3398
  "eval_steps_per_second": 10.053,
3399
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3400
  }
3401
  ],
3402
  "logging_steps": 10,
@@ -3416,7 +3642,7 @@
3416
  "attributes": {}
3417
  }
3418
  },
3419
- "total_flos": 3.769852918977331e+17,
3420
  "train_batch_size": 4,
3421
  "trial_name": null,
3422
  "trial_params": null
 
1
  {
2
  "best_metric": 1.6921895742416382,
3
  "best_model_checkpoint": "./output/checkpoint-4500",
4
+ "epoch": 0.15508384220219057,
5
  "eval_steps": 150,
6
+ "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3397
  "eval_samples_per_second": 10.053,
3398
  "eval_steps_per_second": 10.053,
3399
  "step": 4500
3400
+ },
3401
+ {
3402
+ "epoch": 0.14571419340247488,
3403
+ "grad_norm": 7.294159889221191,
3404
+ "learning_rate": 5.472047830984499e-07,
3405
+ "loss": 1.7577,
3406
+ "step": 4510
3407
+ },
3408
+ {
3409
+ "epoch": 0.14603728474039612,
3410
+ "grad_norm": 9.507523536682129,
3411
+ "learning_rate": 5.252725889984403e-07,
3412
+ "loss": 1.7748,
3413
+ "step": 4520
3414
+ },
3415
+ {
3416
+ "epoch": 0.14636037607831734,
3417
+ "grad_norm": 10.296547889709473,
3418
+ "learning_rate": 5.037783829820298e-07,
3419
+ "loss": 1.6676,
3420
+ "step": 4530
3421
+ },
3422
+ {
3423
+ "epoch": 0.14668346741623858,
3424
+ "grad_norm": 10.683934211730957,
3425
+ "learning_rate": 4.827230485918372e-07,
3426
+ "loss": 1.703,
3427
+ "step": 4540
3428
+ },
3429
+ {
3430
+ "epoch": 0.1470065587541598,
3431
+ "grad_norm": 13.149202346801758,
3432
+ "learning_rate": 4.6210745133019236e-07,
3433
+ "loss": 1.8596,
3434
+ "step": 4550
3435
+ },
3436
+ {
3437
+ "epoch": 0.14732965009208104,
3438
+ "grad_norm": 10.627421379089355,
3439
+ "learning_rate": 4.419324386235529e-07,
3440
+ "loss": 1.5863,
3441
+ "step": 4560
3442
+ },
3443
+ {
3444
+ "epoch": 0.14765274143000226,
3445
+ "grad_norm": 8.185441970825195,
3446
+ "learning_rate": 4.2219883978767386e-07,
3447
+ "loss": 1.7421,
3448
+ "step": 4570
3449
+ },
3450
+ {
3451
+ "epoch": 0.1479758327679235,
3452
+ "grad_norm": 6.5582804679870605,
3453
+ "learning_rate": 4.029074659935082e-07,
3454
+ "loss": 1.7486,
3455
+ "step": 4580
3456
+ },
3457
+ {
3458
+ "epoch": 0.14829892410584472,
3459
+ "grad_norm": 7.293984413146973,
3460
+ "learning_rate": 3.8405911023387444e-07,
3461
+ "loss": 1.7631,
3462
+ "step": 4590
3463
+ },
3464
+ {
3465
+ "epoch": 0.14862201544376596,
3466
+ "grad_norm": 10.495855331420898,
3467
+ "learning_rate": 3.6565454729085526e-07,
3468
+ "loss": 1.8289,
3469
+ "step": 4600
3470
+ },
3471
+ {
3472
+ "epoch": 0.14894510678168718,
3473
+ "grad_norm": 7.07685661315918,
3474
+ "learning_rate": 3.4769453370394753e-07,
3475
+ "loss": 1.6386,
3476
+ "step": 4610
3477
+ },
3478
+ {
3479
+ "epoch": 0.14926819811960843,
3480
+ "grad_norm": 8.069764137268066,
3481
+ "learning_rate": 3.301798077389637e-07,
3482
+ "loss": 1.585,
3483
+ "step": 4620
3484
+ },
3485
+ {
3486
+ "epoch": 0.14959128945752964,
3487
+ "grad_norm": 8.399779319763184,
3488
+ "learning_rate": 3.1311108935768926e-07,
3489
+ "loss": 1.5544,
3490
+ "step": 4630
3491
+ },
3492
+ {
3493
+ "epoch": 0.14991438079545089,
3494
+ "grad_norm": 7.10072660446167,
3495
+ "learning_rate": 2.964890801882817e-07,
3496
+ "loss": 1.7765,
3497
+ "step": 4640
3498
+ },
3499
+ {
3500
+ "epoch": 0.1502374721333721,
3501
+ "grad_norm": 12.693696022033691,
3502
+ "learning_rate": 2.8031446349643393e-07,
3503
+ "loss": 1.5691,
3504
+ "step": 4650
3505
+ },
3506
+ {
3507
+ "epoch": 0.1502374721333721,
3508
+ "eval_loss": 1.6924811601638794,
3509
+ "eval_runtime": 50.4888,
3510
+ "eval_samples_per_second": 9.923,
3511
+ "eval_steps_per_second": 9.923,
3512
+ "step": 4650
3513
+ },
3514
+ {
3515
+ "epoch": 0.15056056347129335,
3516
+ "grad_norm": 8.841912269592285,
3517
+ "learning_rate": 2.645879041572891e-07,
3518
+ "loss": 1.6589,
3519
+ "step": 4660
3520
+ },
3521
+ {
3522
+ "epoch": 0.15088365480921456,
3523
+ "grad_norm": 7.690126895904541,
3524
+ "learning_rate": 2.4931004862810295e-07,
3525
+ "loss": 1.7137,
3526
+ "step": 4670
3527
+ },
3528
+ {
3529
+ "epoch": 0.1512067461471358,
3530
+ "grad_norm": 14.600467681884766,
3531
+ "learning_rate": 2.3448152492167586e-07,
3532
+ "loss": 1.8001,
3533
+ "step": 4680
3534
+ },
3535
+ {
3536
+ "epoch": 0.15152983748505702,
3537
+ "grad_norm": 8.619688034057617,
3538
+ "learning_rate": 2.201029425805393e-07,
3539
+ "loss": 1.7615,
3540
+ "step": 4690
3541
+ },
3542
+ {
3543
+ "epoch": 0.15185292882297827,
3544
+ "grad_norm": 12.033727645874023,
3545
+ "learning_rate": 2.061748926518972e-07,
3546
+ "loss": 1.6317,
3547
+ "step": 4700
3548
+ },
3549
+ {
3550
+ "epoch": 0.15217602016089948,
3551
+ "grad_norm": 9.276659965515137,
3552
+ "learning_rate": 1.9269794766333073e-07,
3553
+ "loss": 1.6155,
3554
+ "step": 4710
3555
+ },
3556
+ {
3557
+ "epoch": 0.15249911149882073,
3558
+ "grad_norm": 8.645523071289062,
3559
+ "learning_rate": 1.7967266159925864e-07,
3560
+ "loss": 1.5958,
3561
+ "step": 4720
3562
+ },
3563
+ {
3564
+ "epoch": 0.15282220283674194,
3565
+ "grad_norm": 13.718961715698242,
3566
+ "learning_rate": 1.670995698781777e-07,
3567
+ "loss": 1.5768,
3568
+ "step": 4730
3569
+ },
3570
+ {
3571
+ "epoch": 0.1531452941746632,
3572
+ "grad_norm": 12.2525634765625,
3573
+ "learning_rate": 1.549791893306424e-07,
3574
+ "loss": 1.571,
3575
+ "step": 4740
3576
+ },
3577
+ {
3578
+ "epoch": 0.1534683855125844,
3579
+ "grad_norm": 7.851583003997803,
3580
+ "learning_rate": 1.4331201817802332e-07,
3581
+ "loss": 1.7923,
3582
+ "step": 4750
3583
+ },
3584
+ {
3585
+ "epoch": 0.15379147685050565,
3586
+ "grad_norm": 10.048659324645996,
3587
+ "learning_rate": 1.320985360120322e-07,
3588
+ "loss": 1.7102,
3589
+ "step": 4760
3590
+ },
3591
+ {
3592
+ "epoch": 0.15411456818842686,
3593
+ "grad_norm": 9.430795669555664,
3594
+ "learning_rate": 1.2133920377499848e-07,
3595
+ "loss": 1.6879,
3596
+ "step": 4770
3597
+ },
3598
+ {
3599
+ "epoch": 0.1544376595263481,
3600
+ "grad_norm": 12.329809188842773,
3601
+ "learning_rate": 1.1103446374092981e-07,
3602
+ "loss": 1.7557,
3603
+ "step": 4780
3604
+ },
3605
+ {
3606
+ "epoch": 0.15476075086426933,
3607
+ "grad_norm": 11.180129051208496,
3608
+ "learning_rate": 1.0118473949732765e-07,
3609
+ "loss": 1.7791,
3610
+ "step": 4790
3611
+ },
3612
+ {
3613
+ "epoch": 0.15508384220219057,
3614
+ "grad_norm": 8.690634727478027,
3615
+ "learning_rate": 9.179043592777716e-08,
3616
+ "loss": 1.6464,
3617
+ "step": 4800
3618
+ },
3619
+ {
3620
+ "epoch": 0.15508384220219057,
3621
+ "eval_loss": 1.6925097703933716,
3622
+ "eval_runtime": 44.8573,
3623
+ "eval_samples_per_second": 11.169,
3624
+ "eval_steps_per_second": 11.169,
3625
+ "step": 4800
3626
  }
3627
  ],
3628
  "logging_steps": 10,
 
3642
  "attributes": {}
3643
  }
3644
  },
3645
+ "total_flos": 4.0155209275981824e+17,
3646
  "train_batch_size": 4,
3647
  "trial_name": null,
3648
  "trial_params": null