irishprancer commited on
Commit
d0ff816
·
verified ·
1 Parent(s): 0c6c6b2

Training in progress, step 3750, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fcde8fff671a9e3943a7206216f5aa93c2c1394ad15e27c488e3a5ce5334895
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7586d4afd9fadcaa6083460f8f6841b5a702dba00cbd480cc156933cd79c41b2
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efd66e2f145ff8ba7f09e1ad5b11fdf963e4fd8ce95a14181fab94269e9fb8ca
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:474241a6101d37df838210445f01853e23b11101c165e76bd69cf2cda41699a2
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44222b1bb3193020a7e558d8efc91533b7bf22b40de2edd049f9d11da894b760
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da0e93581e91c352d5ee493f505f8757c94a31fb5b16f71a9d85577535431525
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3293218b6796a13a9f95a7300ab605072092402c0dbdc9fe7b53627646555830
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cea37f66b9478389c6f1b54e20d4b883ef028f78a1a1497fe4ee340f7d291f09
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 156.52173913043478,
5
  "eval_steps": 150,
6
- "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3487,6 +3487,151 @@
3487
  "EMA_steps_per_second": 24.982,
3488
  "epoch": 156.52173913043478,
3489
  "step": 3600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3490
  }
3491
  ],
3492
  "logging_steps": 10,
@@ -3506,7 +3651,7 @@
3506
  "attributes": {}
3507
  }
3508
  },
3509
- "total_flos": 9.28760054861906e+16,
3510
  "train_batch_size": 4,
3511
  "trial_name": null,
3512
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7177689671516418,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 163.04347826086956,
5
  "eval_steps": 150,
6
+ "global_step": 3750,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3487
  "EMA_steps_per_second": 24.982,
3488
  "epoch": 156.52173913043478,
3489
  "step": 3600
3490
+ },
3491
+ {
3492
+ "epoch": 156.95652173913044,
3493
+ "grad_norm": 2.5946836471557617,
3494
+ "learning_rate": 2.513936770131954e-06,
3495
+ "loss": 0.1973,
3496
+ "step": 3610
3497
+ },
3498
+ {
3499
+ "epoch": 157.3913043478261,
3500
+ "grad_norm": 1.8816180229187012,
3501
+ "learning_rate": 2.5139335316856892e-06,
3502
+ "loss": 0.2551,
3503
+ "step": 3620
3504
+ },
3505
+ {
3506
+ "epoch": 157.82608695652175,
3507
+ "grad_norm": 1.969436764717102,
3508
+ "learning_rate": 2.5139297950203775e-06,
3509
+ "loss": 0.2349,
3510
+ "step": 3630
3511
+ },
3512
+ {
3513
+ "epoch": 158.2608695652174,
3514
+ "grad_norm": 2.1921560764312744,
3515
+ "learning_rate": 2.5139255601375007e-06,
3516
+ "loss": 0.2243,
3517
+ "step": 3640
3518
+ },
3519
+ {
3520
+ "epoch": 158.69565217391303,
3521
+ "grad_norm": 3.598989725112915,
3522
+ "learning_rate": 2.513920827038737e-06,
3523
+ "loss": 0.2276,
3524
+ "step": 3650
3525
+ },
3526
+ {
3527
+ "epoch": 159.1304347826087,
3528
+ "grad_norm": 2.583705186843872,
3529
+ "learning_rate": 2.513915595725963e-06,
3530
+ "loss": 0.2528,
3531
+ "step": 3660
3532
+ },
3533
+ {
3534
+ "epoch": 159.56521739130434,
3535
+ "grad_norm": 1.8946772813796997,
3536
+ "learning_rate": 2.5139098662012514e-06,
3537
+ "loss": 0.2368,
3538
+ "step": 3670
3539
+ },
3540
+ {
3541
+ "epoch": 160.0,
3542
+ "grad_norm": 2.685317039489746,
3543
+ "learning_rate": 2.513903638466874e-06,
3544
+ "loss": 0.2026,
3545
+ "step": 3680
3546
+ },
3547
+ {
3548
+ "epoch": 160.43478260869566,
3549
+ "grad_norm": 1.9969098567962646,
3550
+ "learning_rate": 2.5138969125252985e-06,
3551
+ "loss": 0.228,
3552
+ "step": 3690
3553
+ },
3554
+ {
3555
+ "epoch": 160.8695652173913,
3556
+ "grad_norm": 1.5398179292678833,
3557
+ "learning_rate": 2.5138896883791913e-06,
3558
+ "loss": 0.2437,
3559
+ "step": 3700
3560
+ },
3561
+ {
3562
+ "epoch": 161.30434782608697,
3563
+ "grad_norm": 1.6144198179244995,
3564
+ "learning_rate": 2.5138819660314154e-06,
3565
+ "loss": 0.2764,
3566
+ "step": 3710
3567
+ },
3568
+ {
3569
+ "epoch": 161.7391304347826,
3570
+ "grad_norm": 2.053276777267456,
3571
+ "learning_rate": 2.513873745485033e-06,
3572
+ "loss": 0.2278,
3573
+ "step": 3720
3574
+ },
3575
+ {
3576
+ "epoch": 162.17391304347825,
3577
+ "grad_norm": 2.3131282329559326,
3578
+ "learning_rate": 2.513865026743301e-06,
3579
+ "loss": 0.2157,
3580
+ "step": 3730
3581
+ },
3582
+ {
3583
+ "epoch": 162.6086956521739,
3584
+ "grad_norm": 2.0463197231292725,
3585
+ "learning_rate": 2.5138558098096753e-06,
3586
+ "loss": 0.2233,
3587
+ "step": 3740
3588
+ },
3589
+ {
3590
+ "epoch": 163.04347826086956,
3591
+ "grad_norm": 2.3754689693450928,
3592
+ "learning_rate": 2.51384609468781e-06,
3593
+ "loss": 0.2231,
3594
+ "step": 3750
3595
+ },
3596
+ {
3597
+ "epoch": 163.04347826086956,
3598
+ "eval_loss": 0.9596047401428223,
3599
+ "eval_runtime": 0.4563,
3600
+ "eval_samples_per_second": 21.916,
3601
+ "eval_steps_per_second": 21.916,
3602
+ "step": 3750
3603
+ },
3604
+ {
3605
+ "Start_State_loss": 0.861186683177948,
3606
+ "Start_State_runtime": 0.456,
3607
+ "Start_State_samples_per_second": 21.93,
3608
+ "Start_State_steps_per_second": 21.93,
3609
+ "epoch": 163.04347826086956,
3610
+ "step": 3750
3611
+ },
3612
+ {
3613
+ "Raw_Model_loss": 0.9596047401428223,
3614
+ "Raw_Model_runtime": 0.4822,
3615
+ "Raw_Model_samples_per_second": 20.737,
3616
+ "Raw_Model_steps_per_second": 20.737,
3617
+ "epoch": 163.04347826086956,
3618
+ "step": 3750
3619
+ },
3620
+ {
3621
+ "SWA_loss": 0.7939289808273315,
3622
+ "SWA_runtime": 0.4295,
3623
+ "SWA_samples_per_second": 23.281,
3624
+ "SWA_steps_per_second": 23.281,
3625
+ "epoch": 163.04347826086956,
3626
+ "step": 3750
3627
+ },
3628
+ {
3629
+ "EMA_loss": 0.8596266508102417,
3630
+ "EMA_runtime": 0.4196,
3631
+ "EMA_samples_per_second": 23.833,
3632
+ "EMA_steps_per_second": 23.833,
3633
+ "epoch": 163.04347826086956,
3634
+ "step": 3750
3635
  }
3636
  ],
3637
  "logging_steps": 10,
 
3651
  "attributes": {}
3652
  }
3653
  },
3654
+ "total_flos": 9.668631592798618e+16,
3655
  "train_batch_size": 4,
3656
  "trial_name": null,
3657
  "trial_params": null