irishprancer commited on
Commit
e105fa7
·
verified ·
1 Parent(s): 8dce557

Training in progress, step 4950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e0b1badd7d84a5c0c9aef2bf63d064ebfdd33636947dbe1e3894f11675840815
3
  size 1054440872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f24adccb95c81ee4912788a49365bc365e72a83cf1379fe122a59887e5c66b52
3
  size 1054440872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02539deaff23dcce772b150af7d433fea548ab5aa433ebf90e2971806afc26ee
3
  size 2041777658
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e10e2705285563c433da849a5f9c91279b1ae7ed198ac73b107604b1fbb1f2b9
3
  size 2041777658
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83122a59510e09cc6734a576e969d7de23f6fcba00de763d07e4c6f71f89bcdd
3
  size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e1fa13cea721d252d1b5381877e675d0dd5b5c6c16c73ad16af5ee83182613
3
  size 14180
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40e90e2223ff2c7ae70ba3b595790f41997f446b5fed8742eb4eecd8d1c96aed
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2786f10edc62c6468aaba4027f9b5395a026ade9759e1b2908beee8dcad71ab
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.23261234164237976,
3
- "best_model_checkpoint": "./output/checkpoint-4800",
4
- "epoch": 0.24390243902439024,
5
  "eval_steps": 150,
6
- "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
3623
  "eval_samples_per_second": 13.147,
3624
  "eval_steps_per_second": 13.147,
3625
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3626
  }
3627
  ],
3628
  "logging_steps": 10,
@@ -3642,7 +3755,7 @@
3642
  "attributes": {}
3643
  }
3644
  },
3645
- "total_flos": 3.754024479625052e+17,
3646
  "train_batch_size": 32,
3647
  "trial_name": null,
3648
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.2325473576784134,
3
+ "best_model_checkpoint": "./output/checkpoint-4950",
4
+ "epoch": 0.25152439024390244,
5
  "eval_steps": 150,
6
+ "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3623
  "eval_samples_per_second": 13.147,
3624
  "eval_steps_per_second": 13.147,
3625
  "step": 4800
3626
+ },
3627
+ {
3628
+ "epoch": 0.24441056910569106,
3629
+ "grad_norm": 1.7274725437164307,
3630
+ "learning_rate": 2.4152175047355394e-07,
3631
+ "loss": 0.2246,
3632
+ "step": 4810
3633
+ },
3634
+ {
3635
+ "epoch": 0.24491869918699186,
3636
+ "grad_norm": 1.694223403930664,
3637
+ "learning_rate": 2.167949258434371e-07,
3638
+ "loss": 0.2436,
3639
+ "step": 4820
3640
+ },
3641
+ {
3642
+ "epoch": 0.24542682926829268,
3643
+ "grad_norm": 1.4516280889511108,
3644
+ "learning_rate": 1.93398911576507e-07,
3645
+ "loss": 0.2293,
3646
+ "step": 4830
3647
+ },
3648
+ {
3649
+ "epoch": 0.2459349593495935,
3650
+ "grad_norm": 1.6254228353500366,
3651
+ "learning_rate": 1.71334669391276e-07,
3652
+ "loss": 0.2742,
3653
+ "step": 4840
3654
+ },
3655
+ {
3656
+ "epoch": 0.2464430894308943,
3657
+ "grad_norm": 1.6434032917022705,
3658
+ "learning_rate": 1.5060310626231875e-07,
3659
+ "loss": 0.2547,
3660
+ "step": 4850
3661
+ },
3662
+ {
3663
+ "epoch": 0.24695121951219512,
3664
+ "grad_norm": 1.3775815963745117,
3665
+ "learning_rate": 1.3120507438299515e-07,
3666
+ "loss": 0.2243,
3667
+ "step": 4860
3668
+ },
3669
+ {
3670
+ "epoch": 0.24745934959349594,
3671
+ "grad_norm": 1.8971216678619385,
3672
+ "learning_rate": 1.1314137113042047e-07,
3673
+ "loss": 0.2544,
3674
+ "step": 4870
3675
+ },
3676
+ {
3677
+ "epoch": 0.24796747967479674,
3678
+ "grad_norm": 1.6187238693237305,
3679
+ "learning_rate": 9.641273903268614e-08,
3680
+ "loss": 0.2411,
3681
+ "step": 4880
3682
+ },
3683
+ {
3684
+ "epoch": 0.24847560975609756,
3685
+ "grad_norm": 1.726989507675171,
3686
+ "learning_rate": 8.101986573833107e-08,
3687
+ "loss": 0.2268,
3688
+ "step": 4890
3689
+ },
3690
+ {
3691
+ "epoch": 0.24898373983739838,
3692
+ "grad_norm": 1.6802349090576172,
3693
+ "learning_rate": 6.696338398808907e-08,
3694
+ "loss": 0.2528,
3695
+ "step": 4900
3696
+ },
3697
+ {
3698
+ "epoch": 0.24949186991869918,
3699
+ "grad_norm": 1.6394628286361694,
3700
+ "learning_rate": 5.4243871588872467e-08,
3701
+ "loss": 0.2461,
3702
+ "step": 4910
3703
+ },
3704
+ {
3705
+ "epoch": 0.25,
3706
+ "grad_norm": 1.4841700792312622,
3707
+ "learning_rate": 4.286185139000989e-08,
3708
+ "loss": 0.2774,
3709
+ "step": 4920
3710
+ },
3711
+ {
3712
+ "epoch": 0.2505081300813008,
3713
+ "grad_norm": 1.4838964939117432,
3714
+ "learning_rate": 3.281779126178181e-08,
3715
+ "loss": 0.2609,
3716
+ "step": 4930
3717
+ },
3718
+ {
3719
+ "epoch": 0.25101626016260165,
3720
+ "grad_norm": 1.592568278312683,
3721
+ "learning_rate": 2.411210407614891e-08,
3722
+ "loss": 0.2217,
3723
+ "step": 4940
3724
+ },
3725
+ {
3726
+ "epoch": 0.25152439024390244,
3727
+ "grad_norm": 1.7387011051177979,
3728
+ "learning_rate": 1.6745147689821428e-08,
3729
+ "loss": 0.2692,
3730
+ "step": 4950
3731
+ },
3732
+ {
3733
+ "epoch": 0.25152439024390244,
3734
+ "eval_loss": 0.2325473576784134,
3735
+ "eval_runtime": 38.6311,
3736
+ "eval_samples_per_second": 12.943,
3737
+ "eval_steps_per_second": 12.943,
3738
+ "step": 4950
3739
  }
3740
  ],
3741
  "logging_steps": 10,
 
3755
  "attributes": {}
3756
  }
3757
  },
3758
+ "total_flos": 3.871514436033577e+17,
3759
  "train_batch_size": 32,
3760
  "trial_name": null,
3761
  "trial_params": null