irishprancer committed on
Commit
f354587
·
verified ·
1 Parent(s): 314f81a

Training in progress, step 4950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:991f10c981f9592387dd7753b36c43b21c6c2af47814597501b8296c5914daa7
3
  size 669617872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc300f720c52ce6f3cd18b320232dea68406fcf133e08011500c5f512f789da1
3
  size 669617872
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bb66faa951bc7aca6dba91f0673552ec3ccf8f61efc7d57cc9c47eb06178a7e
3
  size 1320908538
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a04644459eff389650195ca99eb4a8bb847ef98b8429328f8e274f35cb977ec7
3
  size 1320908538
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f7e925c713f0d6846b5c9763a1639e269b3beca389e894b85a658f539d66716
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3c92af5e941d9081ad40d95b82eb20070bde831678358994cc336d0b329ec34
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94a369128c5b93a4b43c8e66197ae4ca667e5997fbbafe1d1355adecc3c6ef0f
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f242c3383ac7154df1ed246b8a5975289a3670034ee2643ea1b5bdc78340f806
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6936928629875183,
3
- "best_model_checkpoint": "./output/checkpoint-4800",
4
- "epoch": 0.11656143759106362,
5
  "eval_steps": 150,
6
- "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
3623
  "eval_samples_per_second": 9.972,
3624
  "eval_steps_per_second": 9.972,
3625
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3626
  }
3627
  ],
3628
  "logging_steps": 10,
@@ -3642,7 +3755,7 @@
3642
  "attributes": {}
3643
  }
3644
  },
3645
- "total_flos": 5.530457526439772e+17,
3646
  "train_batch_size": 16,
3647
  "trial_name": null,
3648
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.6936842203140259,
3
+ "best_model_checkpoint": "./output/checkpoint-4950",
4
+ "epoch": 0.12020398251578436,
5
  "eval_steps": 150,
6
+ "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3623
  "eval_samples_per_second": 9.972,
3624
  "eval_steps_per_second": 9.972,
3625
  "step": 4800
3626
+ },
3627
+ {
3628
+ "epoch": 0.11680427391937834,
3629
+ "grad_norm": 14.187041282653809,
3630
+ "learning_rate": 5.028799785689844e-08,
3631
+ "loss": 0.6897,
3632
+ "step": 4810
3633
+ },
3634
+ {
3635
+ "epoch": 0.11704711024769306,
3636
+ "grad_norm": 14.955177307128906,
3637
+ "learning_rate": 4.513954848714541e-08,
3638
+ "loss": 0.7353,
3639
+ "step": 4820
3640
+ },
3641
+ {
3642
+ "epoch": 0.11728994657600778,
3643
+ "grad_norm": 13.828136444091797,
3644
+ "learning_rate": 4.026819129878247e-08,
3645
+ "loss": 0.7186,
3646
+ "step": 4830
3647
+ },
3648
+ {
3649
+ "epoch": 0.11753278290432248,
3650
+ "grad_norm": 14.279440879821777,
3651
+ "learning_rate": 3.5674126534224215e-08,
3652
+ "loss": 0.7437,
3653
+ "step": 4840
3654
+ },
3655
+ {
3656
+ "epoch": 0.1177756192326372,
3657
+ "grad_norm": 14.531624794006348,
3658
+ "learning_rate": 3.135754303747901e-08,
3659
+ "loss": 0.7275,
3660
+ "step": 4850
3661
+ },
3662
+ {
3663
+ "epoch": 0.11801845556095192,
3664
+ "grad_norm": 15.192167282104492,
3665
+ "learning_rate": 2.7318618246387424e-08,
3666
+ "loss": 0.7829,
3667
+ "step": 4860
3668
+ },
3669
+ {
3670
+ "epoch": 0.11826129188926664,
3671
+ "grad_norm": 14.022039413452148,
3672
+ "learning_rate": 2.3557518185328572e-08,
3673
+ "loss": 0.7444,
3674
+ "step": 4870
3675
+ },
3676
+ {
3677
+ "epoch": 0.11850412821758136,
3678
+ "grad_norm": 15.658608436584473,
3679
+ "learning_rate": 2.007439745839503e-08,
3680
+ "loss": 0.7373,
3681
+ "step": 4880
3682
+ },
3683
+ {
3684
+ "epoch": 0.11874696454589606,
3685
+ "grad_norm": 14.228999137878418,
3686
+ "learning_rate": 1.686939924303638e-08,
3687
+ "loss": 0.707,
3688
+ "step": 4890
3689
+ },
3690
+ {
3691
+ "epoch": 0.11898980087421078,
3692
+ "grad_norm": 14.242233276367188,
3693
+ "learning_rate": 1.3942655284176649e-08,
3694
+ "loss": 0.7373,
3695
+ "step": 4900
3696
+ },
3697
+ {
3698
+ "epoch": 0.1192326372025255,
3699
+ "grad_norm": 13.022191047668457,
3700
+ "learning_rate": 1.1294285888797343e-08,
3701
+ "loss": 0.6695,
3702
+ "step": 4910
3703
+ },
3704
+ {
3705
+ "epoch": 0.11947547353084022,
3706
+ "grad_norm": 15.798416137695312,
3707
+ "learning_rate": 8.924399920989856e-09,
3708
+ "loss": 0.7065,
3709
+ "step": 4920
3710
+ },
3711
+ {
3712
+ "epoch": 0.11971830985915492,
3713
+ "grad_norm": 15.76127815246582,
3714
+ "learning_rate": 6.8330947974862936e-09,
3715
+ "loss": 0.7246,
3716
+ "step": 4930
3717
+ },
3718
+ {
3719
+ "epoch": 0.11996114618746964,
3720
+ "grad_norm": 13.73786735534668,
3721
+ "learning_rate": 5.020456483646842e-09,
3722
+ "loss": 0.7168,
3723
+ "step": 4940
3724
+ },
3725
+ {
3726
+ "epoch": 0.12020398251578436,
3727
+ "grad_norm": 15.974238395690918,
3728
+ "learning_rate": 3.486559489934608e-09,
3729
+ "loss": 0.7356,
3730
+ "step": 4950
3731
+ },
3732
+ {
3733
+ "epoch": 0.12020398251578436,
3734
+ "eval_loss": 0.6936842203140259,
3735
+ "eval_runtime": 49.6215,
3736
+ "eval_samples_per_second": 10.076,
3737
+ "eval_steps_per_second": 10.076,
3738
+ "step": 4950
3739
  }
3740
  ],
3741
  "logging_steps": 10,
 
3755
  "attributes": {}
3756
  }
3757
  },
3758
+ "total_flos": 5.70332684353536e+17,
3759
  "train_batch_size": 16,
3760
  "trial_name": null,
3761
  "trial_params": null