texanrangee commited on
Commit
a46a36b
·
verified ·
1 Parent(s): 2f09d4e

Training in progress, step 4950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4a48677be121fbdceeeabfe986e73dec1b480151800c1936b91cc4ace7b80e51
3
- size 1266749168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5ef156f9d60232c47caba6b4769b54973aaa74da4450d27d25fa35eff601d5c
3
+ size 4418777208
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44e4a6a867bbc060d0c9facba78b7da76aadb4c3e6f9cafb001ecea1e69880d2
3
  size 2499769978
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40d31e86fb3b6de80883470d4ce17a9b1a506b44fb2ceb7dc1ff790daa5f0db0
3
  size 2499769978
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e347613182c5807d2f6db7accfb1c61cd49dff9dda43fd034155068ae72f90f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d167f207748dc95c9dcda746fc5ce0dae8e8f9d3544099aa270f86a99297608
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e8493cf662a0ef9cc409c1e71c561b4f018c668b3acc0b65f5c029d848604c14
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:848d7aad2c2d474b8df3e1c4a379fb71bd6c1fcc4154ae3a9b8234e5418c2c78
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.8088018894195557,
3
- "best_model_checkpoint": "./output/checkpoint-4800",
4
- "epoch": 0.15402881622436865,
5
  "eval_steps": 150,
6
- "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
3623
  "eval_samples_per_second": 11.221,
3624
  "eval_steps_per_second": 11.221,
3625
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3626
  }
3627
  ],
3628
  "logging_steps": 10,
@@ -3642,7 +3755,7 @@
3642
  "attributes": {}
3643
  }
3644
  },
3645
- "total_flos": 7.062562841064975e+17,
3646
  "train_batch_size": 8,
3647
  "trial_name": null,
3648
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.808562994003296,
3
+ "best_model_checkpoint": "./output/checkpoint-4950",
4
+ "epoch": 0.15884221673138016,
5
  "eval_steps": 150,
6
+ "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3623
  "eval_samples_per_second": 11.221,
3624
  "eval_steps_per_second": 11.221,
3625
  "step": 4800
3626
+ },
3627
+ {
3628
+ "epoch": 0.15434970959150274,
3629
+ "grad_norm": 12.37609577178955,
3630
+ "learning_rate": 3.7052513621674843e-07,
3631
+ "loss": 2.895,
3632
+ "step": 4810
3633
+ },
3634
+ {
3635
+ "epoch": 0.15467060295863685,
3636
+ "grad_norm": 17.5330753326416,
3637
+ "learning_rate": 3.325910369220976e-07,
3638
+ "loss": 2.7705,
3639
+ "step": 4820
3640
+ },
3641
+ {
3642
+ "epoch": 0.15499149632577094,
3643
+ "grad_norm": 13.448247909545898,
3644
+ "learning_rate": 2.9669857027598287e-07,
3645
+ "loss": 2.7588,
3646
+ "step": 4830
3647
+ },
3648
+ {
3649
+ "epoch": 0.15531238969290503,
3650
+ "grad_norm": 12.854496955871582,
3651
+ "learning_rate": 2.628492116771298e-07,
3652
+ "loss": 2.8088,
3653
+ "step": 4840
3654
+ },
3655
+ {
3656
+ "epoch": 0.15563328306003915,
3657
+ "grad_norm": 18.642213821411133,
3658
+ "learning_rate": 2.3104435254008856e-07,
3659
+ "loss": 2.8231,
3660
+ "step": 4850
3661
+ },
3662
+ {
3663
+ "epoch": 0.15595417642717324,
3664
+ "grad_norm": 16.102710723876953,
3665
+ "learning_rate": 2.0128530023804664e-07,
3666
+ "loss": 2.8234,
3667
+ "step": 4860
3668
+ },
3669
+ {
3670
+ "epoch": 0.15627506979430736,
3671
+ "grad_norm": 13.156707763671875,
3672
+ "learning_rate": 1.7357327804908846e-07,
3673
+ "loss": 2.6889,
3674
+ "step": 4870
3675
+ },
3676
+ {
3677
+ "epoch": 0.15659596316144145,
3678
+ "grad_norm": 21.432458877563477,
3679
+ "learning_rate": 1.479094251059077e-07,
3680
+ "loss": 2.6573,
3681
+ "step": 4880
3682
+ },
3683
+ {
3684
+ "epoch": 0.15691685652857557,
3685
+ "grad_norm": 17.190486907958984,
3686
+ "learning_rate": 1.2429479634897272e-07,
3687
+ "loss": 2.6614,
3688
+ "step": 4890
3689
+ },
3690
+ {
3691
+ "epoch": 0.15723774989570966,
3692
+ "grad_norm": 15.629843711853027,
3693
+ "learning_rate": 1.0273036248318327e-07,
3694
+ "loss": 2.5595,
3695
+ "step": 4900
3696
+ },
3697
+ {
3698
+ "epoch": 0.15755864326284374,
3699
+ "grad_norm": 14.262920379638672,
3700
+ "learning_rate": 8.321700993795814e-08,
3701
+ "loss": 2.6362,
3702
+ "step": 4910
3703
+ },
3704
+ {
3705
+ "epoch": 0.15787953662997786,
3706
+ "grad_norm": 45.12715148925781,
3707
+ "learning_rate": 6.575554083078086e-08,
3708
+ "loss": 2.8154,
3709
+ "step": 4920
3710
+ },
3711
+ {
3712
+ "epoch": 0.15820042999711195,
3713
+ "grad_norm": 17.036636352539062,
3714
+ "learning_rate": 5.034667293427055e-08,
3715
+ "loss": 2.7985,
3716
+ "step": 4930
3717
+ },
3718
+ {
3719
+ "epoch": 0.15852132336424607,
3720
+ "grad_norm": 17.798555374145508,
3721
+ "learning_rate": 3.6991039646616666e-08,
3722
+ "loss": 2.7622,
3723
+ "step": 4940
3724
+ },
3725
+ {
3726
+ "epoch": 0.15884221673138016,
3727
+ "grad_norm": 15.753199577331543,
3728
+ "learning_rate": 2.5689189965605327e-08,
3729
+ "loss": 2.7289,
3730
+ "step": 4950
3731
+ },
3732
+ {
3733
+ "epoch": 0.15884221673138016,
3734
+ "eval_loss": 2.808562994003296,
3735
+ "eval_runtime": 45.1194,
3736
+ "eval_samples_per_second": 11.082,
3737
+ "eval_steps_per_second": 11.082,
3738
+ "step": 4950
3739
  }
3740
  ],
3741
  "logging_steps": 10,
 
3755
  "attributes": {}
3756
  }
3757
  },
3758
+ "total_flos": 7.268168492012636e+17,
3759
  "train_batch_size": 8,
3760
  "trial_name": null,
3761
  "trial_params": null