TweedleDeepLearnings committed on
Commit
fdcf82e
·
verified ·
1 Parent(s): 86cf9a1

Training in progress, step 4950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19ae8e66a3e7180ff406d73d26175f28f065c62b5e96be9581974c2a27f6e281
3
  size 2231685024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ed20b6874093a7161b65b1e1200cb511c0e3bd7419c8e25d5b81c923b598f96
3
  size 2231685024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6bbb46111783306a8de9b03ae71ff3bcc36e74645ee420fba5fedae748ca5446
3
  size 4383116999
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:327ab0018c15664bce2839f2927d845c65c2d8a53e134dfacfbde2cfb2327877
3
  size 4383116999
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4413b3c1c89ddb3d57ddabb40868d44f33618613ca6b14588ab3262fd959fe1a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8bf0fcb1f7b613e5c7f00ca071aeebb5941a801d1488690efaa0b8ff1605c4f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:536269e746606d89d7092eefc1d242400387136c0a490c65f4003318a00e54ce
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b104801e8f2977a5280eca927cca74cec17e750775fe40d3bc9414c93058be59
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7983394265174866,
3
- "best_model_checkpoint": "./output/checkpoint-4800",
4
- "epoch": 0.34500107812836917,
5
  "eval_steps": 150,
6
- "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
3623
  "eval_samples_per_second": 6.133,
3624
  "eval_steps_per_second": 6.133,
3625
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3626
  }
3627
  ],
3628
  "logging_steps": 10,
@@ -3642,7 +3755,7 @@
3642
  "attributes": {}
3643
  }
3644
  },
3645
- "total_flos": 4.088392743032586e+17,
3646
  "train_batch_size": 8,
3647
  "trial_name": null,
3648
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7982437610626221,
3
+ "best_model_checkpoint": "./output/checkpoint-4950",
4
+ "epoch": 0.3557823618198807,
5
  "eval_steps": 150,
6
+ "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3623
  "eval_samples_per_second": 6.133,
3624
  "eval_steps_per_second": 6.133,
3625
  "step": 4800
3626
+ },
3627
+ {
3628
+ "epoch": 0.3457198303744699,
3629
+ "grad_norm": 4.458255767822266,
3630
+ "learning_rate": 2.964201089733987e-07,
3631
+ "loss": 0.7048,
3632
+ "step": 4810
3633
+ },
3634
+ {
3635
+ "epoch": 0.3464385826205707,
3636
+ "grad_norm": 3.821779489517212,
3637
+ "learning_rate": 2.66072829537678e-07,
3638
+ "loss": 0.7115,
3639
+ "step": 4820
3640
+ },
3641
+ {
3642
+ "epoch": 0.34715733486667144,
3643
+ "grad_norm": 3.816742181777954,
3644
+ "learning_rate": 2.3735885622078624e-07,
3645
+ "loss": 0.7119,
3646
+ "step": 4830
3647
+ },
3648
+ {
3649
+ "epoch": 0.34787608711277224,
3650
+ "grad_norm": 4.855486869812012,
3651
+ "learning_rate": 2.102793693417038e-07,
3652
+ "loss": 0.6831,
3653
+ "step": 4840
3654
+ },
3655
+ {
3656
+ "epoch": 0.348594839358873,
3657
+ "grad_norm": 4.475304126739502,
3658
+ "learning_rate": 1.8483548203207081e-07,
3659
+ "loss": 0.7049,
3660
+ "step": 4850
3661
+ },
3662
+ {
3663
+ "epoch": 0.34931359160497377,
3664
+ "grad_norm": 3.8661746978759766,
3665
+ "learning_rate": 1.6102824019043728e-07,
3666
+ "loss": 0.6819,
3667
+ "step": 4860
3668
+ },
3669
+ {
3670
+ "epoch": 0.3500323438510745,
3671
+ "grad_norm": 4.284508228302002,
3672
+ "learning_rate": 1.3885862243927072e-07,
3673
+ "loss": 0.7462,
3674
+ "step": 4870
3675
+ },
3676
+ {
3677
+ "epoch": 0.3507510960971753,
3678
+ "grad_norm": 3.998692512512207,
3679
+ "learning_rate": 1.1832754008472614e-07,
3680
+ "loss": 0.7378,
3681
+ "step": 4880
3682
+ },
3683
+ {
3684
+ "epoch": 0.35146984834327605,
3685
+ "grad_norm": 4.27777624130249,
3686
+ "learning_rate": 9.943583707917815e-08,
3687
+ "loss": 0.7321,
3688
+ "step": 4890
3689
+ },
3690
+ {
3691
+ "epoch": 0.35218860058937684,
3692
+ "grad_norm": 3.2360732555389404,
3693
+ "learning_rate": 8.21842899865466e-08,
3694
+ "loss": 0.7669,
3695
+ "step": 4900
3696
+ },
3697
+ {
3698
+ "epoch": 0.3529073528354776,
3699
+ "grad_norm": 3.907960891723633,
3700
+ "learning_rate": 6.65736079503665e-08,
3701
+ "loss": 0.7666,
3702
+ "step": 4910
3703
+ },
3704
+ {
3705
+ "epoch": 0.3536261050815784,
3706
+ "grad_norm": 3.8733742237091064,
3707
+ "learning_rate": 5.260443266462467e-08,
3708
+ "loss": 0.716,
3709
+ "step": 4920
3710
+ },
3711
+ {
3712
+ "epoch": 0.3543448573276792,
3713
+ "grad_norm": 3.6972267627716064,
3714
+ "learning_rate": 4.0277338347416426e-08,
3715
+ "loss": 0.7439,
3716
+ "step": 4930
3717
+ },
3718
+ {
3719
+ "epoch": 0.3550636095737799,
3720
+ "grad_norm": 3.7578091621398926,
3721
+ "learning_rate": 2.9592831717293326e-08,
3722
+ "loss": 0.6808,
3723
+ "step": 4940
3724
+ },
3725
+ {
3726
+ "epoch": 0.3557823618198807,
3727
+ "grad_norm": 3.6677637100219727,
3728
+ "learning_rate": 2.0551351972484257e-08,
3729
+ "loss": 0.7256,
3730
+ "step": 4950
3731
+ },
3732
+ {
3733
+ "epoch": 0.3557823618198807,
3734
+ "eval_loss": 0.7982437610626221,
3735
+ "eval_runtime": 79.6541,
3736
+ "eval_samples_per_second": 6.277,
3737
+ "eval_steps_per_second": 6.277,
3738
+ "step": 4950
3739
  }
3740
  ],
3741
  "logging_steps": 10,
 
3755
  "attributes": {}
3756
  }
3757
  },
3758
+ "total_flos": 4.215132315739914e+17,
3759
  "train_batch_size": 8,
3760
  "trial_name": null,
3761
  "trial_params": null