irishprancer committed on
Commit
8cd2409
·
verified ·
1 Parent(s): 8ea2f5a

Training in progress, step 4950, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc40fbefead84ea7caad963c1efa70501e832ef22239e7a54d927cfa0939bcba
3
  size 774409936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae90e0ed80cb5637bb6c9ecabcc99991c9828716da3d8c0cefea14ccc2ecbd9d
3
  size 774409936
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:650f15d525b37de4912730572da2d80697b8f8c54898d7d1c7fdfbc5a72fa5e9
3
  size 1523152634
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47ab85ba546c598f5673a4214419dd308b49a195a3355405daed92697e13424a
3
  size 1523152634
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:113aa991f3a0567dee9572abb91fc32e8bd02c99557fffc0f47d83da41eca9ef
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bc94cd5f166b08af777c923ba842bf10db572ab5700f8d6003b85076b1592f9
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:716d7ef0d2def98440e32b2cba336f73e613b85c0427aef8f0c8a6789d61bd46
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83dd037c783110e2dc4f61307500d62937b8821c50649baf8ed55dd7f5bddf19
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.4709917306900024,
3
- "best_model_checkpoint": "./output/checkpoint-4800",
4
- "epoch": 0.45201996421508617,
5
  "eval_steps": 150,
6
- "global_step": 4800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3623,6 +3623,119 @@
3623
  "eval_samples_per_second": 10.873,
3624
  "eval_steps_per_second": 10.873,
3625
  "step": 4800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3626
  }
3627
  ],
3628
  "logging_steps": 10,
@@ -3642,7 +3755,7 @@
3642
  "attributes": {}
3643
  }
3644
  },
3645
- "total_flos": 3.0622411684439654e+17,
3646
  "train_batch_size": 4,
3647
  "trial_name": null,
3648
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.4705742597579956,
3
+ "best_model_checkpoint": "./output/checkpoint-4950",
4
+ "epoch": 0.4661455880968076,
5
  "eval_steps": 150,
6
+ "global_step": 4950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3623
  "eval_samples_per_second": 10.873,
3624
  "eval_steps_per_second": 10.873,
3625
  "step": 4800
3626
+ },
3627
+ {
3628
+ "epoch": 0.4529616724738676,
3629
+ "grad_norm": 3.871877431869507,
3630
+ "learning_rate": 8.285193919530187e-08,
3631
+ "loss": 1.1957,
3632
+ "step": 4810
3633
+ },
3634
+ {
3635
+ "epoch": 0.453903380732649,
3636
+ "grad_norm": 9.62677001953125,
3637
+ "learning_rate": 7.436961672649524e-08,
3638
+ "loss": 1.4019,
3639
+ "step": 4820
3640
+ },
3641
+ {
3642
+ "epoch": 0.45484508899143045,
3643
+ "grad_norm": 8.656726837158203,
3644
+ "learning_rate": 6.634381719640962e-08,
3645
+ "loss": 1.7807,
3646
+ "step": 4830
3647
+ },
3648
+ {
3649
+ "epoch": 0.4557867972502119,
3650
+ "grad_norm": 14.876380920410156,
3651
+ "learning_rate": 5.877487051422937e-08,
3652
+ "loss": 1.0471,
3653
+ "step": 4840
3654
+ },
3655
+ {
3656
+ "epoch": 0.4567285055089933,
3657
+ "grad_norm": 3.9531829357147217,
3658
+ "learning_rate": 5.166308780970642e-08,
3659
+ "loss": 1.394,
3660
+ "step": 4850
3661
+ },
3662
+ {
3663
+ "epoch": 0.4576702137677747,
3664
+ "grad_norm": 7.217214107513428,
3665
+ "learning_rate": 4.500876142037269e-08,
3666
+ "loss": 1.4854,
3667
+ "step": 4860
3668
+ },
3669
+ {
3670
+ "epoch": 0.45861192202655615,
3671
+ "grad_norm": 2.900088310241699,
3672
+ "learning_rate": 3.881216487952338e-08,
3673
+ "loss": 1.0854,
3674
+ "step": 4870
3675
+ },
3676
+ {
3677
+ "epoch": 0.4595536302853376,
3678
+ "grad_norm": 7.573949337005615,
3679
+ "learning_rate": 3.307355290497236e-08,
3680
+ "loss": 1.5179,
3681
+ "step": 4880
3682
+ },
3683
+ {
3684
+ "epoch": 0.46049533854411906,
3685
+ "grad_norm": 5.3885087966918945,
3686
+ "learning_rate": 2.7793161388579562e-08,
3687
+ "loss": 1.3822,
3688
+ "step": 4890
3689
+ },
3690
+ {
3691
+ "epoch": 0.4614370468029005,
3692
+ "grad_norm": 9.68319320678711,
3693
+ "learning_rate": 2.2971207386559186e-08,
3694
+ "loss": 1.4108,
3695
+ "step": 4900
3696
+ },
3697
+ {
3698
+ "epoch": 0.4623787550616819,
3699
+ "grad_norm": 11.433544158935547,
3700
+ "learning_rate": 1.8607889110554993e-08,
3701
+ "loss": 1.48,
3702
+ "step": 4910
3703
+ },
3704
+ {
3705
+ "epoch": 0.46332046332046334,
3706
+ "grad_norm": 14.155740737915039,
3707
+ "learning_rate": 1.4703385919488896e-08,
3708
+ "loss": 1.4005,
3709
+ "step": 4920
3710
+ },
3711
+ {
3712
+ "epoch": 0.46426217157924476,
3713
+ "grad_norm": 3.100614309310913,
3714
+ "learning_rate": 1.1257858312197773e-08,
3715
+ "loss": 1.249,
3716
+ "step": 4930
3717
+ },
3718
+ {
3719
+ "epoch": 0.4652038798380262,
3720
+ "grad_norm": 4.316708087921143,
3721
+ "learning_rate": 8.271447920822464e-09,
3722
+ "loss": 1.3052,
3723
+ "step": 4940
3724
+ },
3725
+ {
3726
+ "epoch": 0.4661455880968076,
3727
+ "grad_norm": 9.99028491973877,
3728
+ "learning_rate": 5.744277504999899e-09,
3729
+ "loss": 1.1356,
3730
+ "step": 4950
3731
+ },
3732
+ {
3733
+ "epoch": 0.4661455880968076,
3734
+ "eval_loss": 1.4705742597579956,
3735
+ "eval_runtime": 47.4682,
3736
+ "eval_samples_per_second": 10.533,
3737
+ "eval_steps_per_second": 10.533,
3738
+ "step": 4950
3739
  }
3740
  ],
3741
  "logging_steps": 10,
 
3755
  "attributes": {}
3756
  }
3757
  },
3758
+ "total_flos": 3.1532401080395366e+17,
3759
  "train_batch_size": 4,
3760
  "trial_name": null,
3761
  "trial_params": null