FredericFan commited on
Commit
b0dfb2d
·
verified ·
1 Parent(s): 37017a3

Training in progress, step 25000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a24784d7d4d1fc3fbb8fc67b836bb1c712f3f7b0c64c99ff1439b5fe2a051a5b
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5271a77ad1e021749695ce7d0b16c4d3727b85c97d485c7bb3129da662dddf6
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:970bf634ac4ebe2edc929d89a76708d0db104aba94f7c07086ba7355e47fc214
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00b65ea8887464da7fcfa35adec6122142ff8b2f23f27b66874391d481d4ab50
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21c4e82da06fbd2a474e3defc7564dd624a46c9abb731c268ce51609d77b1972
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88a612cd3882628c0e5813e2d93583f2975d34afc744ee2ddc650af9cf2374f6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81a72d6c77b06746b972f475e80b7808db62786b79e89637fe4fa97684c444ab
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c12f3d0d653641cfe7f85f17f77b9ba95d2a97d1ea4e5bfd08bc3a80818a1f85
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.08147666603326797,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-24500",
4
- "epoch": 1.96,
5
  "eval_steps": 500,
6
- "global_step": 24500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3829,6 +3829,84 @@
3829
  "eval_samples_per_second": 22.712,
3830
  "eval_steps_per_second": 5.678,
3831
  "step": 24500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3832
  }
3833
  ],
3834
  "logging_steps": 50,
@@ -3843,12 +3921,12 @@
3843
  "should_evaluate": false,
3844
  "should_log": false,
3845
  "should_save": true,
3846
- "should_training_stop": false
3847
  },
3848
  "attributes": {}
3849
  }
3850
  },
3851
- "total_flos": 5.967787327488e+16,
3852
  "train_batch_size": 4,
3853
  "trial_name": null,
3854
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08146847784519196,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-25000",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3829
  "eval_samples_per_second": 22.712,
3830
  "eval_steps_per_second": 5.678,
3831
  "step": 24500
3832
+ },
3833
+ {
3834
+ "epoch": 1.964,
3835
+ "grad_norm": 0.1472010761499405,
3836
+ "learning_rate": 5.496e-07,
3837
+ "loss": 0.0485,
3838
+ "step": 24550
3839
+ },
3840
+ {
3841
+ "epoch": 1.968,
3842
+ "grad_norm": 0.14501795172691345,
3843
+ "learning_rate": 4.896e-07,
3844
+ "loss": 0.0554,
3845
+ "step": 24600
3846
+ },
3847
+ {
3848
+ "epoch": 1.972,
3849
+ "grad_norm": 0.09672766178846359,
3850
+ "learning_rate": 4.296e-07,
3851
+ "loss": 0.0534,
3852
+ "step": 24650
3853
+ },
3854
+ {
3855
+ "epoch": 1.976,
3856
+ "grad_norm": 0.1332007199525833,
3857
+ "learning_rate": 3.696e-07,
3858
+ "loss": 0.0513,
3859
+ "step": 24700
3860
+ },
3861
+ {
3862
+ "epoch": 1.98,
3863
+ "grad_norm": 0.0856068879365921,
3864
+ "learning_rate": 3.0959999999999997e-07,
3865
+ "loss": 0.0531,
3866
+ "step": 24750
3867
+ },
3868
+ {
3869
+ "epoch": 1.984,
3870
+ "grad_norm": 0.07875853031873703,
3871
+ "learning_rate": 2.4959999999999996e-07,
3872
+ "loss": 0.0511,
3873
+ "step": 24800
3874
+ },
3875
+ {
3876
+ "epoch": 1.988,
3877
+ "grad_norm": 0.1319677084684372,
3878
+ "learning_rate": 1.896e-07,
3879
+ "loss": 0.0493,
3880
+ "step": 24850
3881
+ },
3882
+ {
3883
+ "epoch": 1.992,
3884
+ "grad_norm": 0.20934289693832397,
3885
+ "learning_rate": 1.296e-07,
3886
+ "loss": 0.0559,
3887
+ "step": 24900
3888
+ },
3889
+ {
3890
+ "epoch": 1.996,
3891
+ "grad_norm": 0.12190008908510208,
3892
+ "learning_rate": 6.96e-08,
3893
+ "loss": 0.0589,
3894
+ "step": 24950
3895
+ },
3896
+ {
3897
+ "epoch": 2.0,
3898
+ "grad_norm": 0.2332906574010849,
3899
+ "learning_rate": 9.600000000000002e-09,
3900
+ "loss": 0.0557,
3901
+ "step": 25000
3902
+ },
3903
+ {
3904
+ "epoch": 2.0,
3905
+ "eval_loss": 0.08146847784519196,
3906
+ "eval_runtime": 88.0931,
3907
+ "eval_samples_per_second": 22.703,
3908
+ "eval_steps_per_second": 5.676,
3909
+ "step": 25000
3910
  }
3911
  ],
3912
  "logging_steps": 50,
 
3921
  "should_evaluate": false,
3922
  "should_log": false,
3923
  "should_save": true,
3924
+ "should_training_stop": true
3925
  },
3926
  "attributes": {}
3927
  }
3928
  },
3929
+ "total_flos": 6.0895789056e+16,
3930
  "train_batch_size": 4,
3931
  "trial_name": null,
3932
  "trial_params": null