FredericFan commited on
Commit
3f8a684
·
verified ·
1 Parent(s): 74a1463

Training in progress, step 18500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e87cb14acd4e6fe6bcad897a8f042f31acae6fb2e34f61ed68b17e469e4f393
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9f8e1e9f015d9568bf75922d777d726b01fb6e502bd445c08094a91f28ae1a8
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e64bb265a77f08e4da4240e30c68e0a0ae7eb01df9530a76fcba45020acfd6b
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01ffcfafbc1c4522f01ae60dab6db775fcdcd8fec900d37f15a5127a63ce98b4
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea1c5d0777adfd9f21aec03650885c94d8b0325360d164ecc81c3a16d777cc03
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6769e966cbd01b0928c6dfa08d9183af00ab69c61a86a4a6ef846a74f2cd4f12
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bc3c023f1bacc02e7db4990bce2636fd592e49ea544612a30431a586cc170fc
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88eb0259375aeb4797384085a6556dffb88f3f28e3b811d250261aef798e28f2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0821109265089035,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-17500",
4
- "epoch": 1.44,
5
  "eval_steps": 500,
6
- "global_step": 18000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2815,6 +2815,84 @@
2815
  "eval_samples_per_second": 22.709,
2816
  "eval_steps_per_second": 5.677,
2817
  "step": 18000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2818
  }
2819
  ],
2820
  "logging_steps": 50,
@@ -2834,7 +2912,7 @@
2834
  "attributes": {}
2835
  }
2836
  },
2837
- "total_flos": 4.384496812032e+16,
2838
  "train_batch_size": 4,
2839
  "trial_name": null,
2840
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0821109265089035,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-17500",
4
+ "epoch": 1.48,
5
  "eval_steps": 500,
6
+ "global_step": 18500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2815
  "eval_samples_per_second": 22.709,
2816
  "eval_steps_per_second": 5.677,
2817
  "step": 18000
2818
+ },
2819
+ {
2820
+ "epoch": 1.444,
2821
+ "grad_norm": 0.150528684258461,
2822
+ "learning_rate": 8.3448e-06,
2823
+ "loss": 0.0519,
2824
+ "step": 18050
2825
+ },
2826
+ {
2827
+ "epoch": 1.448,
2828
+ "grad_norm": 0.08878117799758911,
2829
+ "learning_rate": 8.2848e-06,
2830
+ "loss": 0.0544,
2831
+ "step": 18100
2832
+ },
2833
+ {
2834
+ "epoch": 1.452,
2835
+ "grad_norm": 0.12337563931941986,
2836
+ "learning_rate": 8.224800000000001e-06,
2837
+ "loss": 0.0509,
2838
+ "step": 18150
2839
+ },
2840
+ {
2841
+ "epoch": 1.456,
2842
+ "grad_norm": 0.12858565151691437,
2843
+ "learning_rate": 8.1648e-06,
2844
+ "loss": 0.0561,
2845
+ "step": 18200
2846
+ },
2847
+ {
2848
+ "epoch": 1.46,
2849
+ "grad_norm": 0.14318782091140747,
2850
+ "learning_rate": 8.1048e-06,
2851
+ "loss": 0.0539,
2852
+ "step": 18250
2853
+ },
2854
+ {
2855
+ "epoch": 1.464,
2856
+ "grad_norm": 0.16946159303188324,
2857
+ "learning_rate": 8.044800000000001e-06,
2858
+ "loss": 0.0545,
2859
+ "step": 18300
2860
+ },
2861
+ {
2862
+ "epoch": 1.468,
2863
+ "grad_norm": 0.11605024337768555,
2864
+ "learning_rate": 7.9848e-06,
2865
+ "loss": 0.0563,
2866
+ "step": 18350
2867
+ },
2868
+ {
2869
+ "epoch": 1.472,
2870
+ "grad_norm": 0.20185694098472595,
2871
+ "learning_rate": 7.9248e-06,
2872
+ "loss": 0.0536,
2873
+ "step": 18400
2874
+ },
2875
+ {
2876
+ "epoch": 1.476,
2877
+ "grad_norm": 0.16189858317375183,
2878
+ "learning_rate": 7.8648e-06,
2879
+ "loss": 0.0523,
2880
+ "step": 18450
2881
+ },
2882
+ {
2883
+ "epoch": 1.48,
2884
+ "grad_norm": 0.24366913735866547,
2885
+ "learning_rate": 7.8048e-06,
2886
+ "loss": 0.057,
2887
+ "step": 18500
2888
+ },
2889
+ {
2890
+ "epoch": 1.48,
2891
+ "eval_loss": 0.08215340971946716,
2892
+ "eval_runtime": 88.0726,
2893
+ "eval_samples_per_second": 22.709,
2894
+ "eval_steps_per_second": 5.677,
2895
+ "step": 18500
2896
  }
2897
  ],
2898
  "logging_steps": 50,
 
2912
  "attributes": {}
2913
  }
2914
  },
2915
+ "total_flos": 4.506288390144e+16,
2916
  "train_batch_size": 4,
2917
  "trial_name": null,
2918
  "trial_params": null