FredericFan commited on
Commit
a124076
·
verified ·
1 Parent(s): 2a50d7f

Training in progress, step 19000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9f8e1e9f015d9568bf75922d777d726b01fb6e502bd445c08094a91f28ae1a8
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b6ab3ff42b2cdf0354b7f70bd9a1a4076a91b639655aef18c9e1ddc6f4867c
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:01ffcfafbc1c4522f01ae60dab6db775fcdcd8fec900d37f15a5127a63ce98b4
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa8b871e21aa935f37782ccd210ed25bd8f9e62b08ef3de2bd22cc4ef35b1df
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6769e966cbd01b0928c6dfa08d9183af00ab69c61a86a4a6ef846a74f2cd4f12
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd0f95a80e92c03f24f22ba71db4680031a4c2ab72b3c821c67aa452d49e368d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88eb0259375aeb4797384085a6556dffb88f3f28e3b811d250261aef798e28f2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a24b364686c3a4df0863387bf61d379d6122e40cd9c6c72454e7921c0155ca34
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.0821109265089035,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-17500",
4
- "epoch": 1.48,
5
  "eval_steps": 500,
6
- "global_step": 18500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2893,6 +2893,84 @@
2893
  "eval_samples_per_second": 22.709,
2894
  "eval_steps_per_second": 5.677,
2895
  "step": 18500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2896
  }
2897
  ],
2898
  "logging_steps": 50,
@@ -2912,7 +2990,7 @@
2912
  "attributes": {}
2913
  }
2914
  },
2915
- "total_flos": 4.506288390144e+16,
2916
  "train_batch_size": 4,
2917
  "trial_name": null,
2918
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08186879754066467,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-19000",
4
+ "epoch": 1.52,
5
  "eval_steps": 500,
6
+ "global_step": 19000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2893
  "eval_samples_per_second": 22.709,
2894
  "eval_steps_per_second": 5.677,
2895
  "step": 18500
2896
+ },
2897
+ {
2898
+ "epoch": 1.484,
2899
+ "grad_norm": 0.13516181707382202,
2900
+ "learning_rate": 7.7448e-06,
2901
+ "loss": 0.0516,
2902
+ "step": 18550
2903
+ },
2904
+ {
2905
+ "epoch": 1.488,
2906
+ "grad_norm": 0.14996372163295746,
2907
+ "learning_rate": 7.6848e-06,
2908
+ "loss": 0.0545,
2909
+ "step": 18600
2910
+ },
2911
+ {
2912
+ "epoch": 1.492,
2913
+ "grad_norm": 0.11676355451345444,
2914
+ "learning_rate": 7.6248e-06,
2915
+ "loss": 0.0546,
2916
+ "step": 18650
2917
+ },
2918
+ {
2919
+ "epoch": 1.496,
2920
+ "grad_norm": 0.0986240953207016,
2921
+ "learning_rate": 7.5648e-06,
2922
+ "loss": 0.054,
2923
+ "step": 18700
2924
+ },
2925
+ {
2926
+ "epoch": 1.5,
2927
+ "grad_norm": 0.16591283679008484,
2928
+ "learning_rate": 7.5048e-06,
2929
+ "loss": 0.0554,
2930
+ "step": 18750
2931
+ },
2932
+ {
2933
+ "epoch": 1.504,
2934
+ "grad_norm": 0.13870297372341156,
2935
+ "learning_rate": 7.4448e-06,
2936
+ "loss": 0.0486,
2937
+ "step": 18800
2938
+ },
2939
+ {
2940
+ "epoch": 1.508,
2941
+ "grad_norm": 0.1334107369184494,
2942
+ "learning_rate": 7.3848e-06,
2943
+ "loss": 0.0532,
2944
+ "step": 18850
2945
+ },
2946
+ {
2947
+ "epoch": 1.512,
2948
+ "grad_norm": 0.1280679702758789,
2949
+ "learning_rate": 7.3248e-06,
2950
+ "loss": 0.0543,
2951
+ "step": 18900
2952
+ },
2953
+ {
2954
+ "epoch": 1.516,
2955
+ "grad_norm": 0.08572965115308762,
2956
+ "learning_rate": 7.2647999999999995e-06,
2957
+ "loss": 0.0516,
2958
+ "step": 18950
2959
+ },
2960
+ {
2961
+ "epoch": 1.52,
2962
+ "grad_norm": 0.10845118761062622,
2963
+ "learning_rate": 7.204800000000001e-06,
2964
+ "loss": 0.0554,
2965
+ "step": 19000
2966
+ },
2967
+ {
2968
+ "epoch": 1.52,
2969
+ "eval_loss": 0.08186879754066467,
2970
+ "eval_runtime": 88.0768,
2971
+ "eval_samples_per_second": 22.707,
2972
+ "eval_steps_per_second": 5.677,
2973
+ "step": 19000
2974
  }
2975
  ],
2976
  "logging_steps": 50,
 
2990
  "attributes": {}
2991
  }
2992
  },
2993
+ "total_flos": 4.628079968256e+16,
2994
  "train_batch_size": 4,
2995
  "trial_name": null,
2996
  "trial_params": null