FredericFan commited on
Commit
a0364fe
·
verified ·
1 Parent(s): 34860b6

Training in progress, step 19500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66b6ab3ff42b2cdf0354b7f70bd9a1a4076a91b639655aef18c9e1ddc6f4867c
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:505ab1c5a34c51302d4bbe4a328e757e4ed0c5b5a72411048c8f13bbf2635e0c
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fa8b871e21aa935f37782ccd210ed25bd8f9e62b08ef3de2bd22cc4ef35b1df
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7d32e738e2880d3c24e7619fc2f522beb9e9c3515dd4633aed48a29ef433cf5
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd0f95a80e92c03f24f22ba71db4680031a4c2ab72b3c821c67aa452d49e368d
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93c8742c8826e2530a7b338bfc6b583a37586dd446431d091cb023f04fe4b53a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a24b364686c3a4df0863387bf61d379d6122e40cd9c6c72454e7921c0155ca34
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:588932b38df48361d8c321bcc1bc5968dc8111dbf384cded84b91367b4837f6a
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.08186879754066467,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-19000",
4
- "epoch": 1.52,
5
  "eval_steps": 500,
6
- "global_step": 19000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2971,6 +2971,84 @@
2971
  "eval_samples_per_second": 22.707,
2972
  "eval_steps_per_second": 5.677,
2973
  "step": 19000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2974
  }
2975
  ],
2976
  "logging_steps": 50,
@@ -2990,7 +3068,7 @@
2990
  "attributes": {}
2991
  }
2992
  },
2993
- "total_flos": 4.628079968256e+16,
2994
  "train_batch_size": 4,
2995
  "trial_name": null,
2996
  "trial_params": null
 
1
  {
2
  "best_metric": 0.08186879754066467,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-19000",
4
+ "epoch": 1.56,
5
  "eval_steps": 500,
6
+ "global_step": 19500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2971
  "eval_samples_per_second": 22.707,
2972
  "eval_steps_per_second": 5.677,
2973
  "step": 19000
2974
+ },
2975
+ {
2976
+ "epoch": 1.524,
2977
+ "grad_norm": 0.09976433962583542,
2978
+ "learning_rate": 7.144800000000001e-06,
2979
+ "loss": 0.0496,
2980
+ "step": 19050
2981
+ },
2982
+ {
2983
+ "epoch": 1.528,
2984
+ "grad_norm": 0.19336478412151337,
2985
+ "learning_rate": 7.0848e-06,
2986
+ "loss": 0.0528,
2987
+ "step": 19100
2988
+ },
2989
+ {
2990
+ "epoch": 1.532,
2991
+ "grad_norm": 0.13555702567100525,
2992
+ "learning_rate": 7.0248e-06,
2993
+ "loss": 0.0559,
2994
+ "step": 19150
2995
+ },
2996
+ {
2997
+ "epoch": 1.536,
2998
+ "grad_norm": 0.2016674280166626,
2999
+ "learning_rate": 6.964800000000001e-06,
3000
+ "loss": 0.0568,
3001
+ "step": 19200
3002
+ },
3003
+ {
3004
+ "epoch": 1.54,
3005
+ "grad_norm": 0.8713797330856323,
3006
+ "learning_rate": 6.9048e-06,
3007
+ "loss": 0.0522,
3008
+ "step": 19250
3009
+ },
3010
+ {
3011
+ "epoch": 1.544,
3012
+ "grad_norm": 0.13156233727931976,
3013
+ "learning_rate": 6.8448e-06,
3014
+ "loss": 0.0557,
3015
+ "step": 19300
3016
+ },
3017
+ {
3018
+ "epoch": 1.548,
3019
+ "grad_norm": 0.1713368147611618,
3020
+ "learning_rate": 6.7848e-06,
3021
+ "loss": 0.0574,
3022
+ "step": 19350
3023
+ },
3024
+ {
3025
+ "epoch": 1.552,
3026
+ "grad_norm": 0.13423492014408112,
3027
+ "learning_rate": 6.7248e-06,
3028
+ "loss": 0.0494,
3029
+ "step": 19400
3030
+ },
3031
+ {
3032
+ "epoch": 1.556,
3033
+ "grad_norm": 0.1513233631849289,
3034
+ "learning_rate": 6.6648e-06,
3035
+ "loss": 0.0528,
3036
+ "step": 19450
3037
+ },
3038
+ {
3039
+ "epoch": 1.56,
3040
+ "grad_norm": 0.1609751433134079,
3041
+ "learning_rate": 6.606000000000001e-06,
3042
+ "loss": 0.0512,
3043
+ "step": 19500
3044
+ },
3045
+ {
3046
+ "epoch": 1.56,
3047
+ "eval_loss": 0.08218736946582794,
3048
+ "eval_runtime": 88.094,
3049
+ "eval_samples_per_second": 22.703,
3050
+ "eval_steps_per_second": 5.676,
3051
+ "step": 19500
3052
  }
3053
  ],
3054
  "logging_steps": 50,
 
3068
  "attributes": {}
3069
  }
3070
  },
3071
+ "total_flos": 4.749871546368e+16,
3072
  "train_batch_size": 4,
3073
  "trial_name": null,
3074
  "trial_params": null