FredericFan commited on
Commit
af02e94
·
verified ·
1 Parent(s): 51f692b

Training in progress, step 14500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9704e41640d7704f052e32113e34a80ebd155dba2fd8f3a818c35af9ef8e5e5
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:054ab94a66b126df267b052c3963349825d38029b9947a5eeef3e088fc94d5e3
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38547c985ce9b90055e73bc70569507cc2022f06756bda43feaaa7134440ed4a
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c700b2458cc3a7d705b174e5b082b75ff9b46e4556e47eb6bc98ed85f7b5b362
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f2249892101c67b7f09df7f3b33fbce8ad4fc7b712e0895251ba03419a8b657
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c9886b9061bb2e70af0da0a78b4bba065bbf4e416078705ff5fff6c95adfc84
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2dd16cd3f7a9b47079af7541224a232c825207a9e0cc8410dcba6e13de89ef34
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6ce624ffb18558fd63335de21c66bfccbf585f56176b1bb9297748553d5fb95
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.0824647843837738,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-12500",
4
- "epoch": 1.12,
5
  "eval_steps": 500,
6
- "global_step": 14000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2191,6 +2191,84 @@
2191
  "eval_samples_per_second": 22.715,
2192
  "eval_steps_per_second": 5.679,
2193
  "step": 14000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2194
  }
2195
  ],
2196
  "logging_steps": 50,
@@ -2210,7 +2288,7 @@
2210
  "attributes": {}
2211
  }
2212
  },
2213
- "total_flos": 3.410164187136e+16,
2214
  "train_batch_size": 4,
2215
  "trial_name": null,
2216
  "trial_params": null
 
1
  {
2
  "best_metric": 0.0824647843837738,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-12500",
4
+ "epoch": 1.16,
5
  "eval_steps": 500,
6
+ "global_step": 14500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2191
  "eval_samples_per_second": 22.715,
2192
  "eval_steps_per_second": 5.679,
2193
  "step": 14000
2194
+ },
2195
+ {
2196
+ "epoch": 1.124,
2197
+ "grad_norm": 0.17533883452415466,
2198
+ "learning_rate": 1.31436e-05,
2199
+ "loss": 0.0552,
2200
+ "step": 14050
2201
+ },
2202
+ {
2203
+ "epoch": 1.1280000000000001,
2204
+ "grad_norm": 0.10817945748567581,
2205
+ "learning_rate": 1.3083600000000001e-05,
2206
+ "loss": 0.049,
2207
+ "step": 14100
2208
+ },
2209
+ {
2210
+ "epoch": 1.1320000000000001,
2211
+ "grad_norm": 0.09337913990020752,
2212
+ "learning_rate": 1.30236e-05,
2213
+ "loss": 0.0573,
2214
+ "step": 14150
2215
+ },
2216
+ {
2217
+ "epoch": 1.1360000000000001,
2218
+ "grad_norm": 0.15710942447185516,
2219
+ "learning_rate": 1.29636e-05,
2220
+ "loss": 0.0605,
2221
+ "step": 14200
2222
+ },
2223
+ {
2224
+ "epoch": 1.1400000000000001,
2225
+ "grad_norm": 0.10915792733430862,
2226
+ "learning_rate": 1.29036e-05,
2227
+ "loss": 0.0581,
2228
+ "step": 14250
2229
+ },
2230
+ {
2231
+ "epoch": 1.144,
2232
+ "grad_norm": 0.10125772655010223,
2233
+ "learning_rate": 1.28436e-05,
2234
+ "loss": 0.0599,
2235
+ "step": 14300
2236
+ },
2237
+ {
2238
+ "epoch": 1.148,
2239
+ "grad_norm": 0.0998956710100174,
2240
+ "learning_rate": 1.27836e-05,
2241
+ "loss": 0.0479,
2242
+ "step": 14350
2243
+ },
2244
+ {
2245
+ "epoch": 1.152,
2246
+ "grad_norm": 0.13762612640857697,
2247
+ "learning_rate": 1.27236e-05,
2248
+ "loss": 0.0589,
2249
+ "step": 14400
2250
+ },
2251
+ {
2252
+ "epoch": 1.156,
2253
+ "grad_norm": 0.11048023402690887,
2254
+ "learning_rate": 1.26636e-05,
2255
+ "loss": 0.0591,
2256
+ "step": 14450
2257
+ },
2258
+ {
2259
+ "epoch": 1.16,
2260
+ "grad_norm": 0.14803436398506165,
2261
+ "learning_rate": 1.26036e-05,
2262
+ "loss": 0.0553,
2263
+ "step": 14500
2264
+ },
2265
+ {
2266
+ "epoch": 1.16,
2267
+ "eval_loss": 0.08270228654146194,
2268
+ "eval_runtime": 88.0514,
2269
+ "eval_samples_per_second": 22.714,
2270
+ "eval_steps_per_second": 5.678,
2271
+ "step": 14500
2272
  }
2273
  ],
2274
  "logging_steps": 50,
 
2288
  "attributes": {}
2289
  }
2290
  },
2291
+ "total_flos": 3.531955765248e+16,
2292
  "train_batch_size": 4,
2293
  "trial_name": null,
2294
  "trial_params": null