Rakhman16 committed on
Commit
8d964cf
·
verified ·
1 Parent(s): 17536c7

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6332954eced366f37dfa0feeafda882ef0b7137fc497129122aa90280c207e12
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c6db9a1befbdf96a10eb4549f5c11bb62b71c871fc1449a4ceb1d04e854c30a
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c451d16ef10ec74b175dfb29380c22e605ba1df9ea11b88d2ccb1b29333371d
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c66c42c906aa76007f6b840d28d3671397c29fafb8ec2a00a943aefb2881f063
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:934ae21caf3ad57aad1e085732f92e778c42e733d2e77db72cee584a6cee29f1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf8fd01cb1f6f9e37877eae1b37636fde2b75f869af10bcba05747b44361cb5
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bd00e418debe68a52c354ffbeb17299ae0b053a4079b4b4c3f7c38706aea24d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d701c4e4bcddd7438a47fc5bf2bf52cf308baa5fb5488432aa3ee78319007661
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.2125701606273651,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-1000",
4
- "epoch": 0.7024938531787847,
5
  "eval_steps": 100,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -227,6 +227,116 @@
227
  "eval_samples_per_second": 66.047,
228
  "eval_steps_per_second": 2.073,
229
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  }
231
  ],
232
  "logging_steps": 50,
@@ -246,7 +356,7 @@
246
  "attributes": {}
247
  }
248
  },
249
- "total_flos": 9743326248960000.0,
250
  "train_batch_size": 32,
251
  "trial_name": null,
252
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.20940540730953217,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-1500",
4
+ "epoch": 1.053740779768177,
5
  "eval_steps": 100,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
227
  "eval_samples_per_second": 66.047,
228
  "eval_steps_per_second": 2.073,
229
  "step": 1000
230
+ },
231
+ {
232
+ "epoch": 0.7376185458377239,
233
+ "grad_norm": 26256.35546875,
234
+ "learning_rate": 2.4465917076598737e-05,
235
+ "loss": 0.2224,
236
+ "step": 1050
237
+ },
238
+ {
239
+ "epoch": 0.7727432384966632,
240
+ "grad_norm": 29107.78515625,
241
+ "learning_rate": 2.420238931834153e-05,
242
+ "loss": 0.2211,
243
+ "step": 1100
244
+ },
245
+ {
246
+ "epoch": 0.7727432384966632,
247
+ "eval_loss": 0.2117428034543991,
248
+ "eval_runtime": 67.5369,
249
+ "eval_samples_per_second": 66.038,
250
+ "eval_steps_per_second": 2.073,
251
+ "step": 1100
252
+ },
253
+ {
254
+ "epoch": 0.8078679311556024,
255
+ "grad_norm": 98354.15625,
256
+ "learning_rate": 2.393886156008433e-05,
257
+ "loss": 0.215,
258
+ "step": 1150
259
+ },
260
+ {
261
+ "epoch": 0.8429926238145417,
262
+ "grad_norm": 22886.3984375,
263
+ "learning_rate": 2.3675333801827128e-05,
264
+ "loss": 0.2229,
265
+ "step": 1200
266
+ },
267
+ {
268
+ "epoch": 0.8429926238145417,
269
+ "eval_loss": 0.2107735425233841,
270
+ "eval_runtime": 67.6295,
271
+ "eval_samples_per_second": 65.948,
272
+ "eval_steps_per_second": 2.07,
273
+ "step": 1200
274
+ },
275
+ {
276
+ "epoch": 0.8781173164734809,
277
+ "grad_norm": 20510.26171875,
278
+ "learning_rate": 2.3411806043569923e-05,
279
+ "loss": 0.2105,
280
+ "step": 1250
281
+ },
282
+ {
283
+ "epoch": 0.91324200913242,
284
+ "grad_norm": 20053.85546875,
285
+ "learning_rate": 2.314827828531272e-05,
286
+ "loss": 0.2195,
287
+ "step": 1300
288
+ },
289
+ {
290
+ "epoch": 0.91324200913242,
291
+ "eval_loss": 0.20966531336307526,
292
+ "eval_runtime": 67.6112,
293
+ "eval_samples_per_second": 65.965,
294
+ "eval_steps_per_second": 2.071,
295
+ "step": 1300
296
+ },
297
+ {
298
+ "epoch": 0.9483667017913593,
299
+ "grad_norm": 28154.595703125,
300
+ "learning_rate": 2.2884750527055516e-05,
301
+ "loss": 0.2215,
302
+ "step": 1350
303
+ },
304
+ {
305
+ "epoch": 0.9834913944502985,
306
+ "grad_norm": 28011.71484375,
307
+ "learning_rate": 2.2621222768798314e-05,
308
+ "loss": 0.2172,
309
+ "step": 1400
310
+ },
311
+ {
312
+ "epoch": 0.9834913944502985,
313
+ "eval_loss": 0.20960816740989685,
314
+ "eval_runtime": 67.6089,
315
+ "eval_samples_per_second": 65.968,
316
+ "eval_steps_per_second": 2.071,
317
+ "step": 1400
318
+ },
319
+ {
320
+ "epoch": 1.0186160871092378,
321
+ "grad_norm": 26518.01171875,
322
+ "learning_rate": 2.2357695010541112e-05,
323
+ "loss": 0.21,
324
+ "step": 1450
325
+ },
326
+ {
327
+ "epoch": 1.053740779768177,
328
+ "grad_norm": 20411.26171875,
329
+ "learning_rate": 2.2094167252283907e-05,
330
+ "loss": 0.2139,
331
+ "step": 1500
332
+ },
333
+ {
334
+ "epoch": 1.053740779768177,
335
+ "eval_loss": 0.20940540730953217,
336
+ "eval_runtime": 67.4684,
337
+ "eval_samples_per_second": 66.105,
338
+ "eval_steps_per_second": 2.075,
339
+ "step": 1500
340
  }
341
  ],
342
  "logging_steps": 50,
 
356
  "attributes": {}
357
  }
358
  },
359
+ "total_flos": 1.46127057813504e+16,
360
  "train_batch_size": 32,
361
  "trial_name": null,
362
  "trial_params": null