irishprancer commited on
Commit
ae08019
·
verified ·
1 Parent(s): 998b0d3

Training in progress, step 450, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d4578ea45525b754cb730822b4b5e19895f177b38e0a8204f0f0d2d34a3d2c2
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35820b18cb2d1840f01298abaa2a6ee2d1da18435d42aa5d26681751a1378e1c
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c252f56184e8af807bdd63c3b61b4ad344e01d2c51768cbb39510437a12c6e36
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50e724a055f269185a0e46082ca9a545eaf1f95d0e10536516c1a0c1dc261b97
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:037d13720220086c05f76f1146cd4356e8b9d075b5d306338df00d366045e1c1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f76c4c2dca19bfb0a463693a0e409b44510488650d816e566bccd2a2851e9524
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f4c00d522bdde510099aafe1617b13d114dce17a17b44e05876f016f4e4d7af
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da0b9a1e2fa3da24e9af8b74787d70ff4c95d9bc9b74eeab135df0350a00b462
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7313653230667114,
3
- "best_model_checkpoint": "./output/checkpoint-300",
4
- "epoch": 13.043478260869565,
5
  "eval_steps": 150,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -297,6 +297,151 @@
297
  "eval_samples_per_second": 24.847,
298
  "eval_steps_per_second": 24.847,
299
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
300
  }
301
  ],
302
  "logging_steps": 10,
@@ -316,7 +461,7 @@
316
  "attributes": {}
317
  }
318
  },
319
- "total_flos": 7724643094462464.0,
320
  "train_batch_size": 4,
321
  "trial_name": null,
322
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7164942026138306,
3
+ "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 19.565217391304348,
5
  "eval_steps": 150,
6
+ "global_step": 450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
297
  "eval_samples_per_second": 24.847,
298
  "eval_steps_per_second": 24.847,
299
  "step": 300
300
+ },
301
+ {
302
+ "epoch": 13.478260869565217,
303
+ "grad_norm": 1.7789413928985596,
304
+ "learning_rate": 2.9998689031728636e-05,
305
+ "loss": 0.5145,
306
+ "step": 310
307
+ },
308
+ {
309
+ "epoch": 13.91304347826087,
310
+ "grad_norm": 1.532348394393921,
311
+ "learning_rate": 2.9998561206933938e-05,
312
+ "loss": 0.6494,
313
+ "step": 320
314
+ },
315
+ {
316
+ "epoch": 14.347826086956522,
317
+ "grad_norm": 1.4854978322982788,
318
+ "learning_rate": 2.9998427437188786e-05,
319
+ "loss": 0.5741,
320
+ "step": 330
321
+ },
322
+ {
323
+ "epoch": 14.782608695652174,
324
+ "grad_norm": 1.3134292364120483,
325
+ "learning_rate": 2.99982877225462e-05,
326
+ "loss": 0.6014,
327
+ "step": 340
328
+ },
329
+ {
330
+ "epoch": 15.217391304347826,
331
+ "grad_norm": 0.9749585390090942,
332
+ "learning_rate": 2.9998142063061564e-05,
333
+ "loss": 0.4988,
334
+ "step": 350
335
+ },
336
+ {
337
+ "epoch": 15.652173913043478,
338
+ "grad_norm": 1.6409484148025513,
339
+ "learning_rate": 2.9997990458792603e-05,
340
+ "loss": 0.5625,
341
+ "step": 360
342
+ },
343
+ {
344
+ "epoch": 16.08695652173913,
345
+ "grad_norm": 1.6361374855041504,
346
+ "learning_rate": 2.9997832909799417e-05,
347
+ "loss": 0.6672,
348
+ "step": 370
349
+ },
350
+ {
351
+ "epoch": 16.52173913043478,
352
+ "grad_norm": 0.9572365880012512,
353
+ "learning_rate": 2.9997669416144452e-05,
354
+ "loss": 0.5129,
355
+ "step": 380
356
+ },
357
+ {
358
+ "epoch": 16.956521739130434,
359
+ "grad_norm": 0.9357342720031738,
360
+ "learning_rate": 2.999749997789251e-05,
361
+ "loss": 0.5798,
362
+ "step": 390
363
+ },
364
+ {
365
+ "epoch": 17.391304347826086,
366
+ "grad_norm": 1.1117758750915527,
367
+ "learning_rate": 2.9997324595110743e-05,
368
+ "loss": 0.5179,
369
+ "step": 400
370
+ },
371
+ {
372
+ "epoch": 17.82608695652174,
373
+ "grad_norm": 1.2845892906188965,
374
+ "learning_rate": 2.9997143267868683e-05,
375
+ "loss": 0.5874,
376
+ "step": 410
377
+ },
378
+ {
379
+ "epoch": 18.26086956521739,
380
+ "grad_norm": 1.1638511419296265,
381
+ "learning_rate": 2.9996955996238192e-05,
382
+ "loss": 0.506,
383
+ "step": 420
384
+ },
385
+ {
386
+ "epoch": 18.695652173913043,
387
+ "grad_norm": 1.2020405530929565,
388
+ "learning_rate": 2.9996762780293503e-05,
389
+ "loss": 0.5316,
390
+ "step": 430
391
+ },
392
+ {
393
+ "epoch": 19.130434782608695,
394
+ "grad_norm": 1.2133735418319702,
395
+ "learning_rate": 2.9996563620111197e-05,
396
+ "loss": 0.5338,
397
+ "step": 440
398
+ },
399
+ {
400
+ "epoch": 19.565217391304348,
401
+ "grad_norm": 1.4260774850845337,
402
+ "learning_rate": 2.9996358515770218e-05,
403
+ "loss": 0.5681,
404
+ "step": 450
405
+ },
406
+ {
407
+ "epoch": 19.565217391304348,
408
+ "eval_loss": 0.7164942026138306,
409
+ "eval_runtime": 0.4121,
410
+ "eval_samples_per_second": 24.267,
411
+ "eval_steps_per_second": 24.267,
412
+ "step": 450
413
+ },
414
+ {
415
+ "epoch": 19.565217391304348,
416
+ "eval_loss": 0.8609360456466675,
417
+ "eval_runtime": 0.4438,
418
+ "eval_samples_per_second": 22.531,
419
+ "eval_steps_per_second": 22.531,
420
+ "step": 450
421
+ },
422
+ {
423
+ "epoch": 19.565217391304348,
424
+ "eval_loss": 0.7164942026138306,
425
+ "eval_runtime": 0.4426,
426
+ "eval_samples_per_second": 22.593,
427
+ "eval_steps_per_second": 22.593,
428
+ "step": 450
429
+ },
430
+ {
431
+ "epoch": 19.565217391304348,
432
+ "eval_loss": 0.7564777731895447,
433
+ "eval_runtime": 0.4418,
434
+ "eval_samples_per_second": 22.634,
435
+ "eval_steps_per_second": 22.634,
436
+ "step": 450
437
+ },
438
+ {
439
+ "epoch": 19.565217391304348,
440
+ "eval_loss": 0.8607853055000305,
441
+ "eval_runtime": 0.4381,
442
+ "eval_samples_per_second": 22.828,
443
+ "eval_steps_per_second": 22.828,
444
+ "step": 450
445
  }
446
  ],
447
  "logging_steps": 10,
 
461
  "attributes": {}
462
  }
463
  },
464
+ "total_flos": 1.1591800814075904e+16,
465
  "train_batch_size": 4,
466
  "trial_name": null,
467
  "trial_params": null