baby-dev commited on
Commit
a7abe71
·
verified ·
1 Parent(s): f7edc75

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e28a0969e0d12e5c82f2f9386cc5bfa4aadffff4210ed049daa68d9f0f9b6eb9
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7627bee5f59d424331a7a8c8fa59f2519833f6c9a0b76bf47db086e2d057ba3e
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a840335b9b9afe60a3604be7778ac7251bed11b91667e56629bded1cb7947850
3
  size 36135892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a277a9128921e08f49f245502ecfafe5e811a6fdcec51f4c80f5c64f8c88065c
3
  size 36135892
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e01dfd723622e9677c7b52a2436bae7b33f9b2cab0de4c18546921773d165195
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8276354487b9292066c9bf7fb070ef98904061db81bdcaf8141dd5e722f15d1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:504bbfdc996899ae0fed191469707d16864e324e1af3e19030b955ead524b577
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d842ae89b9008565268495397aa2a41cfc409fd6aefac567cbdd9ac926ecf81
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.213444709777832,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-40",
4
- "epoch": 0.016925843647519305,
5
  "eval_steps": 20,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -311,6 +311,154 @@
311
  "eval_samples_per_second": 26.138,
312
  "eval_steps_per_second": 6.541,
313
  "step": 40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
  }
315
  ],
316
  "logging_steps": 1,
@@ -339,7 +487,7 @@
339
  "attributes": {}
340
  }
341
  },
342
- "total_flos": 738252281610240.0,
343
  "train_batch_size": 4,
344
  "trial_name": null,
345
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.1694796085357666,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-60",
4
+ "epoch": 0.025388765471278957,
5
  "eval_steps": 20,
6
+ "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
311
  "eval_samples_per_second": 26.138,
312
  "eval_steps_per_second": 6.541,
313
  "step": 40
314
+ },
315
+ {
316
+ "epoch": 0.01734898973870729,
317
+ "grad_norm": 1.311227798461914,
318
+ "learning_rate": 0.00011785568947986367,
319
+ "loss": 2.2835,
320
+ "step": 41
321
+ },
322
+ {
323
+ "epoch": 0.017772135829895272,
324
+ "grad_norm": 5.271040916442871,
325
+ "learning_rate": 0.00011342332658176555,
326
+ "loss": 2.3215,
327
+ "step": 42
328
+ },
329
+ {
330
+ "epoch": 0.018195281921083255,
331
+ "grad_norm": 1.3460105657577515,
332
+ "learning_rate": 0.00010896393089034336,
333
+ "loss": 2.1808,
334
+ "step": 43
335
+ },
336
+ {
337
+ "epoch": 0.018618428012271235,
338
+ "grad_norm": 1.4446858167648315,
339
+ "learning_rate": 0.00010448648303505151,
340
+ "loss": 2.2533,
341
+ "step": 44
342
+ },
343
+ {
344
+ "epoch": 0.01904157410345922,
345
+ "grad_norm": 1.5081716775894165,
346
+ "learning_rate": 0.0001,
347
+ "loss": 2.1724,
348
+ "step": 45
349
+ },
350
+ {
351
+ "epoch": 0.019464720194647202,
352
+ "grad_norm": 1.2462027072906494,
353
+ "learning_rate": 9.551351696494854e-05,
354
+ "loss": 2.1896,
355
+ "step": 46
356
+ },
357
+ {
358
+ "epoch": 0.019887866285835186,
359
+ "grad_norm": 1.4999518394470215,
360
+ "learning_rate": 9.103606910965666e-05,
361
+ "loss": 2.3309,
362
+ "step": 47
363
+ },
364
+ {
365
+ "epoch": 0.020311012377023166,
366
+ "grad_norm": 1.250173807144165,
367
+ "learning_rate": 8.657667341823448e-05,
368
+ "loss": 2.1242,
369
+ "step": 48
370
+ },
371
+ {
372
+ "epoch": 0.02073415846821115,
373
+ "grad_norm": 5.076053142547607,
374
+ "learning_rate": 8.214431052013634e-05,
375
+ "loss": 2.3353,
376
+ "step": 49
377
+ },
378
+ {
379
+ "epoch": 0.021157304559399133,
380
+ "grad_norm": 1.4977030754089355,
381
+ "learning_rate": 7.774790660436858e-05,
382
+ "loss": 2.3957,
383
+ "step": 50
384
+ },
385
+ {
386
+ "epoch": 0.021580450650587116,
387
+ "grad_norm": 1.3641694784164429,
388
+ "learning_rate": 7.339631544333249e-05,
389
+ "loss": 2.1407,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 0.022003596741775096,
394
+ "grad_norm": 1.596494197845459,
395
+ "learning_rate": 6.909830056250527e-05,
396
+ "loss": 2.1233,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 0.02242674283296308,
401
+ "grad_norm": 1.3014681339263916,
402
+ "learning_rate": 6.486251759186572e-05,
403
+ "loss": 2.0812,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 0.022849888924151063,
408
+ "grad_norm": 1.2006770372390747,
409
+ "learning_rate": 6.069749683460765e-05,
410
+ "loss": 2.171,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 0.023273035015339047,
415
+ "grad_norm": 1.1349421739578247,
416
+ "learning_rate": 5.6611626088244194e-05,
417
+ "loss": 2.0624,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 0.023696181106527027,
422
+ "grad_norm": 1.238204002380371,
423
+ "learning_rate": 5.261313375270014e-05,
424
+ "loss": 2.2335,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 0.02411932719771501,
429
+ "grad_norm": 1.2505645751953125,
430
+ "learning_rate": 4.87100722594094e-05,
431
+ "loss": 2.2071,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 0.024542473288902994,
436
+ "grad_norm": 1.2006800174713135,
437
+ "learning_rate": 4.491030185478976e-05,
438
+ "loss": 2.0714,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 0.024965619380090977,
443
+ "grad_norm": 1.3659069538116455,
444
+ "learning_rate": 4.12214747707527e-05,
445
+ "loss": 2.1511,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 0.025388765471278957,
450
+ "grad_norm": 1.116264820098877,
451
+ "learning_rate": 3.7651019814126654e-05,
452
+ "loss": 2.1554,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 0.025388765471278957,
457
+ "eval_loss": 2.1694796085357666,
458
+ "eval_runtime": 71.9131,
459
+ "eval_samples_per_second": 27.672,
460
+ "eval_steps_per_second": 6.925,
461
+ "step": 60
462
  }
463
  ],
464
  "logging_steps": 1,
 
487
  "attributes": {}
488
  }
489
  },
490
+ "total_flos": 1107378422415360.0,
491
  "train_batch_size": 4,
492
  "trial_name": null,
493
  "trial_params": null