baby-dev committed on
Commit
942f0b3
·
verified ·
1 Parent(s): aac7313

Training in progress, step 80, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7627bee5f59d424331a7a8c8fa59f2519833f6c9a0b76bf47db086e2d057ba3e
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:468b063969066b67543986fcdfd9612e1cf13bd6f9ed9c8cb089fa8deef7fbbd
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a277a9128921e08f49f245502ecfafe5e811a6fdcec51f4c80f5c64f8c88065c
3
  size 36135892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802bb23f2e39ef6455fe99fb42b6fc5e3f5b93e4b39a391d6ac0d7800d92e4dd
3
  size 36135892
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8276354487b9292066c9bf7fb070ef98904061db81bdcaf8141dd5e722f15d1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:039c1d81fb146d103517ea6bea4a7a40c383f02afe64a858fb2196fad801a152
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d842ae89b9008565268495397aa2a41cfc409fd6aefac567cbdd9ac926ecf81
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d0ad751608bb1f65d75e4c79bf22f0a1cfe737e1d81d092b1221458be1c8fb4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.1694796085357666,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-60",
4
- "epoch": 0.025388765471278957,
5
  "eval_steps": 20,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -459,6 +459,154 @@
459
  "eval_samples_per_second": 27.672,
460
  "eval_steps_per_second": 6.925,
461
  "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
462
  }
463
  ],
464
  "logging_steps": 1,
@@ -482,12 +630,12 @@
482
  "should_evaluate": false,
483
  "should_log": false,
484
  "should_save": true,
485
- "should_training_stop": false
486
  },
487
  "attributes": {}
488
  }
489
  },
490
- "total_flos": 1107378422415360.0,
491
  "train_batch_size": 4,
492
  "trial_name": null,
493
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.1631815433502197,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-80",
4
+ "epoch": 0.03385168729503861,
5
  "eval_steps": 20,
6
+ "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
459
  "eval_samples_per_second": 27.672,
460
  "eval_steps_per_second": 6.925,
461
  "step": 60
462
+ },
463
+ {
464
+ "epoch": 0.02581191156246694,
465
+ "grad_norm": 1.1517850160598755,
466
+ "learning_rate": 3.4206127406028745e-05,
467
+ "loss": 2.2314,
468
+ "step": 61
469
+ },
470
+ {
471
+ "epoch": 0.026235057653654924,
472
+ "grad_norm": 1.0954445600509644,
473
+ "learning_rate": 3.089373510131354e-05,
474
+ "loss": 2.1453,
475
+ "step": 62
476
+ },
477
+ {
478
+ "epoch": 0.026658203744842908,
479
+ "grad_norm": 1.129459261894226,
480
+ "learning_rate": 2.7720513617260856e-05,
481
+ "loss": 2.1496,
482
+ "step": 63
483
+ },
484
+ {
485
+ "epoch": 0.027081349836030888,
486
+ "grad_norm": 1.2924052476882935,
487
+ "learning_rate": 2.4692853399638917e-05,
488
+ "loss": 2.1504,
489
+ "step": 64
490
+ },
491
+ {
492
+ "epoch": 0.02750449592721887,
493
+ "grad_norm": 1.1182539463043213,
494
+ "learning_rate": 2.181685175319702e-05,
495
+ "loss": 2.0713,
496
+ "step": 65
497
+ },
498
+ {
499
+ "epoch": 0.027927642018406855,
500
+ "grad_norm": 1.2294206619262695,
501
+ "learning_rate": 1.9098300562505266e-05,
502
+ "loss": 2.1622,
503
+ "step": 66
504
+ },
505
+ {
506
+ "epoch": 0.02835078810959484,
507
+ "grad_norm": 1.191117763519287,
508
+ "learning_rate": 1.6542674627869737e-05,
509
+ "loss": 2.208,
510
+ "step": 67
511
+ },
512
+ {
513
+ "epoch": 0.028773934200782822,
514
+ "grad_norm": 1.1100726127624512,
515
+ "learning_rate": 1.415512063981339e-05,
516
+ "loss": 2.1405,
517
+ "step": 68
518
+ },
519
+ {
520
+ "epoch": 0.029197080291970802,
521
+ "grad_norm": 1.0611038208007812,
522
+ "learning_rate": 1.19404468143262e-05,
523
+ "loss": 2.1244,
524
+ "step": 69
525
+ },
526
+ {
527
+ "epoch": 0.029620226383158785,
528
+ "grad_norm": 1.193991780281067,
529
+ "learning_rate": 9.903113209758096e-06,
530
+ "loss": 2.2719,
531
+ "step": 70
532
+ },
533
+ {
534
+ "epoch": 0.03004337247434677,
535
+ "grad_norm": 1.1647319793701172,
536
+ "learning_rate": 8.047222744854943e-06,
537
+ "loss": 2.1554,
538
+ "step": 71
539
+ },
540
+ {
541
+ "epoch": 0.030466518565534752,
542
+ "grad_norm": 1.1668955087661743,
543
+ "learning_rate": 6.37651293602628e-06,
544
+ "loss": 2.1259,
545
+ "step": 72
546
+ },
547
+ {
548
+ "epoch": 0.030889664656722732,
549
+ "grad_norm": 1.1693888902664185,
550
+ "learning_rate": 4.8943483704846475e-06,
551
+ "loss": 2.2087,
552
+ "step": 73
553
+ },
554
+ {
555
+ "epoch": 0.031312810747910716,
556
+ "grad_norm": 1.1683961153030396,
557
+ "learning_rate": 3.6037139304146762e-06,
558
+ "loss": 2.0883,
559
+ "step": 74
560
+ },
561
+ {
562
+ "epoch": 0.031735956839098696,
563
+ "grad_norm": 1.1831532716751099,
564
+ "learning_rate": 2.5072087818176382e-06,
565
+ "loss": 2.1605,
566
+ "step": 75
567
+ },
568
+ {
569
+ "epoch": 0.03215910293028668,
570
+ "grad_norm": 1.237900972366333,
571
+ "learning_rate": 1.6070411401370334e-06,
572
+ "loss": 2.1349,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 0.03258224902147466,
577
+ "grad_norm": 1.3579156398773193,
578
+ "learning_rate": 9.0502382320653e-07,
579
+ "loss": 2.166,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 0.03300539511266265,
584
+ "grad_norm": 1.1819465160369873,
585
+ "learning_rate": 4.025706004760932e-07,
586
+ "loss": 2.2146,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 0.03342854120385063,
591
+ "grad_norm": 1.3723968267440796,
592
+ "learning_rate": 1.0069334586854107e-07,
593
+ "loss": 2.2131,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 0.03385168729503861,
598
+ "grad_norm": 1.183445930480957,
599
+ "learning_rate": 0.0,
600
+ "loss": 2.266,
601
+ "step": 80
602
+ },
603
+ {
604
+ "epoch": 0.03385168729503861,
605
+ "eval_loss": 2.1631815433502197,
606
+ "eval_runtime": 63.4411,
607
+ "eval_samples_per_second": 31.368,
608
+ "eval_steps_per_second": 7.85,
609
+ "step": 80
610
  }
611
  ],
612
  "logging_steps": 1,
 
630
  "should_evaluate": false,
631
  "should_log": false,
632
  "should_save": true,
633
+ "should_training_stop": true
634
  },
635
  "attributes": {}
636
  }
637
  },
638
+ "total_flos": 1476504563220480.0,
639
  "train_batch_size": 4,
640
  "trial_name": null,
641
  "trial_params": null