besimray commited on
Commit
ea6e9c2
·
verified ·
1 Parent(s): 13f1a17

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c1c9190f08199ff0609b272bf4812d872ecf6af7ddaae2ad14d54a949f937f8
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72d67265c9a443fe69b3354dfa4ae158837e22dced7d2197ec76e9fc1416d452
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3ab265eb30355919991822e0cc6c38dc53b6b2d245118f01060a4abbff6110b
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18e24e09b1e55b470e3716dd0556376e6189a9a5ee317a63b79489e372305eff
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9d863956ee027610692953e2f8b517eb62237dedd2129c265b5b8401104a9871
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa2e51ab7a64ce18512351c5dbd00e07cf7826b590c6de414616260bdbb9965
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed29baad6d6611c736eccf18bdd7afdc5d6f3612cde61e7bfa83472d3e2068d2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7736f824b0a772b3806e37af9e860068207311e879196bc19f8a76d97eaf6bce
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7697538137435913,
3
- "best_model_checkpoint": "miner_id_besimray/checkpoint-80",
4
- "epoch": 1.0289389067524115,
5
  "eval_steps": 20,
6
- "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -607,6 +607,154 @@
607
  "eval_samples_per_second": 24.246,
608
  "eval_steps_per_second": 2.513,
609
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  }
611
  ],
612
  "logging_steps": 1,
@@ -635,7 +783,7 @@
635
  "attributes": {}
636
  }
637
  },
638
- "total_flos": 1.911441083203584e+16,
639
  "train_batch_size": 10,
640
  "trial_name": null,
641
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7664662599563599,
3
+ "best_model_checkpoint": "miner_id_besimray/checkpoint-100",
4
+ "epoch": 1.2861736334405145,
5
  "eval_steps": 20,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
607
  "eval_samples_per_second": 24.246,
608
  "eval_steps_per_second": 2.513,
609
  "step": 80
610
+ },
611
+ {
612
+ "epoch": 1.0418006430868167,
613
+ "grad_norm": 0.21907751262187958,
614
+ "learning_rate": 9.775619357041952e-05,
615
+ "loss": 0.6622,
616
+ "step": 81
617
+ },
618
+ {
619
+ "epoch": 1.0546623794212218,
620
+ "grad_norm": 0.2108103632926941,
621
+ "learning_rate": 9.551351696494854e-05,
622
+ "loss": 0.6832,
623
+ "step": 82
624
+ },
625
+ {
626
+ "epoch": 1.067524115755627,
627
+ "grad_norm": 0.21840904653072357,
628
+ "learning_rate": 9.327309943879604e-05,
629
+ "loss": 0.7518,
630
+ "step": 83
631
+ },
632
+ {
633
+ "epoch": 1.0803858520900322,
634
+ "grad_norm": 0.23524457216262817,
635
+ "learning_rate": 9.103606910965666e-05,
636
+ "loss": 0.7071,
637
+ "step": 84
638
+ },
639
+ {
640
+ "epoch": 1.0932475884244373,
641
+ "grad_norm": 0.2435670644044876,
642
+ "learning_rate": 8.880355238966923e-05,
643
+ "loss": 0.7268,
644
+ "step": 85
645
+ },
646
+ {
647
+ "epoch": 1.1061093247588425,
648
+ "grad_norm": 0.208921417593956,
649
+ "learning_rate": 8.657667341823448e-05,
650
+ "loss": 0.6816,
651
+ "step": 86
652
+ },
653
+ {
654
+ "epoch": 1.1189710610932475,
655
+ "grad_norm": 0.2085571438074112,
656
+ "learning_rate": 8.435655349597689e-05,
657
+ "loss": 0.6773,
658
+ "step": 87
659
+ },
660
+ {
661
+ "epoch": 1.1318327974276527,
662
+ "grad_norm": 0.2277572900056839,
663
+ "learning_rate": 8.214431052013634e-05,
664
+ "loss": 0.7188,
665
+ "step": 88
666
+ },
667
+ {
668
+ "epoch": 1.144694533762058,
669
+ "grad_norm": 0.24401521682739258,
670
+ "learning_rate": 7.994105842167273e-05,
671
+ "loss": 0.6896,
672
+ "step": 89
673
+ },
674
+ {
675
+ "epoch": 1.157556270096463,
676
+ "grad_norm": 0.23171570897102356,
677
+ "learning_rate": 7.774790660436858e-05,
678
+ "loss": 0.6567,
679
+ "step": 90
680
+ },
681
+ {
682
+ "epoch": 1.1704180064308682,
683
+ "grad_norm": 0.2330271154642105,
684
+ "learning_rate": 7.556595938621058e-05,
685
+ "loss": 0.7833,
686
+ "step": 91
687
+ },
688
+ {
689
+ "epoch": 1.1832797427652733,
690
+ "grad_norm": 0.23862923681735992,
691
+ "learning_rate": 7.339631544333249e-05,
692
+ "loss": 0.6973,
693
+ "step": 92
694
+ },
695
+ {
696
+ "epoch": 1.1961414790996785,
697
+ "grad_norm": 0.2100347876548767,
698
+ "learning_rate": 7.124006725679828e-05,
699
+ "loss": 0.6078,
700
+ "step": 93
701
+ },
702
+ {
703
+ "epoch": 1.2090032154340835,
704
+ "grad_norm": 0.21575500071048737,
705
+ "learning_rate": 6.909830056250527e-05,
706
+ "loss": 0.6561,
707
+ "step": 94
708
+ },
709
+ {
710
+ "epoch": 1.2218649517684887,
711
+ "grad_norm": 0.2418583333492279,
712
+ "learning_rate": 6.697209380448333e-05,
713
+ "loss": 0.732,
714
+ "step": 95
715
+ },
716
+ {
717
+ "epoch": 1.234726688102894,
718
+ "grad_norm": 0.22877807915210724,
719
+ "learning_rate": 6.486251759186572e-05,
720
+ "loss": 0.6474,
721
+ "step": 96
722
+ },
723
+ {
724
+ "epoch": 1.247588424437299,
725
+ "grad_norm": 0.24375984072685242,
726
+ "learning_rate": 6.277063415980549e-05,
727
+ "loss": 0.7595,
728
+ "step": 97
729
+ },
730
+ {
731
+ "epoch": 1.2604501607717042,
732
+ "grad_norm": 0.21191298961639404,
733
+ "learning_rate": 6.069749683460765e-05,
734
+ "loss": 0.6969,
735
+ "step": 98
736
+ },
737
+ {
738
+ "epoch": 1.2733118971061093,
739
+ "grad_norm": 0.2615202069282532,
740
+ "learning_rate": 5.864414950334796e-05,
741
+ "loss": 0.7311,
742
+ "step": 99
743
+ },
744
+ {
745
+ "epoch": 1.2861736334405145,
746
+ "grad_norm": 0.2501668930053711,
747
+ "learning_rate": 5.6611626088244194e-05,
748
+ "loss": 0.7258,
749
+ "step": 100
750
+ },
751
+ {
752
+ "epoch": 1.2861736334405145,
753
+ "eval_loss": 0.7664662599563599,
754
+ "eval_runtime": 7.6076,
755
+ "eval_samples_per_second": 21.558,
756
+ "eval_steps_per_second": 2.235,
757
+ "step": 100
758
  }
759
  ],
760
  "logging_steps": 1,
 
783
  "attributes": {}
784
  }
785
  },
786
+ "total_flos": 2.394546334728192e+16,
787
  "train_batch_size": 10,
788
  "trial_name": null,
789
  "trial_params": null