besimray commited on
Commit
7e762bc
·
verified ·
1 Parent(s): 30c6e06

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9580f23f6650701b8c5c6baa7b6251793876fc45c8700caf4e48d91b6ad89806
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96ccc5b9ac087dba0f10fdc4d5342763320b3d393d47476a061e07710ee9b638
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5a855a05342866a4aba16944f07a8978153f6a90eff7647124c560db0db5cc6
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7e67d1ef70988eb681b3fa8d5f97f7cf8dd017881f8271c13d1e14c3a983051
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c899beef0e4b648cad8954a66a8d655d219bebd0e6f90f7ad69eea43e18a96c4
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0432b165814871079f9ee89b9acfe77d274309a01b9cb7e1f0b9e05d72efa489
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed29baad6d6611c736eccf18bdd7afdc5d6f3612cde61e7bfa83472d3e2068d2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7736f824b0a772b3806e37af9e860068207311e879196bc19f8a76d97eaf6bce
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.10349678248167038,
3
- "best_model_checkpoint": "miner_id_besimray/checkpoint-80",
4
- "epoch": 1.0289389067524115,
5
  "eval_steps": 20,
6
- "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -607,6 +607,154 @@
607
  "eval_samples_per_second": 24.005,
608
  "eval_steps_per_second": 2.488,
609
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  }
611
  ],
612
  "logging_steps": 1,
@@ -635,7 +783,7 @@
635
  "attributes": {}
636
  }
637
  },
638
- "total_flos": 1.911441083203584e+16,
639
  "train_batch_size": 10,
640
  "trial_name": null,
641
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.09471629559993744,
3
+ "best_model_checkpoint": "miner_id_besimray/checkpoint-100",
4
+ "epoch": 1.2861736334405145,
5
  "eval_steps": 20,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
607
  "eval_samples_per_second": 24.005,
608
  "eval_steps_per_second": 2.488,
609
  "step": 80
610
+ },
611
+ {
612
+ "epoch": 1.0418006430868167,
613
+ "grad_norm": 1.8745784759521484,
614
+ "learning_rate": 9.775619357041952e-05,
615
+ "loss": 0.1944,
616
+ "step": 81
617
+ },
618
+ {
619
+ "epoch": 1.0546623794212218,
620
+ "grad_norm": 2.0719668865203857,
621
+ "learning_rate": 9.551351696494854e-05,
622
+ "loss": 0.1917,
623
+ "step": 82
624
+ },
625
+ {
626
+ "epoch": 1.067524115755627,
627
+ "grad_norm": 1.204854965209961,
628
+ "learning_rate": 9.327309943879604e-05,
629
+ "loss": 0.0717,
630
+ "step": 83
631
+ },
632
+ {
633
+ "epoch": 1.0803858520900322,
634
+ "grad_norm": 0.9742199182510376,
635
+ "learning_rate": 9.103606910965666e-05,
636
+ "loss": 0.0752,
637
+ "step": 84
638
+ },
639
+ {
640
+ "epoch": 1.0932475884244373,
641
+ "grad_norm": 1.1170694828033447,
642
+ "learning_rate": 8.880355238966923e-05,
643
+ "loss": 0.0936,
644
+ "step": 85
645
+ },
646
+ {
647
+ "epoch": 1.1061093247588425,
648
+ "grad_norm": 1.186493158340454,
649
+ "learning_rate": 8.657667341823448e-05,
650
+ "loss": 0.1443,
651
+ "step": 86
652
+ },
653
+ {
654
+ "epoch": 1.1189710610932475,
655
+ "grad_norm": 0.8638191223144531,
656
+ "learning_rate": 8.435655349597689e-05,
657
+ "loss": 0.0455,
658
+ "step": 87
659
+ },
660
+ {
661
+ "epoch": 1.1318327974276527,
662
+ "grad_norm": 0.8235441446304321,
663
+ "learning_rate": 8.214431052013634e-05,
664
+ "loss": 0.0438,
665
+ "step": 88
666
+ },
667
+ {
668
+ "epoch": 1.144694533762058,
669
+ "grad_norm": 1.0054430961608887,
670
+ "learning_rate": 7.994105842167273e-05,
671
+ "loss": 0.0574,
672
+ "step": 89
673
+ },
674
+ {
675
+ "epoch": 1.157556270096463,
676
+ "grad_norm": 1.3182615041732788,
677
+ "learning_rate": 7.774790660436858e-05,
678
+ "loss": 0.0656,
679
+ "step": 90
680
+ },
681
+ {
682
+ "epoch": 1.1704180064308682,
683
+ "grad_norm": 1.3295056819915771,
684
+ "learning_rate": 7.556595938621058e-05,
685
+ "loss": 0.0805,
686
+ "step": 91
687
+ },
688
+ {
689
+ "epoch": 1.1832797427652733,
690
+ "grad_norm": 0.636367917060852,
691
+ "learning_rate": 7.339631544333249e-05,
692
+ "loss": 0.0327,
693
+ "step": 92
694
+ },
695
+ {
696
+ "epoch": 1.1961414790996785,
697
+ "grad_norm": 0.9409329891204834,
698
+ "learning_rate": 7.124006725679828e-05,
699
+ "loss": 0.039,
700
+ "step": 93
701
+ },
702
+ {
703
+ "epoch": 1.2090032154340835,
704
+ "grad_norm": 1.8021913766860962,
705
+ "learning_rate": 6.909830056250527e-05,
706
+ "loss": 0.1653,
707
+ "step": 94
708
+ },
709
+ {
710
+ "epoch": 1.2218649517684887,
711
+ "grad_norm": 1.4615201950073242,
712
+ "learning_rate": 6.697209380448333e-05,
713
+ "loss": 0.0553,
714
+ "step": 95
715
+ },
716
+ {
717
+ "epoch": 1.234726688102894,
718
+ "grad_norm": 1.356886386871338,
719
+ "learning_rate": 6.486251759186572e-05,
720
+ "loss": 0.0814,
721
+ "step": 96
722
+ },
723
+ {
724
+ "epoch": 1.247588424437299,
725
+ "grad_norm": 2.497354745864868,
726
+ "learning_rate": 6.277063415980549e-05,
727
+ "loss": 0.0964,
728
+ "step": 97
729
+ },
730
+ {
731
+ "epoch": 1.2604501607717042,
732
+ "grad_norm": 1.3293228149414062,
733
+ "learning_rate": 6.069749683460765e-05,
734
+ "loss": 0.0668,
735
+ "step": 98
736
+ },
737
+ {
738
+ "epoch": 1.2733118971061093,
739
+ "grad_norm": 1.5753332376480103,
740
+ "learning_rate": 5.864414950334796e-05,
741
+ "loss": 0.0494,
742
+ "step": 99
743
+ },
744
+ {
745
+ "epoch": 1.2861736334405145,
746
+ "grad_norm": 0.9486237168312073,
747
+ "learning_rate": 5.6611626088244194e-05,
748
+ "loss": 0.0493,
749
+ "step": 100
750
+ },
751
+ {
752
+ "epoch": 1.2861736334405145,
753
+ "eval_loss": 0.09471629559993744,
754
+ "eval_runtime": 6.8732,
755
+ "eval_samples_per_second": 23.861,
756
+ "eval_steps_per_second": 2.473,
757
+ "step": 100
758
  }
759
  ],
760
  "logging_steps": 1,
 
783
  "attributes": {}
784
  }
785
  },
786
+ "total_flos": 2.394546334728192e+16,
787
  "train_batch_size": 10,
788
  "trial_name": null,
789
  "trial_params": null