besimray committed on
Commit
b0aeee1
·
verified ·
1 Parent(s): 695bf6e

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e1cf37faf9885b6cb74c0c4ef4e2c2d7e9e4aab0773875932068d778e74f6f5
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:818a070534240e91d68e2f203e19637a55fb6d281983e6b1f4db372769f90baf
3
  size 45118424
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:00e3c513aa284f4a659f6163efed2ca9c3075b3ed338078bb8fc52b80c446eec
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:632626d38f137f1b532904c86748923f618de2d84d09066f3d0aab67269c0719
3
  size 23159290
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb63af91c38df442748945f1270836c41afb394cb114946f4943111c086f7fbe
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c212ac5b1e7f34fdb83be045ed0267f2d345126b22dfd1da63ed82ec4ee137d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed29baad6d6611c736eccf18bdd7afdc5d6f3612cde61e7bfa83472d3e2068d2
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7736f824b0a772b3806e37af9e860068207311e879196bc19f8a76d97eaf6bce
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.1519354581832886,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
4
- "epoch": 1.6842105263157894,
5
  "eval_steps": 20,
6
- "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -607,6 +607,154 @@
607
  "eval_samples_per_second": 48.871,
608
  "eval_steps_per_second": 4.887,
609
  "step": 80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
610
  }
611
  ],
612
  "logging_steps": 1,
@@ -621,7 +769,7 @@
621
  "early_stopping_threshold": 0.0
622
  },
623
  "attributes": {
624
- "early_stopping_patience_counter": 1
625
  }
626
  },
627
  "TrainerControl": {
@@ -635,7 +783,7 @@
635
  "attributes": {}
636
  }
637
  },
638
- "total_flos": 8119797185249280.0,
639
  "train_batch_size": 10,
640
  "trial_name": null,
641
  "trial_params": null
 
1
  {
2
  "best_metric": 1.1519354581832886,
3
  "best_model_checkpoint": "miner_id_besimray/checkpoint-60",
4
+ "epoch": 2.1052631578947367,
5
  "eval_steps": 20,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
607
  "eval_samples_per_second": 48.871,
608
  "eval_steps_per_second": 4.887,
609
  "step": 80
610
+ },
611
+ {
612
+ "epoch": 1.7052631578947368,
613
+ "grad_norm": 0.44070982933044434,
614
+ "learning_rate": 9.775619357041952e-05,
615
+ "loss": 1.1024,
616
+ "step": 81
617
+ },
618
+ {
619
+ "epoch": 1.7263157894736842,
620
+ "grad_norm": 0.5059276819229126,
621
+ "learning_rate": 9.551351696494854e-05,
622
+ "loss": 1.1214,
623
+ "step": 82
624
+ },
625
+ {
626
+ "epoch": 1.7473684210526317,
627
+ "grad_norm": 0.4155433773994446,
628
+ "learning_rate": 9.327309943879604e-05,
629
+ "loss": 1.1853,
630
+ "step": 83
631
+ },
632
+ {
633
+ "epoch": 1.768421052631579,
634
+ "grad_norm": 0.5396384596824646,
635
+ "learning_rate": 9.103606910965666e-05,
636
+ "loss": 1.1497,
637
+ "step": 84
638
+ },
639
+ {
640
+ "epoch": 1.7894736842105263,
641
+ "grad_norm": 0.43235623836517334,
642
+ "learning_rate": 8.880355238966923e-05,
643
+ "loss": 1.1753,
644
+ "step": 85
645
+ },
646
+ {
647
+ "epoch": 1.8105263157894735,
648
+ "grad_norm": 0.3918503224849701,
649
+ "learning_rate": 8.657667341823448e-05,
650
+ "loss": 1.1254,
651
+ "step": 86
652
+ },
653
+ {
654
+ "epoch": 1.831578947368421,
655
+ "grad_norm": 0.4692346155643463,
656
+ "learning_rate": 8.435655349597689e-05,
657
+ "loss": 1.3666,
658
+ "step": 87
659
+ },
660
+ {
661
+ "epoch": 1.8526315789473684,
662
+ "grad_norm": 0.4968159794807434,
663
+ "learning_rate": 8.214431052013634e-05,
664
+ "loss": 0.9668,
665
+ "step": 88
666
+ },
667
+ {
668
+ "epoch": 1.8736842105263158,
669
+ "grad_norm": 0.4856269061565399,
670
+ "learning_rate": 7.994105842167273e-05,
671
+ "loss": 1.1482,
672
+ "step": 89
673
+ },
674
+ {
675
+ "epoch": 1.8947368421052633,
676
+ "grad_norm": 0.5288775563240051,
677
+ "learning_rate": 7.774790660436858e-05,
678
+ "loss": 1.13,
679
+ "step": 90
680
+ },
681
+ {
682
+ "epoch": 1.9157894736842105,
683
+ "grad_norm": 0.5403844118118286,
684
+ "learning_rate": 7.556595938621058e-05,
685
+ "loss": 1.1483,
686
+ "step": 91
687
+ },
688
+ {
689
+ "epoch": 1.936842105263158,
690
+ "grad_norm": 0.45445382595062256,
691
+ "learning_rate": 7.339631544333249e-05,
692
+ "loss": 1.0528,
693
+ "step": 92
694
+ },
695
+ {
696
+ "epoch": 1.9578947368421051,
697
+ "grad_norm": 0.48713403940200806,
698
+ "learning_rate": 7.124006725679828e-05,
699
+ "loss": 1.2208,
700
+ "step": 93
701
+ },
702
+ {
703
+ "epoch": 1.9789473684210526,
704
+ "grad_norm": 0.4627130627632141,
705
+ "learning_rate": 6.909830056250527e-05,
706
+ "loss": 1.0794,
707
+ "step": 94
708
+ },
709
+ {
710
+ "epoch": 2.0,
711
+ "grad_norm": 0.46807029843330383,
712
+ "learning_rate": 6.697209380448333e-05,
713
+ "loss": 1.12,
714
+ "step": 95
715
+ },
716
+ {
717
+ "epoch": 2.0210526315789474,
718
+ "grad_norm": 0.41066575050354004,
719
+ "learning_rate": 6.486251759186572e-05,
720
+ "loss": 1.0634,
721
+ "step": 96
722
+ },
723
+ {
724
+ "epoch": 2.042105263157895,
725
+ "grad_norm": 0.3904050886631012,
726
+ "learning_rate": 6.277063415980549e-05,
727
+ "loss": 0.9888,
728
+ "step": 97
729
+ },
730
+ {
731
+ "epoch": 2.0631578947368423,
732
+ "grad_norm": 0.49676060676574707,
733
+ "learning_rate": 6.069749683460765e-05,
734
+ "loss": 0.8783,
735
+ "step": 98
736
+ },
737
+ {
738
+ "epoch": 2.0842105263157893,
739
+ "grad_norm": 0.46549147367477417,
740
+ "learning_rate": 5.864414950334796e-05,
741
+ "loss": 0.9815,
742
+ "step": 99
743
+ },
744
+ {
745
+ "epoch": 2.1052631578947367,
746
+ "grad_norm": 0.5622740387916565,
747
+ "learning_rate": 5.6611626088244194e-05,
748
+ "loss": 1.0091,
749
+ "step": 100
750
+ },
751
+ {
752
+ "epoch": 2.1052631578947367,
753
+ "eval_loss": 1.1575236320495605,
754
+ "eval_runtime": 2.0589,
755
+ "eval_samples_per_second": 48.569,
756
+ "eval_steps_per_second": 4.857,
757
+ "step": 100
758
  }
759
  ],
760
  "logging_steps": 1,
 
769
  "early_stopping_threshold": 0.0
770
  },
771
  "attributes": {
772
+ "early_stopping_patience_counter": 2
773
  }
774
  },
775
  "TrainerControl": {
 
783
  "attributes": {}
784
  }
785
  },
786
+ "total_flos": 1.01497464815616e+16,
787
  "train_batch_size": 10,
788
  "trial_name": null,
789
  "trial_params": null