robertou2 committed on
Commit
1db0c7d
·
verified ·
1 Parent(s): b595f50

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:adbf32216f68817ac7b8e81d84ec05581ee1d4aec78db3102b8b8bfda9c3203a
3
  size 161515608
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1238e11a7bd83abb0038d7f1ee20d6d90f9c39b3e70e08a93260b11901cee5c5
3
  size 161515608
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b74bcd870dc58a45d5857957da63a7b34ce5562b9a8ed24f282d74c1daa703e
3
  size 323181259
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0230fe0e059307ec2503aabf08f5e2bde7daf4b09ee960fcb69b3dfba125cec1
3
  size 323181259
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5b517d1b8e2b0f837c8b00170b154961d4d989feba4326ac25583df7a55c57a
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7063580a565cb4ab0c1d36b25d817a35a16d1f21f4a993a9f25cdba6efadcb9d
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e3ed70b691deef80930296c31c1f2faec5c46190c3c196aae31c4481cc14ad8
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc1343ebe01037162a057bcccefc9f328f82750a217d5974a02a6ad6a4bc5ce
3
  size 1465
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 4.175182481751825,
6
  "eval_steps": 500,
7
- "global_step": 75,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -558,206 +558,6 @@
558
  "mean_token_accuracy": 0.765391580760479,
559
  "num_tokens": 204197.0,
560
  "step": 55
561
- },
562
- {
563
- "entropy": 1.411361187696457,
564
- "epoch": 3.116788321167883,
565
- "grad_norm": 4.21875,
566
- "learning_rate": 1.3432073050985201e-05,
567
- "loss": 0.7665,
568
- "mean_token_accuracy": 0.7553833983838558,
569
- "num_tokens": 207610.0,
570
- "step": 56
571
- },
572
- {
573
- "entropy": 1.3223325684666634,
574
- "epoch": 3.1751824817518246,
575
- "grad_norm": 3.71875,
576
- "learning_rate": 1.2808754571563827e-05,
577
- "loss": 0.804,
578
- "mean_token_accuracy": 0.7530029378831387,
579
- "num_tokens": 211730.0,
580
- "step": 57
581
- },
582
- {
583
- "entropy": 1.2704328149557114,
584
- "epoch": 3.2335766423357666,
585
- "grad_norm": 3.46875,
586
- "learning_rate": 1.2189280281214128e-05,
587
- "loss": 0.7542,
588
- "mean_token_accuracy": 0.775670263916254,
589
- "num_tokens": 216415.0,
590
- "step": 58
591
- },
592
- {
593
- "entropy": 1.3555709198117256,
594
- "epoch": 3.291970802919708,
595
- "grad_norm": 3.9375,
596
- "learning_rate": 1.1574736948340163e-05,
597
- "loss": 0.7992,
598
- "mean_token_accuracy": 0.7488890923559666,
599
- "num_tokens": 219953.0,
600
- "step": 59
601
- },
602
- {
603
- "entropy": 1.2632866501808167,
604
- "epoch": 3.3503649635036497,
605
- "grad_norm": 3.578125,
606
- "learning_rate": 1.0966202690771015e-05,
607
- "loss": 0.75,
608
- "mean_token_accuracy": 0.7654453739523888,
609
- "num_tokens": 224335.0,
610
- "step": 60
611
- },
612
- {
613
- "entropy": 1.2773741334676743,
614
- "epoch": 3.408759124087591,
615
- "grad_norm": 4.125,
616
- "learning_rate": 1.036474508437579e-05,
617
- "loss": 0.8394,
618
- "mean_token_accuracy": 0.7538279145956039,
619
- "num_tokens": 228300.0,
620
- "step": 61
621
- },
622
- {
623
- "entropy": 1.2203935906291008,
624
- "epoch": 3.4671532846715327,
625
- "grad_norm": 4.3125,
626
- "learning_rate": 9.771419290172776e-06,
627
- "loss": 0.7866,
628
- "mean_token_accuracy": 0.7759390734136105,
629
- "num_tokens": 231820.0,
630
- "step": 62
631
- },
632
- {
633
- "entropy": 1.2281916178762913,
634
- "epoch": 3.5255474452554747,
635
- "grad_norm": 4.5,
636
- "learning_rate": 9.187266203218457e-06,
637
- "loss": 0.7456,
638
- "mean_token_accuracy": 0.7896540127694607,
639
- "num_tokens": 235502.0,
640
- "step": 63
641
- },
642
- {
643
- "entropy": 1.1479723155498505,
644
- "epoch": 3.5839416058394162,
645
- "grad_norm": 3.84375,
646
- "learning_rate": 8.61331062652391e-06,
647
- "loss": 0.6779,
648
- "mean_token_accuracy": 0.7954859808087349,
649
- "num_tokens": 239847.0,
650
- "step": 64
651
- },
652
- {
653
- "entropy": 1.227071214467287,
654
- "epoch": 3.6423357664233578,
655
- "grad_norm": 4.78125,
656
- "learning_rate": 8.050559473202078e-06,
657
- "loss": 0.7642,
658
- "mean_token_accuracy": 0.7581925354897976,
659
- "num_tokens": 243356.0,
660
- "step": 65
661
- },
662
- {
663
- "entropy": 1.131257489323616,
664
- "epoch": 3.7007299270072993,
665
- "grad_norm": 3.5625,
666
- "learning_rate": 7.500000000000004e-06,
667
- "loss": 0.7819,
668
- "mean_token_accuracy": 0.7654204778373241,
669
- "num_tokens": 249682.0,
670
- "step": 66
671
- },
672
- {
673
- "entropy": 1.16723557934165,
674
- "epoch": 3.759124087591241,
675
- "grad_norm": 4.5,
676
- "learning_rate": 6.962598075315047e-06,
677
- "loss": 0.6689,
678
- "mean_token_accuracy": 0.783266007900238,
679
- "num_tokens": 253238.0,
680
- "step": 67
681
- },
682
- {
683
- "entropy": 1.2070689871907234,
684
- "epoch": 3.8175182481751824,
685
- "grad_norm": 5.1875,
686
- "learning_rate": 6.439296484733526e-06,
687
- "loss": 0.7421,
688
- "mean_token_accuracy": 0.7796755991876125,
689
- "num_tokens": 256423.0,
690
- "step": 68
691
- },
692
- {
693
- "entropy": 1.1488405130803585,
694
- "epoch": 3.875912408759124,
695
- "grad_norm": 5.34375,
696
- "learning_rate": 5.931013277064377e-06,
697
- "loss": 0.7267,
698
- "mean_token_accuracy": 0.7691169492900372,
699
- "num_tokens": 259934.0,
700
- "step": 69
701
- },
702
- {
703
- "entropy": 1.130510926246643,
704
- "epoch": 3.9343065693430654,
705
- "grad_norm": 5.25,
706
- "learning_rate": 5.438640153769654e-06,
707
- "loss": 0.7209,
708
- "mean_token_accuracy": 0.7871466726064682,
709
- "num_tokens": 263187.0,
710
- "step": 70
711
- },
712
- {
713
- "entropy": 1.1477855034172535,
714
- "epoch": 3.9927007299270074,
715
- "grad_norm": 4.75,
716
- "learning_rate": 4.963040904617131e-06,
717
- "loss": 0.7762,
718
- "mean_token_accuracy": 0.7656804099678993,
719
- "num_tokens": 267097.0,
720
- "step": 71
721
- },
722
- {
723
- "entropy": 1.09878408908844,
724
- "epoch": 4.0,
725
- "grad_norm": 12.875,
726
- "learning_rate": 4.505049892299517e-06,
727
- "loss": 0.7072,
728
- "mean_token_accuracy": 0.7617444694042206,
729
- "num_tokens": 267588.0,
730
- "step": 72
731
- },
732
- {
733
- "entropy": 1.0318926461040974,
734
- "epoch": 4.0583941605839415,
735
- "grad_norm": 4.28125,
736
- "learning_rate": 4.06547058867883e-06,
737
- "loss": 0.5992,
738
- "mean_token_accuracy": 0.8166146464645863,
739
- "num_tokens": 271589.0,
740
- "step": 73
741
- },
742
- {
743
- "entropy": 1.1504660807549953,
744
- "epoch": 4.116788321167883,
745
- "grad_norm": 4.78125,
746
- "learning_rate": 3.645074165223656e-06,
747
- "loss": 0.606,
748
- "mean_token_accuracy": 0.8282722532749176,
749
- "num_tokens": 274468.0,
750
- "step": 74
751
- },
752
- {
753
- "entropy": 1.1046061255037785,
754
- "epoch": 4.175182481751825,
755
- "grad_norm": 3.671875,
756
- "learning_rate": 3.244598140112404e-06,
757
- "loss": 0.6325,
758
- "mean_token_accuracy": 0.8047133162617683,
759
- "num_tokens": 278830.0,
760
- "step": 75
761
  }
762
  ],
763
  "logging_steps": 1,
@@ -777,7 +577,7 @@
777
  "attributes": {}
778
  }
779
  },
780
- "total_flos": 7471994807169024.0,
781
  "train_batch_size": 2,
782
  "trial_name": null,
783
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 3.0583941605839415,
6
  "eval_steps": 500,
7
+ "global_step": 55,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
558
  "mean_token_accuracy": 0.765391580760479,
559
  "num_tokens": 204197.0,
560
  "step": 55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561
  }
562
  ],
563
  "logging_steps": 1,
 
577
  "attributes": {}
578
  }
579
  },
580
+ "total_flos": 5469020090400768.0,
581
  "train_batch_size": 2,
582
  "trial_name": null,
583
  "trial_params": null