schnell commited on
Commit
7f626cf
·
1 Parent(s): 02166a3

Training in progress, epoch 10

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0eb4dcf9ed1ce73d5d32482193ed272b0ff98916f9ae6c370fad43e65a6259a2
3
  size 236469913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e50c16003edb8883d4149aea560899fa302da8a5ea265041b7b2b0e6e753ea6
3
  size 236469913
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9af26e70d13d97dd1148f504edda0ca0d4b3e70b4d9f65a19697b44e426a3580
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020c47f09229e7b4397da1597dd814d8bad9db375a9c2b2366593d834ff17bb0
3
  size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f39204b4e0d21ca3c9794332a74eb829d80abede2633c846a34ad11056cbd2f4
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:811189d72660f95f34538db512a28cda03a6481472b571473a39751393e425ed
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:981a20d97caa849fd69d09144a00fa71a090a40d23907501e2ed06e6c009f28d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68eb61321f52d319beb3b0aea14b956dcbfbc7a51e4307bb6194168d92337ac6
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c5c09b553671832cbd2235f75894fd5507dbddfab709bf1c35b62744443e806
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1cd8f88a9cad614bc2f0545599431a338717269118bde820f7a2ebef8c3d177
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90ca4aeeda8da7670d23742adcfecc2c9d6f9e133399a23226811c91518226ab
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5a601c907d442bfc98aecb4b414394e5a711f038902c4eca06616f9bff709ff
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da7d64569f6b34d0d97ddba566149385f2d6d8171b6a32d36e23c2c2476fc151
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90712adf282f1d59d322b20c807b46ede7a0fd0f43d34d14f6b8d9ee4800700e
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.0,
5
- "global_step": 625257,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -7587,11 +7587,854 @@
7587
  "eval_samples_per_second": 966.392,
7588
  "eval_steps_per_second": 40.267,
7589
  "step": 625257
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7590
  }
7591
  ],
7592
  "max_steps": 972622,
7593
  "num_train_epochs": 14,
7594
- "total_flos": 3.845195420804317e+18,
7595
  "trial_name": null,
7596
  "trial_params": null
7597
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 694730,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
7587
  "eval_samples_per_second": 966.392,
7588
  "eval_steps_per_second": 40.267,
7589
  "step": 625257
7590
+ },
7591
+ {
7592
+ "epoch": 9.0,
7593
+ "learning_rate": 3.6078492462833434e-05,
7594
+ "loss": 2.84,
7595
+ "step": 625500
7596
+ },
7597
+ {
7598
+ "epoch": 9.01,
7599
+ "learning_rate": 3.60265657210807e-05,
7600
+ "loss": 2.8314,
7601
+ "step": 626000
7602
+ },
7603
+ {
7604
+ "epoch": 9.02,
7605
+ "learning_rate": 3.597463897932797e-05,
7606
+ "loss": 2.8356,
7607
+ "step": 626500
7608
+ },
7609
+ {
7610
+ "epoch": 9.03,
7611
+ "learning_rate": 3.592271223757523e-05,
7612
+ "loss": 2.8391,
7613
+ "step": 627000
7614
+ },
7615
+ {
7616
+ "epoch": 9.03,
7617
+ "learning_rate": 3.5870785495822494e-05,
7618
+ "loss": 2.8317,
7619
+ "step": 627500
7620
+ },
7621
+ {
7622
+ "epoch": 9.04,
7623
+ "learning_rate": 3.581896260755327e-05,
7624
+ "loss": 2.8298,
7625
+ "step": 628000
7626
+ },
7627
+ {
7628
+ "epoch": 9.05,
7629
+ "learning_rate": 3.576703586580053e-05,
7630
+ "loss": 2.8356,
7631
+ "step": 628500
7632
+ },
7633
+ {
7634
+ "epoch": 9.05,
7635
+ "learning_rate": 3.5715109124047796e-05,
7636
+ "loss": 2.8319,
7637
+ "step": 629000
7638
+ },
7639
+ {
7640
+ "epoch": 9.06,
7641
+ "learning_rate": 3.566318238229506e-05,
7642
+ "loss": 2.8361,
7643
+ "step": 629500
7644
+ },
7645
+ {
7646
+ "epoch": 9.07,
7647
+ "learning_rate": 3.5611255640542326e-05,
7648
+ "loss": 2.8352,
7649
+ "step": 630000
7650
+ },
7651
+ {
7652
+ "epoch": 9.08,
7653
+ "learning_rate": 3.555932889878959e-05,
7654
+ "loss": 2.8362,
7655
+ "step": 630500
7656
+ },
7657
+ {
7658
+ "epoch": 9.08,
7659
+ "learning_rate": 3.5507402157036856e-05,
7660
+ "loss": 2.838,
7661
+ "step": 631000
7662
+ },
7663
+ {
7664
+ "epoch": 9.09,
7665
+ "learning_rate": 3.545547541528412e-05,
7666
+ "loss": 2.8323,
7667
+ "step": 631500
7668
+ },
7669
+ {
7670
+ "epoch": 9.1,
7671
+ "learning_rate": 3.540365252701489e-05,
7672
+ "loss": 2.8342,
7673
+ "step": 632000
7674
+ },
7675
+ {
7676
+ "epoch": 9.1,
7677
+ "learning_rate": 3.535172578526215e-05,
7678
+ "loss": 2.8374,
7679
+ "step": 632500
7680
+ },
7681
+ {
7682
+ "epoch": 9.11,
7683
+ "learning_rate": 3.5299902896992925e-05,
7684
+ "loss": 2.835,
7685
+ "step": 633000
7686
+ },
7687
+ {
7688
+ "epoch": 9.12,
7689
+ "learning_rate": 3.524797615524019e-05,
7690
+ "loss": 2.8346,
7691
+ "step": 633500
7692
+ },
7693
+ {
7694
+ "epoch": 9.13,
7695
+ "learning_rate": 3.5196049413487455e-05,
7696
+ "loss": 2.8382,
7697
+ "step": 634000
7698
+ },
7699
+ {
7700
+ "epoch": 9.13,
7701
+ "learning_rate": 3.5144122671734716e-05,
7702
+ "loss": 2.8309,
7703
+ "step": 634500
7704
+ },
7705
+ {
7706
+ "epoch": 9.14,
7707
+ "learning_rate": 3.5092195929981985e-05,
7708
+ "loss": 2.8379,
7709
+ "step": 635000
7710
+ },
7711
+ {
7712
+ "epoch": 9.15,
7713
+ "learning_rate": 3.504037304171275e-05,
7714
+ "loss": 2.8338,
7715
+ "step": 635500
7716
+ },
7717
+ {
7718
+ "epoch": 9.15,
7719
+ "learning_rate": 3.498844629996002e-05,
7720
+ "loss": 2.8332,
7721
+ "step": 636000
7722
+ },
7723
+ {
7724
+ "epoch": 9.16,
7725
+ "learning_rate": 3.493651955820728e-05,
7726
+ "loss": 2.8359,
7727
+ "step": 636500
7728
+ },
7729
+ {
7730
+ "epoch": 9.17,
7731
+ "learning_rate": 3.488459281645455e-05,
7732
+ "loss": 2.8306,
7733
+ "step": 637000
7734
+ },
7735
+ {
7736
+ "epoch": 9.18,
7737
+ "learning_rate": 3.483266607470182e-05,
7738
+ "loss": 2.839,
7739
+ "step": 637500
7740
+ },
7741
+ {
7742
+ "epoch": 9.18,
7743
+ "learning_rate": 3.478073933294907e-05,
7744
+ "loss": 2.8349,
7745
+ "step": 638000
7746
+ },
7747
+ {
7748
+ "epoch": 9.19,
7749
+ "learning_rate": 3.472881259119634e-05,
7750
+ "loss": 2.8302,
7751
+ "step": 638500
7752
+ },
7753
+ {
7754
+ "epoch": 9.2,
7755
+ "learning_rate": 3.467698970292711e-05,
7756
+ "loss": 2.828,
7757
+ "step": 639000
7758
+ },
7759
+ {
7760
+ "epoch": 9.21,
7761
+ "learning_rate": 3.4625062961174375e-05,
7762
+ "loss": 2.8344,
7763
+ "step": 639500
7764
+ },
7765
+ {
7766
+ "epoch": 9.21,
7767
+ "learning_rate": 3.457313621942164e-05,
7768
+ "loss": 2.8365,
7769
+ "step": 640000
7770
+ },
7771
+ {
7772
+ "epoch": 9.22,
7773
+ "learning_rate": 3.452120947766891e-05,
7774
+ "loss": 2.8357,
7775
+ "step": 640500
7776
+ },
7777
+ {
7778
+ "epoch": 9.23,
7779
+ "learning_rate": 3.4469282735916166e-05,
7780
+ "loss": 2.8318,
7781
+ "step": 641000
7782
+ },
7783
+ {
7784
+ "epoch": 9.23,
7785
+ "learning_rate": 3.4417459847646946e-05,
7786
+ "loss": 2.8369,
7787
+ "step": 641500
7788
+ },
7789
+ {
7790
+ "epoch": 9.24,
7791
+ "learning_rate": 3.436553310589421e-05,
7792
+ "loss": 2.8379,
7793
+ "step": 642000
7794
+ },
7795
+ {
7796
+ "epoch": 9.25,
7797
+ "learning_rate": 3.431360636414147e-05,
7798
+ "loss": 2.8327,
7799
+ "step": 642500
7800
+ },
7801
+ {
7802
+ "epoch": 9.26,
7803
+ "learning_rate": 3.426167962238874e-05,
7804
+ "loss": 2.8311,
7805
+ "step": 643000
7806
+ },
7807
+ {
7808
+ "epoch": 9.26,
7809
+ "learning_rate": 3.4209752880636e-05,
7810
+ "loss": 2.8275,
7811
+ "step": 643500
7812
+ },
7813
+ {
7814
+ "epoch": 9.27,
7815
+ "learning_rate": 3.415782613888327e-05,
7816
+ "loss": 2.834,
7817
+ "step": 644000
7818
+ },
7819
+ {
7820
+ "epoch": 9.28,
7821
+ "learning_rate": 3.410589939713053e-05,
7822
+ "loss": 2.8364,
7823
+ "step": 644500
7824
+ },
7825
+ {
7826
+ "epoch": 9.28,
7827
+ "learning_rate": 3.40540765088613e-05,
7828
+ "loss": 2.832,
7829
+ "step": 645000
7830
+ },
7831
+ {
7832
+ "epoch": 9.29,
7833
+ "learning_rate": 3.400214976710856e-05,
7834
+ "loss": 2.8355,
7835
+ "step": 645500
7836
+ },
7837
+ {
7838
+ "epoch": 9.3,
7839
+ "learning_rate": 3.395022302535583e-05,
7840
+ "loss": 2.8331,
7841
+ "step": 646000
7842
+ },
7843
+ {
7844
+ "epoch": 9.31,
7845
+ "learning_rate": 3.389829628360309e-05,
7846
+ "loss": 2.833,
7847
+ "step": 646500
7848
+ },
7849
+ {
7850
+ "epoch": 9.31,
7851
+ "learning_rate": 3.3846473395333866e-05,
7852
+ "loss": 2.8313,
7853
+ "step": 647000
7854
+ },
7855
+ {
7856
+ "epoch": 9.32,
7857
+ "learning_rate": 3.379454665358113e-05,
7858
+ "loss": 2.8312,
7859
+ "step": 647500
7860
+ },
7861
+ {
7862
+ "epoch": 9.33,
7863
+ "learning_rate": 3.3742619911828396e-05,
7864
+ "loss": 2.8333,
7865
+ "step": 648000
7866
+ },
7867
+ {
7868
+ "epoch": 9.33,
7869
+ "learning_rate": 3.369069317007566e-05,
7870
+ "loss": 2.8306,
7871
+ "step": 648500
7872
+ },
7873
+ {
7874
+ "epoch": 9.34,
7875
+ "learning_rate": 3.363887028180643e-05,
7876
+ "loss": 2.832,
7877
+ "step": 649000
7878
+ },
7879
+ {
7880
+ "epoch": 9.35,
7881
+ "learning_rate": 3.358694354005369e-05,
7882
+ "loss": 2.8346,
7883
+ "step": 649500
7884
+ },
7885
+ {
7886
+ "epoch": 9.36,
7887
+ "learning_rate": 3.353501679830096e-05,
7888
+ "loss": 2.8321,
7889
+ "step": 650000
7890
+ },
7891
+ {
7892
+ "epoch": 9.36,
7893
+ "learning_rate": 3.348309005654822e-05,
7894
+ "loss": 2.8266,
7895
+ "step": 650500
7896
+ },
7897
+ {
7898
+ "epoch": 9.37,
7899
+ "learning_rate": 3.343116331479549e-05,
7900
+ "loss": 2.836,
7901
+ "step": 651000
7902
+ },
7903
+ {
7904
+ "epoch": 9.38,
7905
+ "learning_rate": 3.3379340426526256e-05,
7906
+ "loss": 2.8332,
7907
+ "step": 651500
7908
+ },
7909
+ {
7910
+ "epoch": 9.38,
7911
+ "learning_rate": 3.3327413684773525e-05,
7912
+ "loss": 2.8319,
7913
+ "step": 652000
7914
+ },
7915
+ {
7916
+ "epoch": 9.39,
7917
+ "learning_rate": 3.3275486943020786e-05,
7918
+ "loss": 2.8319,
7919
+ "step": 652500
7920
+ },
7921
+ {
7922
+ "epoch": 9.4,
7923
+ "learning_rate": 3.322356020126805e-05,
7924
+ "loss": 2.8284,
7925
+ "step": 653000
7926
+ },
7927
+ {
7928
+ "epoch": 9.41,
7929
+ "learning_rate": 3.3171633459515316e-05,
7930
+ "loss": 2.8259,
7931
+ "step": 653500
7932
+ },
7933
+ {
7934
+ "epoch": 9.41,
7935
+ "learning_rate": 3.3119706717762584e-05,
7936
+ "loss": 2.8288,
7937
+ "step": 654000
7938
+ },
7939
+ {
7940
+ "epoch": 9.42,
7941
+ "learning_rate": 3.306777997600985e-05,
7942
+ "loss": 2.8329,
7943
+ "step": 654500
7944
+ },
7945
+ {
7946
+ "epoch": 9.43,
7947
+ "learning_rate": 3.301585323425711e-05,
7948
+ "loss": 2.8321,
7949
+ "step": 655000
7950
+ },
7951
+ {
7952
+ "epoch": 9.44,
7953
+ "learning_rate": 3.296403034598789e-05,
7954
+ "loss": 2.8312,
7955
+ "step": 655500
7956
+ },
7957
+ {
7958
+ "epoch": 9.44,
7959
+ "learning_rate": 3.291210360423514e-05,
7960
+ "loss": 2.8308,
7961
+ "step": 656000
7962
+ },
7963
+ {
7964
+ "epoch": 9.45,
7965
+ "learning_rate": 3.286017686248241e-05,
7966
+ "loss": 2.8266,
7967
+ "step": 656500
7968
+ },
7969
+ {
7970
+ "epoch": 9.46,
7971
+ "learning_rate": 3.280825012072968e-05,
7972
+ "loss": 2.8317,
7973
+ "step": 657000
7974
+ },
7975
+ {
7976
+ "epoch": 9.46,
7977
+ "learning_rate": 3.2756427232460445e-05,
7978
+ "loss": 2.8337,
7979
+ "step": 657500
7980
+ },
7981
+ {
7982
+ "epoch": 9.47,
7983
+ "learning_rate": 3.270450049070771e-05,
7984
+ "loss": 2.8302,
7985
+ "step": 658000
7986
+ },
7987
+ {
7988
+ "epoch": 9.48,
7989
+ "learning_rate": 3.2652573748954974e-05,
7990
+ "loss": 2.8275,
7991
+ "step": 658500
7992
+ },
7993
+ {
7994
+ "epoch": 9.49,
7995
+ "learning_rate": 3.260064700720224e-05,
7996
+ "loss": 2.8284,
7997
+ "step": 659000
7998
+ },
7999
+ {
8000
+ "epoch": 9.49,
8001
+ "learning_rate": 3.2548824118933016e-05,
8002
+ "loss": 2.8285,
8003
+ "step": 659500
8004
+ },
8005
+ {
8006
+ "epoch": 9.5,
8007
+ "learning_rate": 3.249689737718028e-05,
8008
+ "loss": 2.8306,
8009
+ "step": 660000
8010
+ },
8011
+ {
8012
+ "epoch": 9.51,
8013
+ "learning_rate": 3.244497063542754e-05,
8014
+ "loss": 2.8324,
8015
+ "step": 660500
8016
+ },
8017
+ {
8018
+ "epoch": 9.51,
8019
+ "learning_rate": 3.239304389367481e-05,
8020
+ "loss": 2.8272,
8021
+ "step": 661000
8022
+ },
8023
+ {
8024
+ "epoch": 9.52,
8025
+ "learning_rate": 3.234111715192207e-05,
8026
+ "loss": 2.8337,
8027
+ "step": 661500
8028
+ },
8029
+ {
8030
+ "epoch": 9.53,
8031
+ "learning_rate": 3.228919041016934e-05,
8032
+ "loss": 2.8277,
8033
+ "step": 662000
8034
+ },
8035
+ {
8036
+ "epoch": 9.54,
8037
+ "learning_rate": 3.22372636684166e-05,
8038
+ "loss": 2.8251,
8039
+ "step": 662500
8040
+ },
8041
+ {
8042
+ "epoch": 9.54,
8043
+ "learning_rate": 3.218544078014737e-05,
8044
+ "loss": 2.831,
8045
+ "step": 663000
8046
+ },
8047
+ {
8048
+ "epoch": 9.55,
8049
+ "learning_rate": 3.213351403839463e-05,
8050
+ "loss": 2.8286,
8051
+ "step": 663500
8052
+ },
8053
+ {
8054
+ "epoch": 9.56,
8055
+ "learning_rate": 3.20815872966419e-05,
8056
+ "loss": 2.8298,
8057
+ "step": 664000
8058
+ },
8059
+ {
8060
+ "epoch": 9.56,
8061
+ "learning_rate": 3.202966055488916e-05,
8062
+ "loss": 2.8234,
8063
+ "step": 664500
8064
+ },
8065
+ {
8066
+ "epoch": 9.57,
8067
+ "learning_rate": 3.197773381313643e-05,
8068
+ "loss": 2.8285,
8069
+ "step": 665000
8070
+ },
8071
+ {
8072
+ "epoch": 9.58,
8073
+ "learning_rate": 3.192580707138369e-05,
8074
+ "loss": 2.8285,
8075
+ "step": 665500
8076
+ },
8077
+ {
8078
+ "epoch": 9.59,
8079
+ "learning_rate": 3.1873880329630954e-05,
8080
+ "loss": 2.8288,
8081
+ "step": 666000
8082
+ },
8083
+ {
8084
+ "epoch": 9.59,
8085
+ "learning_rate": 3.182195358787822e-05,
8086
+ "loss": 2.8278,
8087
+ "step": 666500
8088
+ },
8089
+ {
8090
+ "epoch": 9.6,
8091
+ "learning_rate": 3.177013069960899e-05,
8092
+ "loss": 2.8275,
8093
+ "step": 667000
8094
+ },
8095
+ {
8096
+ "epoch": 9.61,
8097
+ "learning_rate": 3.171830781133976e-05,
8098
+ "loss": 2.8216,
8099
+ "step": 667500
8100
+ },
8101
+ {
8102
+ "epoch": 9.62,
8103
+ "learning_rate": 3.166638106958702e-05,
8104
+ "loss": 2.8224,
8105
+ "step": 668000
8106
+ },
8107
+ {
8108
+ "epoch": 9.62,
8109
+ "learning_rate": 3.161445432783429e-05,
8110
+ "loss": 2.8297,
8111
+ "step": 668500
8112
+ },
8113
+ {
8114
+ "epoch": 9.63,
8115
+ "learning_rate": 3.156252758608156e-05,
8116
+ "loss": 2.827,
8117
+ "step": 669000
8118
+ },
8119
+ {
8120
+ "epoch": 9.64,
8121
+ "learning_rate": 3.151060084432883e-05,
8122
+ "loss": 2.8302,
8123
+ "step": 669500
8124
+ },
8125
+ {
8126
+ "epoch": 9.64,
8127
+ "learning_rate": 3.1458777956059594e-05,
8128
+ "loss": 2.8289,
8129
+ "step": 670000
8130
+ },
8131
+ {
8132
+ "epoch": 9.65,
8133
+ "learning_rate": 3.140685121430686e-05,
8134
+ "loss": 2.8283,
8135
+ "step": 670500
8136
+ },
8137
+ {
8138
+ "epoch": 9.66,
8139
+ "learning_rate": 3.135492447255412e-05,
8140
+ "loss": 2.824,
8141
+ "step": 671000
8142
+ },
8143
+ {
8144
+ "epoch": 9.67,
8145
+ "learning_rate": 3.1302997730801385e-05,
8146
+ "loss": 2.825,
8147
+ "step": 671500
8148
+ },
8149
+ {
8150
+ "epoch": 9.67,
8151
+ "learning_rate": 3.125117484253215e-05,
8152
+ "loss": 2.8198,
8153
+ "step": 672000
8154
+ },
8155
+ {
8156
+ "epoch": 9.68,
8157
+ "learning_rate": 3.119924810077942e-05,
8158
+ "loss": 2.8254,
8159
+ "step": 672500
8160
+ },
8161
+ {
8162
+ "epoch": 9.69,
8163
+ "learning_rate": 3.114732135902669e-05,
8164
+ "loss": 2.8216,
8165
+ "step": 673000
8166
+ },
8167
+ {
8168
+ "epoch": 9.69,
8169
+ "learning_rate": 3.1095394617273957e-05,
8170
+ "loss": 2.8273,
8171
+ "step": 673500
8172
+ },
8173
+ {
8174
+ "epoch": 9.7,
8175
+ "learning_rate": 3.104346787552122e-05,
8176
+ "loss": 2.8231,
8177
+ "step": 674000
8178
+ },
8179
+ {
8180
+ "epoch": 9.71,
8181
+ "learning_rate": 3.099154113376848e-05,
8182
+ "loss": 2.8267,
8183
+ "step": 674500
8184
+ },
8185
+ {
8186
+ "epoch": 9.72,
8187
+ "learning_rate": 3.093971824549925e-05,
8188
+ "loss": 2.8241,
8189
+ "step": 675000
8190
+ },
8191
+ {
8192
+ "epoch": 9.72,
8193
+ "learning_rate": 3.0887791503746514e-05,
8194
+ "loss": 2.8218,
8195
+ "step": 675500
8196
+ },
8197
+ {
8198
+ "epoch": 9.73,
8199
+ "learning_rate": 3.083586476199378e-05,
8200
+ "loss": 2.8262,
8201
+ "step": 676000
8202
+ },
8203
+ {
8204
+ "epoch": 9.74,
8205
+ "learning_rate": 3.0783938020241044e-05,
8206
+ "loss": 2.8207,
8207
+ "step": 676500
8208
+ },
8209
+ {
8210
+ "epoch": 9.74,
8211
+ "learning_rate": 3.073201127848831e-05,
8212
+ "loss": 2.8265,
8213
+ "step": 677000
8214
+ },
8215
+ {
8216
+ "epoch": 9.75,
8217
+ "learning_rate": 3.068018839021908e-05,
8218
+ "loss": 2.8176,
8219
+ "step": 677500
8220
+ },
8221
+ {
8222
+ "epoch": 9.76,
8223
+ "learning_rate": 3.062826164846635e-05,
8224
+ "loss": 2.8262,
8225
+ "step": 678000
8226
+ },
8227
+ {
8228
+ "epoch": 9.77,
8229
+ "learning_rate": 3.057633490671361e-05,
8230
+ "loss": 2.8195,
8231
+ "step": 678500
8232
+ },
8233
+ {
8234
+ "epoch": 9.77,
8235
+ "learning_rate": 3.0524408164960877e-05,
8236
+ "loss": 2.8232,
8237
+ "step": 679000
8238
+ },
8239
+ {
8240
+ "epoch": 9.78,
8241
+ "learning_rate": 3.0472481423208138e-05,
8242
+ "loss": 2.8231,
8243
+ "step": 679500
8244
+ },
8245
+ {
8246
+ "epoch": 9.79,
8247
+ "learning_rate": 3.0420554681455403e-05,
8248
+ "loss": 2.8215,
8249
+ "step": 680000
8250
+ },
8251
+ {
8252
+ "epoch": 9.8,
8253
+ "learning_rate": 3.036862793970267e-05,
8254
+ "loss": 2.825,
8255
+ "step": 680500
8256
+ },
8257
+ {
8258
+ "epoch": 9.8,
8259
+ "learning_rate": 3.0316701197949933e-05,
8260
+ "loss": 2.8225,
8261
+ "step": 681000
8262
+ },
8263
+ {
8264
+ "epoch": 9.81,
8265
+ "learning_rate": 3.0264878309680706e-05,
8266
+ "loss": 2.8144,
8267
+ "step": 681500
8268
+ },
8269
+ {
8270
+ "epoch": 9.82,
8271
+ "learning_rate": 3.0212951567927967e-05,
8272
+ "loss": 2.8204,
8273
+ "step": 682000
8274
+ },
8275
+ {
8276
+ "epoch": 9.82,
8277
+ "learning_rate": 3.0161024826175232e-05,
8278
+ "loss": 2.8276,
8279
+ "step": 682500
8280
+ },
8281
+ {
8282
+ "epoch": 9.83,
8283
+ "learning_rate": 3.01090980844225e-05,
8284
+ "loss": 2.8234,
8285
+ "step": 683000
8286
+ },
8287
+ {
8288
+ "epoch": 9.84,
8289
+ "learning_rate": 3.0057379049636775e-05,
8290
+ "loss": 2.8245,
8291
+ "step": 683500
8292
+ },
8293
+ {
8294
+ "epoch": 9.85,
8295
+ "learning_rate": 3.000545230788404e-05,
8296
+ "loss": 2.8231,
8297
+ "step": 684000
8298
+ },
8299
+ {
8300
+ "epoch": 9.85,
8301
+ "learning_rate": 2.99535255661313e-05,
8302
+ "loss": 2.8219,
8303
+ "step": 684500
8304
+ },
8305
+ {
8306
+ "epoch": 9.86,
8307
+ "learning_rate": 2.990159882437857e-05,
8308
+ "loss": 2.821,
8309
+ "step": 685000
8310
+ },
8311
+ {
8312
+ "epoch": 9.87,
8313
+ "learning_rate": 2.9849672082625835e-05,
8314
+ "loss": 2.8236,
8315
+ "step": 685500
8316
+ },
8317
+ {
8318
+ "epoch": 9.87,
8319
+ "learning_rate": 2.9797745340873096e-05,
8320
+ "loss": 2.8212,
8321
+ "step": 686000
8322
+ },
8323
+ {
8324
+ "epoch": 9.88,
8325
+ "learning_rate": 2.974581859912036e-05,
8326
+ "loss": 2.8231,
8327
+ "step": 686500
8328
+ },
8329
+ {
8330
+ "epoch": 9.89,
8331
+ "learning_rate": 2.969399571085113e-05,
8332
+ "loss": 2.8228,
8333
+ "step": 687000
8334
+ },
8335
+ {
8336
+ "epoch": 9.9,
8337
+ "learning_rate": 2.9642068969098395e-05,
8338
+ "loss": 2.8205,
8339
+ "step": 687500
8340
+ },
8341
+ {
8342
+ "epoch": 9.9,
8343
+ "learning_rate": 2.9590142227345664e-05,
8344
+ "loss": 2.8207,
8345
+ "step": 688000
8346
+ },
8347
+ {
8348
+ "epoch": 9.91,
8349
+ "learning_rate": 2.953821548559293e-05,
8350
+ "loss": 2.8209,
8351
+ "step": 688500
8352
+ },
8353
+ {
8354
+ "epoch": 9.92,
8355
+ "learning_rate": 2.948628874384019e-05,
8356
+ "loss": 2.8211,
8357
+ "step": 689000
8358
+ },
8359
+ {
8360
+ "epoch": 9.92,
8361
+ "learning_rate": 2.9434362002087455e-05,
8362
+ "loss": 2.8211,
8363
+ "step": 689500
8364
+ },
8365
+ {
8366
+ "epoch": 9.93,
8367
+ "learning_rate": 2.9382435260334723e-05,
8368
+ "loss": 2.8194,
8369
+ "step": 690000
8370
+ },
8371
+ {
8372
+ "epoch": 9.94,
8373
+ "learning_rate": 2.9330508518581985e-05,
8374
+ "loss": 2.8241,
8375
+ "step": 690500
8376
+ },
8377
+ {
8378
+ "epoch": 9.95,
8379
+ "learning_rate": 2.9278685630312758e-05,
8380
+ "loss": 2.8171,
8381
+ "step": 691000
8382
+ },
8383
+ {
8384
+ "epoch": 9.95,
8385
+ "learning_rate": 2.922675888856002e-05,
8386
+ "loss": 2.8193,
8387
+ "step": 691500
8388
+ },
8389
+ {
8390
+ "epoch": 9.96,
8391
+ "learning_rate": 2.9174832146807284e-05,
8392
+ "loss": 2.8213,
8393
+ "step": 692000
8394
+ },
8395
+ {
8396
+ "epoch": 9.97,
8397
+ "learning_rate": 2.9122905405054553e-05,
8398
+ "loss": 2.8249,
8399
+ "step": 692500
8400
+ },
8401
+ {
8402
+ "epoch": 9.98,
8403
+ "learning_rate": 2.9070978663301818e-05,
8404
+ "loss": 2.8247,
8405
+ "step": 693000
8406
+ },
8407
+ {
8408
+ "epoch": 9.98,
8409
+ "learning_rate": 2.9019155775032587e-05,
8410
+ "loss": 2.8203,
8411
+ "step": 693500
8412
+ },
8413
+ {
8414
+ "epoch": 9.99,
8415
+ "learning_rate": 2.8967229033279852e-05,
8416
+ "loss": 2.8256,
8417
+ "step": 694000
8418
+ },
8419
+ {
8420
+ "epoch": 10.0,
8421
+ "learning_rate": 2.8915302291527114e-05,
8422
+ "loss": 2.8163,
8423
+ "step": 694500
8424
+ },
8425
+ {
8426
+ "epoch": 10.0,
8427
+ "eval_accuracy": 0.5010026952779273,
8428
+ "eval_loss": 2.650995969772339,
8429
+ "eval_runtime": 555.6978,
8430
+ "eval_samples_per_second": 969.838,
8431
+ "eval_steps_per_second": 40.41,
8432
+ "step": 694730
8433
  }
8434
  ],
8435
  "max_steps": 972622,
8436
  "num_train_epochs": 14,
8437
+ "total_flos": 4.2724234309696225e+18,
8438
  "trial_name": null,
8439
  "trial_params": null
8440
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9af26e70d13d97dd1148f504edda0ca0d4b3e70b4d9f65a19697b44e426a3580
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:020c47f09229e7b4397da1597dd814d8bad9db375a9c2b2366593d834ff17bb0
3
  size 118242180
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15dad8bb38ca7c3dfddd43f874a30019a7bb4e8290fe3bcc51c26b73d24ec10e
3
- size 206287
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25c4e9beee823a5935e50174eb1417fc4c243aa090bff6d0ab7ea1c65de8bda8
3
+ size 228856