schnell commited on
Commit
02166a3
·
1 Parent(s): 26a571d

Training in progress, epoch 9

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a95e9685f7cd1888a964404d3a12c7901a29b2df58dbc1af6ad81e5615e00ee4
3
  size 236469913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0eb4dcf9ed1ce73d5d32482193ed272b0ff98916f9ae6c370fad43e65a6259a2
3
  size 236469913
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08fa1d11b1e8d17acf511a08375f85229d0f5fbc9ac05460b78b3d6eae608ef7
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af26e70d13d97dd1148f504edda0ca0d4b3e70b4d9f65a19697b44e426a3580
3
  size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d870ec0989535dca2b80429983b6169a4cd3e8ae4acc035c5e27e16231b19367
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f39204b4e0d21ca3c9794332a74eb829d80abede2633c846a34ad11056cbd2f4
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37238f485a747bdf411140f3ae786aa1ea193668eeb905f979f7244c03f830a9
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:981a20d97caa849fd69d09144a00fa71a090a40d23907501e2ed06e6c009f28d
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5049691cebbb5a2ee68a53b40104e081fc2c143f2cb3c29341094315e9153721
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c5c09b553671832cbd2235f75894fd5507dbddfab709bf1c35b62744443e806
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f732e87d3656ba3aff9a6c3eb0f2055ff5280fda57513995d4337e7c7b5ef089
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90ca4aeeda8da7670d23742adcfecc2c9d6f9e133399a23226811c91518226ab
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5833d29c5bb0965b6ed6a386cf1f3ec2da591b9c1d9bdeb335707a9bec2c66f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da7d64569f6b34d0d97ddba566149385f2d6d8171b6a32d36e23c2c2476fc151
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.0,
5
- "global_step": 555784,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -6744,11 +6744,854 @@
6744
  "eval_samples_per_second": 969.57,
6745
  "eval_steps_per_second": 40.399,
6746
  "step": 555784
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6747
  }
6748
  ],
6749
  "max_steps": 972622,
6750
  "num_train_epochs": 14,
6751
- "total_flos": 3.418009686270542e+18,
6752
  "trial_name": null,
6753
  "trial_params": null
6754
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.0,
5
+ "global_step": 625257,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
6744
  "eval_samples_per_second": 969.57,
6745
  "eval_steps_per_second": 40.399,
6746
  "step": 555784
6747
+ },
6748
+ {
6749
+ "epoch": 8.0,
6750
+ "learning_rate": 4.329360937589249e-05,
6751
+ "loss": 2.8652,
6752
+ "step": 556000
6753
+ },
6754
+ {
6755
+ "epoch": 8.01,
6756
+ "learning_rate": 4.3241682634139755e-05,
6757
+ "loss": 2.86,
6758
+ "step": 556500
6759
+ },
6760
+ {
6761
+ "epoch": 8.02,
6762
+ "learning_rate": 4.318975589238702e-05,
6763
+ "loss": 2.8523,
6764
+ "step": 557000
6765
+ },
6766
+ {
6767
+ "epoch": 8.02,
6768
+ "learning_rate": 4.3137829150634284e-05,
6769
+ "loss": 2.8513,
6770
+ "step": 557500
6771
+ },
6772
+ {
6773
+ "epoch": 8.03,
6774
+ "learning_rate": 4.308590240888155e-05,
6775
+ "loss": 2.8526,
6776
+ "step": 558000
6777
+ },
6778
+ {
6779
+ "epoch": 8.04,
6780
+ "learning_rate": 4.3033975667128814e-05,
6781
+ "loss": 2.8663,
6782
+ "step": 558500
6783
+ },
6784
+ {
6785
+ "epoch": 8.05,
6786
+ "learning_rate": 4.298204892537608e-05,
6787
+ "loss": 2.8554,
6788
+ "step": 559000
6789
+ },
6790
+ {
6791
+ "epoch": 8.05,
6792
+ "learning_rate": 4.2930226037106855e-05,
6793
+ "loss": 2.8556,
6794
+ "step": 559500
6795
+ },
6796
+ {
6797
+ "epoch": 8.06,
6798
+ "learning_rate": 4.287829929535412e-05,
6799
+ "loss": 2.856,
6800
+ "step": 560000
6801
+ },
6802
+ {
6803
+ "epoch": 8.07,
6804
+ "learning_rate": 4.282647640708489e-05,
6805
+ "loss": 2.8588,
6806
+ "step": 560500
6807
+ },
6808
+ {
6809
+ "epoch": 8.08,
6810
+ "learning_rate": 4.277454966533215e-05,
6811
+ "loss": 2.8524,
6812
+ "step": 561000
6813
+ },
6814
+ {
6815
+ "epoch": 8.08,
6816
+ "learning_rate": 4.272262292357941e-05,
6817
+ "loss": 2.8519,
6818
+ "step": 561500
6819
+ },
6820
+ {
6821
+ "epoch": 8.09,
6822
+ "learning_rate": 4.267069618182668e-05,
6823
+ "loss": 2.8579,
6824
+ "step": 562000
6825
+ },
6826
+ {
6827
+ "epoch": 8.1,
6828
+ "learning_rate": 4.261876944007395e-05,
6829
+ "loss": 2.853,
6830
+ "step": 562500
6831
+ },
6832
+ {
6833
+ "epoch": 8.1,
6834
+ "learning_rate": 4.256684269832121e-05,
6835
+ "loss": 2.8629,
6836
+ "step": 563000
6837
+ },
6838
+ {
6839
+ "epoch": 8.11,
6840
+ "learning_rate": 4.251491595656847e-05,
6841
+ "loss": 2.8584,
6842
+ "step": 563500
6843
+ },
6844
+ {
6845
+ "epoch": 8.12,
6846
+ "learning_rate": 4.246298921481574e-05,
6847
+ "loss": 2.853,
6848
+ "step": 564000
6849
+ },
6850
+ {
6851
+ "epoch": 8.13,
6852
+ "learning_rate": 4.241116632654651e-05,
6853
+ "loss": 2.8529,
6854
+ "step": 564500
6855
+ },
6856
+ {
6857
+ "epoch": 8.13,
6858
+ "learning_rate": 4.2359239584793775e-05,
6859
+ "loss": 2.8497,
6860
+ "step": 565000
6861
+ },
6862
+ {
6863
+ "epoch": 8.14,
6864
+ "learning_rate": 4.2307312843041044e-05,
6865
+ "loss": 2.8558,
6866
+ "step": 565500
6867
+ },
6868
+ {
6869
+ "epoch": 8.15,
6870
+ "learning_rate": 4.2255386101288305e-05,
6871
+ "loss": 2.8552,
6872
+ "step": 566000
6873
+ },
6874
+ {
6875
+ "epoch": 8.15,
6876
+ "learning_rate": 4.220356321301908e-05,
6877
+ "loss": 2.8553,
6878
+ "step": 566500
6879
+ },
6880
+ {
6881
+ "epoch": 8.16,
6882
+ "learning_rate": 4.215163647126634e-05,
6883
+ "loss": 2.8547,
6884
+ "step": 567000
6885
+ },
6886
+ {
6887
+ "epoch": 8.17,
6888
+ "learning_rate": 4.20997097295136e-05,
6889
+ "loss": 2.8513,
6890
+ "step": 567500
6891
+ },
6892
+ {
6893
+ "epoch": 8.18,
6894
+ "learning_rate": 4.204778298776087e-05,
6895
+ "loss": 2.855,
6896
+ "step": 568000
6897
+ },
6898
+ {
6899
+ "epoch": 8.18,
6900
+ "learning_rate": 4.199585624600814e-05,
6901
+ "loss": 2.8517,
6902
+ "step": 568500
6903
+ },
6904
+ {
6905
+ "epoch": 8.19,
6906
+ "learning_rate": 4.1944033357738904e-05,
6907
+ "loss": 2.8513,
6908
+ "step": 569000
6909
+ },
6910
+ {
6911
+ "epoch": 8.2,
6912
+ "learning_rate": 4.189210661598617e-05,
6913
+ "loss": 2.8525,
6914
+ "step": 569500
6915
+ },
6916
+ {
6917
+ "epoch": 8.2,
6918
+ "learning_rate": 4.184028372771694e-05,
6919
+ "loss": 2.8587,
6920
+ "step": 570000
6921
+ },
6922
+ {
6923
+ "epoch": 8.21,
6924
+ "learning_rate": 4.178835698596421e-05,
6925
+ "loss": 2.8499,
6926
+ "step": 570500
6927
+ },
6928
+ {
6929
+ "epoch": 8.22,
6930
+ "learning_rate": 4.173643024421147e-05,
6931
+ "loss": 2.8525,
6932
+ "step": 571000
6933
+ },
6934
+ {
6935
+ "epoch": 8.23,
6936
+ "learning_rate": 4.168450350245873e-05,
6937
+ "loss": 2.8523,
6938
+ "step": 571500
6939
+ },
6940
+ {
6941
+ "epoch": 8.23,
6942
+ "learning_rate": 4.1632576760706e-05,
6943
+ "loss": 2.851,
6944
+ "step": 572000
6945
+ },
6946
+ {
6947
+ "epoch": 8.24,
6948
+ "learning_rate": 4.1580650018953267e-05,
6949
+ "loss": 2.861,
6950
+ "step": 572500
6951
+ },
6952
+ {
6953
+ "epoch": 8.25,
6954
+ "learning_rate": 4.152872327720053e-05,
6955
+ "loss": 2.8567,
6956
+ "step": 573000
6957
+ },
6958
+ {
6959
+ "epoch": 8.26,
6960
+ "learning_rate": 4.147679653544779e-05,
6961
+ "loss": 2.8534,
6962
+ "step": 573500
6963
+ },
6964
+ {
6965
+ "epoch": 8.26,
6966
+ "learning_rate": 4.142497364717856e-05,
6967
+ "loss": 2.8493,
6968
+ "step": 574000
6969
+ },
6970
+ {
6971
+ "epoch": 8.27,
6972
+ "learning_rate": 4.1373046905425824e-05,
6973
+ "loss": 2.8514,
6974
+ "step": 574500
6975
+ },
6976
+ {
6977
+ "epoch": 8.28,
6978
+ "learning_rate": 4.132112016367309e-05,
6979
+ "loss": 2.8511,
6980
+ "step": 575000
6981
+ },
6982
+ {
6983
+ "epoch": 8.28,
6984
+ "learning_rate": 4.1269193421920354e-05,
6985
+ "loss": 2.8514,
6986
+ "step": 575500
6987
+ },
6988
+ {
6989
+ "epoch": 8.29,
6990
+ "learning_rate": 4.121726668016762e-05,
6991
+ "loss": 2.854,
6992
+ "step": 576000
6993
+ },
6994
+ {
6995
+ "epoch": 8.3,
6996
+ "learning_rate": 4.116533993841489e-05,
6997
+ "loss": 2.8539,
6998
+ "step": 576500
6999
+ },
7000
+ {
7001
+ "epoch": 8.31,
7002
+ "learning_rate": 4.111351705014566e-05,
7003
+ "loss": 2.8456,
7004
+ "step": 577000
7005
+ },
7006
+ {
7007
+ "epoch": 8.31,
7008
+ "learning_rate": 4.1061590308392925e-05,
7009
+ "loss": 2.8602,
7010
+ "step": 577500
7011
+ },
7012
+ {
7013
+ "epoch": 8.32,
7014
+ "learning_rate": 4.1009663566640187e-05,
7015
+ "loss": 2.8506,
7016
+ "step": 578000
7017
+ },
7018
+ {
7019
+ "epoch": 8.33,
7020
+ "learning_rate": 4.095773682488745e-05,
7021
+ "loss": 2.8524,
7022
+ "step": 578500
7023
+ },
7024
+ {
7025
+ "epoch": 8.33,
7026
+ "learning_rate": 4.0905810083134716e-05,
7027
+ "loss": 2.8504,
7028
+ "step": 579000
7029
+ },
7030
+ {
7031
+ "epoch": 8.34,
7032
+ "learning_rate": 4.085398719486548e-05,
7033
+ "loss": 2.8484,
7034
+ "step": 579500
7035
+ },
7036
+ {
7037
+ "epoch": 8.35,
7038
+ "learning_rate": 4.080206045311275e-05,
7039
+ "loss": 2.8479,
7040
+ "step": 580000
7041
+ },
7042
+ {
7043
+ "epoch": 8.36,
7044
+ "learning_rate": 4.075013371136002e-05,
7045
+ "loss": 2.8561,
7046
+ "step": 580500
7047
+ },
7048
+ {
7049
+ "epoch": 8.36,
7050
+ "learning_rate": 4.069820696960728e-05,
7051
+ "loss": 2.85,
7052
+ "step": 581000
7053
+ },
7054
+ {
7055
+ "epoch": 8.37,
7056
+ "learning_rate": 4.064628022785454e-05,
7057
+ "loss": 2.8561,
7058
+ "step": 581500
7059
+ },
7060
+ {
7061
+ "epoch": 8.38,
7062
+ "learning_rate": 4.059435348610181e-05,
7063
+ "loss": 2.8504,
7064
+ "step": 582000
7065
+ },
7066
+ {
7067
+ "epoch": 8.38,
7068
+ "learning_rate": 4.054242674434908e-05,
7069
+ "loss": 2.8555,
7070
+ "step": 582500
7071
+ },
7072
+ {
7073
+ "epoch": 8.39,
7074
+ "learning_rate": 4.0490603856079845e-05,
7075
+ "loss": 2.8438,
7076
+ "step": 583000
7077
+ },
7078
+ {
7079
+ "epoch": 8.4,
7080
+ "learning_rate": 4.043867711432711e-05,
7081
+ "loss": 2.8494,
7082
+ "step": 583500
7083
+ },
7084
+ {
7085
+ "epoch": 8.41,
7086
+ "learning_rate": 4.0386750372574375e-05,
7087
+ "loss": 2.8473,
7088
+ "step": 584000
7089
+ },
7090
+ {
7091
+ "epoch": 8.41,
7092
+ "learning_rate": 4.0334823630821636e-05,
7093
+ "loss": 2.8479,
7094
+ "step": 584500
7095
+ },
7096
+ {
7097
+ "epoch": 8.42,
7098
+ "learning_rate": 4.0282896889068905e-05,
7099
+ "loss": 2.8488,
7100
+ "step": 585000
7101
+ },
7102
+ {
7103
+ "epoch": 8.43,
7104
+ "learning_rate": 4.0230970147316166e-05,
7105
+ "loss": 2.8506,
7106
+ "step": 585500
7107
+ },
7108
+ {
7109
+ "epoch": 8.43,
7110
+ "learning_rate": 4.0179043405563434e-05,
7111
+ "loss": 2.8476,
7112
+ "step": 586000
7113
+ },
7114
+ {
7115
+ "epoch": 8.44,
7116
+ "learning_rate": 4.0127116663810696e-05,
7117
+ "loss": 2.8448,
7118
+ "step": 586500
7119
+ },
7120
+ {
7121
+ "epoch": 8.45,
7122
+ "learning_rate": 4.007529377554147e-05,
7123
+ "loss": 2.8518,
7124
+ "step": 587000
7125
+ },
7126
+ {
7127
+ "epoch": 8.46,
7128
+ "learning_rate": 4.002336703378873e-05,
7129
+ "loss": 2.8421,
7130
+ "step": 587500
7131
+ },
7132
+ {
7133
+ "epoch": 8.46,
7134
+ "learning_rate": 3.9971440292036e-05,
7135
+ "loss": 2.8544,
7136
+ "step": 588000
7137
+ },
7138
+ {
7139
+ "epoch": 8.47,
7140
+ "learning_rate": 3.9919617403766765e-05,
7141
+ "loss": 2.8506,
7142
+ "step": 588500
7143
+ },
7144
+ {
7145
+ "epoch": 8.48,
7146
+ "learning_rate": 3.986769066201403e-05,
7147
+ "loss": 2.8495,
7148
+ "step": 589000
7149
+ },
7150
+ {
7151
+ "epoch": 8.49,
7152
+ "learning_rate": 3.9815763920261295e-05,
7153
+ "loss": 2.8504,
7154
+ "step": 589500
7155
+ },
7156
+ {
7157
+ "epoch": 8.49,
7158
+ "learning_rate": 3.976383717850856e-05,
7159
+ "loss": 2.844,
7160
+ "step": 590000
7161
+ },
7162
+ {
7163
+ "epoch": 8.5,
7164
+ "learning_rate": 3.971191043675583e-05,
7165
+ "loss": 2.8472,
7166
+ "step": 590500
7167
+ },
7168
+ {
7169
+ "epoch": 8.51,
7170
+ "learning_rate": 3.9659983695003086e-05,
7171
+ "loss": 2.8443,
7172
+ "step": 591000
7173
+ },
7174
+ {
7175
+ "epoch": 8.51,
7176
+ "learning_rate": 3.9608056953250354e-05,
7177
+ "loss": 2.8546,
7178
+ "step": 591500
7179
+ },
7180
+ {
7181
+ "epoch": 8.52,
7182
+ "learning_rate": 3.955623406498113e-05,
7183
+ "loss": 2.8484,
7184
+ "step": 592000
7185
+ },
7186
+ {
7187
+ "epoch": 8.53,
7188
+ "learning_rate": 3.950430732322839e-05,
7189
+ "loss": 2.8438,
7190
+ "step": 592500
7191
+ },
7192
+ {
7193
+ "epoch": 8.54,
7194
+ "learning_rate": 3.945238058147566e-05,
7195
+ "loss": 2.8479,
7196
+ "step": 593000
7197
+ },
7198
+ {
7199
+ "epoch": 8.54,
7200
+ "learning_rate": 3.9400453839722926e-05,
7201
+ "loss": 2.8455,
7202
+ "step": 593500
7203
+ },
7204
+ {
7205
+ "epoch": 8.55,
7206
+ "learning_rate": 3.934852709797018e-05,
7207
+ "loss": 2.8422,
7208
+ "step": 594000
7209
+ },
7210
+ {
7211
+ "epoch": 8.56,
7212
+ "learning_rate": 3.929660035621745e-05,
7213
+ "loss": 2.847,
7214
+ "step": 594500
7215
+ },
7216
+ {
7217
+ "epoch": 8.56,
7218
+ "learning_rate": 3.924467361446472e-05,
7219
+ "loss": 2.8453,
7220
+ "step": 595000
7221
+ },
7222
+ {
7223
+ "epoch": 8.57,
7224
+ "learning_rate": 3.919274687271198e-05,
7225
+ "loss": 2.8533,
7226
+ "step": 595500
7227
+ },
7228
+ {
7229
+ "epoch": 8.58,
7230
+ "learning_rate": 3.9141027837926256e-05,
7231
+ "loss": 2.8511,
7232
+ "step": 596000
7233
+ },
7234
+ {
7235
+ "epoch": 8.59,
7236
+ "learning_rate": 3.908910109617352e-05,
7237
+ "loss": 2.8456,
7238
+ "step": 596500
7239
+ },
7240
+ {
7241
+ "epoch": 8.59,
7242
+ "learning_rate": 3.9037174354420786e-05,
7243
+ "loss": 2.8501,
7244
+ "step": 597000
7245
+ },
7246
+ {
7247
+ "epoch": 8.6,
7248
+ "learning_rate": 3.8985247612668054e-05,
7249
+ "loss": 2.8479,
7250
+ "step": 597500
7251
+ },
7252
+ {
7253
+ "epoch": 8.61,
7254
+ "learning_rate": 3.8933320870915316e-05,
7255
+ "loss": 2.8488,
7256
+ "step": 598000
7257
+ },
7258
+ {
7259
+ "epoch": 8.61,
7260
+ "learning_rate": 3.888139412916258e-05,
7261
+ "loss": 2.8456,
7262
+ "step": 598500
7263
+ },
7264
+ {
7265
+ "epoch": 8.62,
7266
+ "learning_rate": 3.8829467387409846e-05,
7267
+ "loss": 2.8479,
7268
+ "step": 599000
7269
+ },
7270
+ {
7271
+ "epoch": 8.63,
7272
+ "learning_rate": 3.877764449914061e-05,
7273
+ "loss": 2.8457,
7274
+ "step": 599500
7275
+ },
7276
+ {
7277
+ "epoch": 8.64,
7278
+ "learning_rate": 3.872571775738788e-05,
7279
+ "loss": 2.8439,
7280
+ "step": 600000
7281
+ },
7282
+ {
7283
+ "epoch": 8.64,
7284
+ "learning_rate": 3.867379101563514e-05,
7285
+ "loss": 2.8488,
7286
+ "step": 600500
7287
+ },
7288
+ {
7289
+ "epoch": 8.65,
7290
+ "learning_rate": 3.862186427388241e-05,
7291
+ "loss": 2.8437,
7292
+ "step": 601000
7293
+ },
7294
+ {
7295
+ "epoch": 8.66,
7296
+ "learning_rate": 3.856993753212967e-05,
7297
+ "loss": 2.8445,
7298
+ "step": 601500
7299
+ },
7300
+ {
7301
+ "epoch": 8.67,
7302
+ "learning_rate": 3.851801079037694e-05,
7303
+ "loss": 2.842,
7304
+ "step": 602000
7305
+ },
7306
+ {
7307
+ "epoch": 8.67,
7308
+ "learning_rate": 3.8466187902107706e-05,
7309
+ "loss": 2.8458,
7310
+ "step": 602500
7311
+ },
7312
+ {
7313
+ "epoch": 8.68,
7314
+ "learning_rate": 3.8414261160354974e-05,
7315
+ "loss": 2.8489,
7316
+ "step": 603000
7317
+ },
7318
+ {
7319
+ "epoch": 8.69,
7320
+ "learning_rate": 3.8362334418602236e-05,
7321
+ "loss": 2.8407,
7322
+ "step": 603500
7323
+ },
7324
+ {
7325
+ "epoch": 8.69,
7326
+ "learning_rate": 3.8310407676849504e-05,
7327
+ "loss": 2.8452,
7328
+ "step": 604000
7329
+ },
7330
+ {
7331
+ "epoch": 8.7,
7332
+ "learning_rate": 3.8258480935096766e-05,
7333
+ "loss": 2.8495,
7334
+ "step": 604500
7335
+ },
7336
+ {
7337
+ "epoch": 8.71,
7338
+ "learning_rate": 3.820655419334403e-05,
7339
+ "loss": 2.8426,
7340
+ "step": 605000
7341
+ },
7342
+ {
7343
+ "epoch": 8.72,
7344
+ "learning_rate": 3.8154627451591295e-05,
7345
+ "loss": 2.845,
7346
+ "step": 605500
7347
+ },
7348
+ {
7349
+ "epoch": 8.72,
7350
+ "learning_rate": 3.810280456332207e-05,
7351
+ "loss": 2.8437,
7352
+ "step": 606000
7353
+ },
7354
+ {
7355
+ "epoch": 8.73,
7356
+ "learning_rate": 3.805087782156933e-05,
7357
+ "loss": 2.8411,
7358
+ "step": 606500
7359
+ },
7360
+ {
7361
+ "epoch": 8.74,
7362
+ "learning_rate": 3.79989510798166e-05,
7363
+ "loss": 2.8449,
7364
+ "step": 607000
7365
+ },
7366
+ {
7367
+ "epoch": 8.74,
7368
+ "learning_rate": 3.7947024338063866e-05,
7369
+ "loss": 2.8474,
7370
+ "step": 607500
7371
+ },
7372
+ {
7373
+ "epoch": 8.75,
7374
+ "learning_rate": 3.789520144979463e-05,
7375
+ "loss": 2.8431,
7376
+ "step": 608000
7377
+ },
7378
+ {
7379
+ "epoch": 8.76,
7380
+ "learning_rate": 3.78432747080419e-05,
7381
+ "loss": 2.8437,
7382
+ "step": 608500
7383
+ },
7384
+ {
7385
+ "epoch": 8.77,
7386
+ "learning_rate": 3.7791347966289156e-05,
7387
+ "loss": 2.8445,
7388
+ "step": 609000
7389
+ },
7390
+ {
7391
+ "epoch": 8.77,
7392
+ "learning_rate": 3.7739421224536424e-05,
7393
+ "loss": 2.8438,
7394
+ "step": 609500
7395
+ },
7396
+ {
7397
+ "epoch": 8.78,
7398
+ "learning_rate": 3.768749448278369e-05,
7399
+ "loss": 2.8441,
7400
+ "step": 610000
7401
+ },
7402
+ {
7403
+ "epoch": 8.79,
7404
+ "learning_rate": 3.763567159451446e-05,
7405
+ "loss": 2.8497,
7406
+ "step": 610500
7407
+ },
7408
+ {
7409
+ "epoch": 8.79,
7410
+ "learning_rate": 3.758374485276173e-05,
7411
+ "loss": 2.8466,
7412
+ "step": 611000
7413
+ },
7414
+ {
7415
+ "epoch": 8.8,
7416
+ "learning_rate": 3.7531818111008995e-05,
7417
+ "loss": 2.8451,
7418
+ "step": 611500
7419
+ },
7420
+ {
7421
+ "epoch": 8.81,
7422
+ "learning_rate": 3.747989136925626e-05,
7423
+ "loss": 2.8407,
7424
+ "step": 612000
7425
+ },
7426
+ {
7427
+ "epoch": 8.82,
7428
+ "learning_rate": 3.742806848098703e-05,
7429
+ "loss": 2.8453,
7430
+ "step": 612500
7431
+ },
7432
+ {
7433
+ "epoch": 8.82,
7434
+ "learning_rate": 3.737614173923429e-05,
7435
+ "loss": 2.8403,
7436
+ "step": 613000
7437
+ },
7438
+ {
7439
+ "epoch": 8.83,
7440
+ "learning_rate": 3.732421499748155e-05,
7441
+ "loss": 2.8445,
7442
+ "step": 613500
7443
+ },
7444
+ {
7445
+ "epoch": 8.84,
7446
+ "learning_rate": 3.727228825572882e-05,
7447
+ "loss": 2.8422,
7448
+ "step": 614000
7449
+ },
7450
+ {
7451
+ "epoch": 8.85,
7452
+ "learning_rate": 3.722036151397608e-05,
7453
+ "loss": 2.8411,
7454
+ "step": 614500
7455
+ },
7456
+ {
7457
+ "epoch": 8.85,
7458
+ "learning_rate": 3.716843477222335e-05,
7459
+ "loss": 2.8402,
7460
+ "step": 615000
7461
+ },
7462
+ {
7463
+ "epoch": 8.86,
7464
+ "learning_rate": 3.711650803047061e-05,
7465
+ "loss": 2.8368,
7466
+ "step": 615500
7467
+ },
7468
+ {
7469
+ "epoch": 8.87,
7470
+ "learning_rate": 3.706458128871788e-05,
7471
+ "loss": 2.846,
7472
+ "step": 616000
7473
+ },
7474
+ {
7475
+ "epoch": 8.87,
7476
+ "learning_rate": 3.701286225393216e-05,
7477
+ "loss": 2.8407,
7478
+ "step": 616500
7479
+ },
7480
+ {
7481
+ "epoch": 8.88,
7482
+ "learning_rate": 3.696093551217942e-05,
7483
+ "loss": 2.8388,
7484
+ "step": 617000
7485
+ },
7486
+ {
7487
+ "epoch": 8.89,
7488
+ "learning_rate": 3.690900877042668e-05,
7489
+ "loss": 2.8414,
7490
+ "step": 617500
7491
+ },
7492
+ {
7493
+ "epoch": 8.9,
7494
+ "learning_rate": 3.685708202867395e-05,
7495
+ "loss": 2.8437,
7496
+ "step": 618000
7497
+ },
7498
+ {
7499
+ "epoch": 8.9,
7500
+ "learning_rate": 3.680515528692121e-05,
7501
+ "loss": 2.8418,
7502
+ "step": 618500
7503
+ },
7504
+ {
7505
+ "epoch": 8.91,
7506
+ "learning_rate": 3.675322854516848e-05,
7507
+ "loss": 2.8413,
7508
+ "step": 619000
7509
+ },
7510
+ {
7511
+ "epoch": 8.92,
7512
+ "learning_rate": 3.6701405656899246e-05,
7513
+ "loss": 2.8469,
7514
+ "step": 619500
7515
+ },
7516
+ {
7517
+ "epoch": 8.92,
7518
+ "learning_rate": 3.6649478915146514e-05,
7519
+ "loss": 2.8402,
7520
+ "step": 620000
7521
+ },
7522
+ {
7523
+ "epoch": 8.93,
7524
+ "learning_rate": 3.6597552173393776e-05,
7525
+ "loss": 2.842,
7526
+ "step": 620500
7527
+ },
7528
+ {
7529
+ "epoch": 8.94,
7530
+ "learning_rate": 3.6545625431641044e-05,
7531
+ "loss": 2.84,
7532
+ "step": 621000
7533
+ },
7534
+ {
7535
+ "epoch": 8.95,
7536
+ "learning_rate": 3.6493698689888305e-05,
7537
+ "loss": 2.8442,
7538
+ "step": 621500
7539
+ },
7540
+ {
7541
+ "epoch": 8.95,
7542
+ "learning_rate": 3.6441771948135574e-05,
7543
+ "loss": 2.8432,
7544
+ "step": 622000
7545
+ },
7546
+ {
7547
+ "epoch": 8.96,
7548
+ "learning_rate": 3.638984520638284e-05,
7549
+ "loss": 2.8384,
7550
+ "step": 622500
7551
+ },
7552
+ {
7553
+ "epoch": 8.97,
7554
+ "learning_rate": 3.63379184646301e-05,
7555
+ "loss": 2.8414,
7556
+ "step": 623000
7557
+ },
7558
+ {
7559
+ "epoch": 8.97,
7560
+ "learning_rate": 3.6286095576360876e-05,
7561
+ "loss": 2.8429,
7562
+ "step": 623500
7563
+ },
7564
+ {
7565
+ "epoch": 8.98,
7566
+ "learning_rate": 3.623416883460813e-05,
7567
+ "loss": 2.8397,
7568
+ "step": 624000
7569
+ },
7570
+ {
7571
+ "epoch": 8.99,
7572
+ "learning_rate": 3.61822420928554e-05,
7573
+ "loss": 2.8377,
7574
+ "step": 624500
7575
+ },
7576
+ {
7577
+ "epoch": 9.0,
7578
+ "learning_rate": 3.6130419204586166e-05,
7579
+ "loss": 2.8352,
7580
+ "step": 625000
7581
+ },
7582
+ {
7583
+ "epoch": 9.0,
7584
+ "eval_accuracy": 0.49907601112677125,
7585
+ "eval_loss": 2.6701717376708984,
7586
+ "eval_runtime": 557.6796,
7587
+ "eval_samples_per_second": 966.392,
7588
+ "eval_steps_per_second": 40.267,
7589
+ "step": 625257
7590
  }
7591
  ],
7592
  "max_steps": 972622,
7593
  "num_train_epochs": 14,
7594
+ "total_flos": 3.845195420804317e+18,
7595
  "trial_name": null,
7596
  "trial_params": null
7597
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:08fa1d11b1e8d17acf511a08375f85229d0f5fbc9ac05460b78b3d6eae608ef7
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9af26e70d13d97dd1148f504edda0ca0d4b3e70b4d9f65a19697b44e426a3580
3
  size 118242180
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:066b48db75cbbc3697b8e7f9e071c6b08dc6498bd4e37d936177ef5dfe4b202f
3
- size 183718
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15dad8bb38ca7c3dfddd43f874a30019a7bb4e8290fe3bcc51c26b73d24ec10e
3
+ size 206287