schnell commited on
Commit
26a571d
·
1 Parent(s): 5fff3c5

Training in progress, epoch 8

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d40c449bfdf2ab70f584c376ba0b56fcab0eeffa57bb95269511c9f9329f1344
3
  size 236469913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a95e9685f7cd1888a964404d3a12c7901a29b2df58dbc1af6ad81e5615e00ee4
3
  size 236469913
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aac616f2daebd51c2a4b6b2899d528ddc70b1135c823a086dc3c0cfcafd543e
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08fa1d11b1e8d17acf511a08375f85229d0f5fbc9ac05460b78b3d6eae608ef7
3
  size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aacb551eae113552c8fc0108b9cce60b880ddb4a3bcbeb2b970b7dfb66b93e82
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d870ec0989535dca2b80429983b6169a4cd3e8ae4acc035c5e27e16231b19367
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1523b836ad746e01e56d873bbaf8269662341810c3fd72e895d613c93ff596cf
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37238f485a747bdf411140f3ae786aa1ea193668eeb905f979f7244c03f830a9
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d44e8ba45806d39d19fc118f2f4debe5ef30963a47e6fa54d97d112061e07962
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5049691cebbb5a2ee68a53b40104e081fc2c143f2cb3c29341094315e9153721
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad765baf155930b25b5cf56b9bf47ee00e060d5a3e5562ec0ad517df920e1572
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f732e87d3656ba3aff9a6c3eb0f2055ff5280fda57513995d4337e7c7b5ef089
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47275b616f0620617e0ae3a4b67172329bb53d65a342833c5864f712e8bcb992
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5833d29c5bb0965b6ed6a386cf1f3ec2da591b9c1d9bdeb335707a9bec2c66f
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.0,
5
- "global_step": 486311,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -5901,11 +5901,854 @@
5901
  "eval_samples_per_second": 970.412,
5902
  "eval_steps_per_second": 40.434,
5903
  "step": 486311
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5904
  }
5905
  ],
5906
  "max_steps": 972622,
5907
  "num_train_epochs": 14,
5908
- "total_flos": 2.990601181715235e+18,
5909
  "trial_name": null,
5910
  "trial_params": null
5911
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.0,
5
+ "global_step": 555784,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
5901
  "eval_samples_per_second": 970.412,
5902
  "eval_steps_per_second": 40.434,
5903
  "step": 486311
5904
+ },
5905
+ {
5906
+ "epoch": 7.0,
5907
+ "learning_rate": 5.050841472850103e-05,
5908
+ "loss": 2.8813,
5909
+ "step": 486500
5910
+ },
5911
+ {
5912
+ "epoch": 7.01,
5913
+ "learning_rate": 5.04564879867483e-05,
5914
+ "loss": 2.8798,
5915
+ "step": 487000
5916
+ },
5917
+ {
5918
+ "epoch": 7.02,
5919
+ "learning_rate": 5.040456124499556e-05,
5920
+ "loss": 2.8848,
5921
+ "step": 487500
5922
+ },
5923
+ {
5924
+ "epoch": 7.02,
5925
+ "learning_rate": 5.035263450324282e-05,
5926
+ "loss": 2.8811,
5927
+ "step": 488000
5928
+ },
5929
+ {
5930
+ "epoch": 7.03,
5931
+ "learning_rate": 5.03007077614901e-05,
5932
+ "loss": 2.8786,
5933
+ "step": 488500
5934
+ },
5935
+ {
5936
+ "epoch": 7.04,
5937
+ "learning_rate": 5.024878101973736e-05,
5938
+ "loss": 2.8777,
5939
+ "step": 489000
5940
+ },
5941
+ {
5942
+ "epoch": 7.05,
5943
+ "learning_rate": 5.019685427798462e-05,
5944
+ "loss": 2.8776,
5945
+ "step": 489500
5946
+ },
5947
+ {
5948
+ "epoch": 7.05,
5949
+ "learning_rate": 5.0145031389715393e-05,
5950
+ "loss": 2.8799,
5951
+ "step": 490000
5952
+ },
5953
+ {
5954
+ "epoch": 7.06,
5955
+ "learning_rate": 5.0093208501446166e-05,
5956
+ "loss": 2.883,
5957
+ "step": 490500
5958
+ },
5959
+ {
5960
+ "epoch": 7.07,
5961
+ "learning_rate": 5.004128175969343e-05,
5962
+ "loss": 2.8822,
5963
+ "step": 491000
5964
+ },
5965
+ {
5966
+ "epoch": 7.07,
5967
+ "learning_rate": 4.998935501794069e-05,
5968
+ "loss": 2.8739,
5969
+ "step": 491500
5970
+ },
5971
+ {
5972
+ "epoch": 7.08,
5973
+ "learning_rate": 4.993742827618796e-05,
5974
+ "loss": 2.8832,
5975
+ "step": 492000
5976
+ },
5977
+ {
5978
+ "epoch": 7.09,
5979
+ "learning_rate": 4.988550153443522e-05,
5980
+ "loss": 2.8728,
5981
+ "step": 492500
5982
+ },
5983
+ {
5984
+ "epoch": 7.1,
5985
+ "learning_rate": 4.983357479268249e-05,
5986
+ "loss": 2.8796,
5987
+ "step": 493000
5988
+ },
5989
+ {
5990
+ "epoch": 7.1,
5991
+ "learning_rate": 4.978164805092975e-05,
5992
+ "loss": 2.8773,
5993
+ "step": 493500
5994
+ },
5995
+ {
5996
+ "epoch": 7.11,
5997
+ "learning_rate": 4.972972130917701e-05,
5998
+ "loss": 2.8783,
5999
+ "step": 494000
6000
+ },
6001
+ {
6002
+ "epoch": 7.12,
6003
+ "learning_rate": 4.9677898420907784e-05,
6004
+ "loss": 2.8848,
6005
+ "step": 494500
6006
+ },
6007
+ {
6008
+ "epoch": 7.13,
6009
+ "learning_rate": 4.962607553263856e-05,
6010
+ "loss": 2.879,
6011
+ "step": 495000
6012
+ },
6013
+ {
6014
+ "epoch": 7.13,
6015
+ "learning_rate": 4.957414879088582e-05,
6016
+ "loss": 2.879,
6017
+ "step": 495500
6018
+ },
6019
+ {
6020
+ "epoch": 7.14,
6021
+ "learning_rate": 4.9522222049133086e-05,
6022
+ "loss": 2.8763,
6023
+ "step": 496000
6024
+ },
6025
+ {
6026
+ "epoch": 7.15,
6027
+ "learning_rate": 4.947029530738035e-05,
6028
+ "loss": 2.8815,
6029
+ "step": 496500
6030
+ },
6031
+ {
6032
+ "epoch": 7.15,
6033
+ "learning_rate": 4.9418368565627616e-05,
6034
+ "loss": 2.8765,
6035
+ "step": 497000
6036
+ },
6037
+ {
6038
+ "epoch": 7.16,
6039
+ "learning_rate": 4.9366441823874885e-05,
6040
+ "loss": 2.8775,
6041
+ "step": 497500
6042
+ },
6043
+ {
6044
+ "epoch": 7.17,
6045
+ "learning_rate": 4.931451508212214e-05,
6046
+ "loss": 2.8765,
6047
+ "step": 498000
6048
+ },
6049
+ {
6050
+ "epoch": 7.18,
6051
+ "learning_rate": 4.926258834036941e-05,
6052
+ "loss": 2.8789,
6053
+ "step": 498500
6054
+ },
6055
+ {
6056
+ "epoch": 7.18,
6057
+ "learning_rate": 4.9210765452100174e-05,
6058
+ "loss": 2.8767,
6059
+ "step": 499000
6060
+ },
6061
+ {
6062
+ "epoch": 7.19,
6063
+ "learning_rate": 4.915883871034744e-05,
6064
+ "loss": 2.8755,
6065
+ "step": 499500
6066
+ },
6067
+ {
6068
+ "epoch": 7.2,
6069
+ "learning_rate": 4.910691196859471e-05,
6070
+ "loss": 2.8766,
6071
+ "step": 500000
6072
+ },
6073
+ {
6074
+ "epoch": 7.2,
6075
+ "learning_rate": 4.905498522684198e-05,
6076
+ "loss": 2.8829,
6077
+ "step": 500500
6078
+ },
6079
+ {
6080
+ "epoch": 7.21,
6081
+ "learning_rate": 4.9003162338572745e-05,
6082
+ "loss": 2.8784,
6083
+ "step": 501000
6084
+ },
6085
+ {
6086
+ "epoch": 7.22,
6087
+ "learning_rate": 4.895123559682001e-05,
6088
+ "loss": 2.8789,
6089
+ "step": 501500
6090
+ },
6091
+ {
6092
+ "epoch": 7.23,
6093
+ "learning_rate": 4.889941270855078e-05,
6094
+ "loss": 2.8769,
6095
+ "step": 502000
6096
+ },
6097
+ {
6098
+ "epoch": 7.23,
6099
+ "learning_rate": 4.884748596679805e-05,
6100
+ "loss": 2.8792,
6101
+ "step": 502500
6102
+ },
6103
+ {
6104
+ "epoch": 7.24,
6105
+ "learning_rate": 4.879555922504531e-05,
6106
+ "loss": 2.8722,
6107
+ "step": 503000
6108
+ },
6109
+ {
6110
+ "epoch": 7.25,
6111
+ "learning_rate": 4.874363248329257e-05,
6112
+ "loss": 2.8766,
6113
+ "step": 503500
6114
+ },
6115
+ {
6116
+ "epoch": 7.25,
6117
+ "learning_rate": 4.869170574153984e-05,
6118
+ "loss": 2.8786,
6119
+ "step": 504000
6120
+ },
6121
+ {
6122
+ "epoch": 7.26,
6123
+ "learning_rate": 4.86397789997871e-05,
6124
+ "loss": 2.8784,
6125
+ "step": 504500
6126
+ },
6127
+ {
6128
+ "epoch": 7.27,
6129
+ "learning_rate": 4.858785225803437e-05,
6130
+ "loss": 2.8774,
6131
+ "step": 505000
6132
+ },
6133
+ {
6134
+ "epoch": 7.28,
6135
+ "learning_rate": 4.853592551628163e-05,
6136
+ "loss": 2.8709,
6137
+ "step": 505500
6138
+ },
6139
+ {
6140
+ "epoch": 7.28,
6141
+ "learning_rate": 4.8484102628012403e-05,
6142
+ "loss": 2.8734,
6143
+ "step": 506000
6144
+ },
6145
+ {
6146
+ "epoch": 7.29,
6147
+ "learning_rate": 4.8432175886259665e-05,
6148
+ "loss": 2.874,
6149
+ "step": 506500
6150
+ },
6151
+ {
6152
+ "epoch": 7.3,
6153
+ "learning_rate": 4.838024914450693e-05,
6154
+ "loss": 2.8691,
6155
+ "step": 507000
6156
+ },
6157
+ {
6158
+ "epoch": 7.3,
6159
+ "learning_rate": 4.8328322402754195e-05,
6160
+ "loss": 2.8739,
6161
+ "step": 507500
6162
+ },
6163
+ {
6164
+ "epoch": 7.31,
6165
+ "learning_rate": 4.827649951448497e-05,
6166
+ "loss": 2.8743,
6167
+ "step": 508000
6168
+ },
6169
+ {
6170
+ "epoch": 7.32,
6171
+ "learning_rate": 4.8224676626215734e-05,
6172
+ "loss": 2.8738,
6173
+ "step": 508500
6174
+ },
6175
+ {
6176
+ "epoch": 7.33,
6177
+ "learning_rate": 4.8172749884463e-05,
6178
+ "loss": 2.8711,
6179
+ "step": 509000
6180
+ },
6181
+ {
6182
+ "epoch": 7.33,
6183
+ "learning_rate": 4.8120823142710264e-05,
6184
+ "loss": 2.8737,
6185
+ "step": 509500
6186
+ },
6187
+ {
6188
+ "epoch": 7.34,
6189
+ "learning_rate": 4.806889640095753e-05,
6190
+ "loss": 2.8775,
6191
+ "step": 510000
6192
+ },
6193
+ {
6194
+ "epoch": 7.35,
6195
+ "learning_rate": 4.8016969659204794e-05,
6196
+ "loss": 2.8719,
6197
+ "step": 510500
6198
+ },
6199
+ {
6200
+ "epoch": 7.36,
6201
+ "learning_rate": 4.796504291745206e-05,
6202
+ "loss": 2.8738,
6203
+ "step": 511000
6204
+ },
6205
+ {
6206
+ "epoch": 7.36,
6207
+ "learning_rate": 4.791322002918283e-05,
6208
+ "loss": 2.8789,
6209
+ "step": 511500
6210
+ },
6211
+ {
6212
+ "epoch": 7.37,
6213
+ "learning_rate": 4.7861293287430096e-05,
6214
+ "loss": 2.873,
6215
+ "step": 512000
6216
+ },
6217
+ {
6218
+ "epoch": 7.38,
6219
+ "learning_rate": 4.780936654567736e-05,
6220
+ "loss": 2.8702,
6221
+ "step": 512500
6222
+ },
6223
+ {
6224
+ "epoch": 7.38,
6225
+ "learning_rate": 4.7757439803924626e-05,
6226
+ "loss": 2.8784,
6227
+ "step": 513000
6228
+ },
6229
+ {
6230
+ "epoch": 7.39,
6231
+ "learning_rate": 4.7705513062171895e-05,
6232
+ "loss": 2.868,
6233
+ "step": 513500
6234
+ },
6235
+ {
6236
+ "epoch": 7.4,
6237
+ "learning_rate": 4.765358632041915e-05,
6238
+ "loss": 2.8754,
6239
+ "step": 514000
6240
+ },
6241
+ {
6242
+ "epoch": 7.41,
6243
+ "learning_rate": 4.760176343214993e-05,
6244
+ "loss": 2.8758,
6245
+ "step": 514500
6246
+ },
6247
+ {
6248
+ "epoch": 7.41,
6249
+ "learning_rate": 4.754983669039719e-05,
6250
+ "loss": 2.8736,
6251
+ "step": 515000
6252
+ },
6253
+ {
6254
+ "epoch": 7.42,
6255
+ "learning_rate": 4.749790994864445e-05,
6256
+ "loss": 2.8699,
6257
+ "step": 515500
6258
+ },
6259
+ {
6260
+ "epoch": 7.43,
6261
+ "learning_rate": 4.744598320689172e-05,
6262
+ "loss": 2.8705,
6263
+ "step": 516000
6264
+ },
6265
+ {
6266
+ "epoch": 7.43,
6267
+ "learning_rate": 4.739405646513899e-05,
6268
+ "loss": 2.8713,
6269
+ "step": 516500
6270
+ },
6271
+ {
6272
+ "epoch": 7.44,
6273
+ "learning_rate": 4.734212972338625e-05,
6274
+ "loss": 2.8763,
6275
+ "step": 517000
6276
+ },
6277
+ {
6278
+ "epoch": 7.45,
6279
+ "learning_rate": 4.729020298163351e-05,
6280
+ "loss": 2.8744,
6281
+ "step": 517500
6282
+ },
6283
+ {
6284
+ "epoch": 7.46,
6285
+ "learning_rate": 4.7238380093364285e-05,
6286
+ "loss": 2.8726,
6287
+ "step": 518000
6288
+ },
6289
+ {
6290
+ "epoch": 7.46,
6291
+ "learning_rate": 4.7186453351611546e-05,
6292
+ "loss": 2.8754,
6293
+ "step": 518500
6294
+ },
6295
+ {
6296
+ "epoch": 7.47,
6297
+ "learning_rate": 4.7134526609858815e-05,
6298
+ "loss": 2.8741,
6299
+ "step": 519000
6300
+ },
6301
+ {
6302
+ "epoch": 7.48,
6303
+ "learning_rate": 4.708259986810608e-05,
6304
+ "loss": 2.8763,
6305
+ "step": 519500
6306
+ },
6307
+ {
6308
+ "epoch": 7.48,
6309
+ "learning_rate": 4.7030673126353344e-05,
6310
+ "loss": 2.8752,
6311
+ "step": 520000
6312
+ },
6313
+ {
6314
+ "epoch": 7.49,
6315
+ "learning_rate": 4.6978746384600606e-05,
6316
+ "loss": 2.8681,
6317
+ "step": 520500
6318
+ },
6319
+ {
6320
+ "epoch": 7.5,
6321
+ "learning_rate": 4.6926819642847874e-05,
6322
+ "loss": 2.8696,
6323
+ "step": 521000
6324
+ },
6325
+ {
6326
+ "epoch": 7.51,
6327
+ "learning_rate": 4.6874892901095136e-05,
6328
+ "loss": 2.8672,
6329
+ "step": 521500
6330
+ },
6331
+ {
6332
+ "epoch": 7.51,
6333
+ "learning_rate": 4.6823173866309413e-05,
6334
+ "loss": 2.8729,
6335
+ "step": 522000
6336
+ },
6337
+ {
6338
+ "epoch": 7.52,
6339
+ "learning_rate": 4.6771247124556675e-05,
6340
+ "loss": 2.8745,
6341
+ "step": 522500
6342
+ },
6343
+ {
6344
+ "epoch": 7.53,
6345
+ "learning_rate": 4.671932038280394e-05,
6346
+ "loss": 2.8692,
6347
+ "step": 523000
6348
+ },
6349
+ {
6350
+ "epoch": 7.54,
6351
+ "learning_rate": 4.6667393641051205e-05,
6352
+ "loss": 2.8736,
6353
+ "step": 523500
6354
+ },
6355
+ {
6356
+ "epoch": 7.54,
6357
+ "learning_rate": 4.661546689929847e-05,
6358
+ "loss": 2.8702,
6359
+ "step": 524000
6360
+ },
6361
+ {
6362
+ "epoch": 7.55,
6363
+ "learning_rate": 4.6563540157545735e-05,
6364
+ "loss": 2.8747,
6365
+ "step": 524500
6366
+ },
6367
+ {
6368
+ "epoch": 7.56,
6369
+ "learning_rate": 4.6511613415793e-05,
6370
+ "loss": 2.8673,
6371
+ "step": 525000
6372
+ },
6373
+ {
6374
+ "epoch": 7.56,
6375
+ "learning_rate": 4.6459686674040264e-05,
6376
+ "loss": 2.8686,
6377
+ "step": 525500
6378
+ },
6379
+ {
6380
+ "epoch": 7.57,
6381
+ "learning_rate": 4.640786378577104e-05,
6382
+ "loss": 2.8671,
6383
+ "step": 526000
6384
+ },
6385
+ {
6386
+ "epoch": 7.58,
6387
+ "learning_rate": 4.63559370440183e-05,
6388
+ "loss": 2.8714,
6389
+ "step": 526500
6390
+ },
6391
+ {
6392
+ "epoch": 7.59,
6393
+ "learning_rate": 4.630401030226557e-05,
6394
+ "loss": 2.8695,
6395
+ "step": 527000
6396
+ },
6397
+ {
6398
+ "epoch": 7.59,
6399
+ "learning_rate": 4.6252083560512835e-05,
6400
+ "loss": 2.8713,
6401
+ "step": 527500
6402
+ },
6403
+ {
6404
+ "epoch": 7.6,
6405
+ "learning_rate": 4.62002606722436e-05,
6406
+ "loss": 2.8692,
6407
+ "step": 528000
6408
+ },
6409
+ {
6410
+ "epoch": 7.61,
6411
+ "learning_rate": 4.614833393049087e-05,
6412
+ "loss": 2.8701,
6413
+ "step": 528500
6414
+ },
6415
+ {
6416
+ "epoch": 7.61,
6417
+ "learning_rate": 4.6096511042221636e-05,
6418
+ "loss": 2.8685,
6419
+ "step": 529000
6420
+ },
6421
+ {
6422
+ "epoch": 7.62,
6423
+ "learning_rate": 4.6044584300468905e-05,
6424
+ "loss": 2.8695,
6425
+ "step": 529500
6426
+ },
6427
+ {
6428
+ "epoch": 7.63,
6429
+ "learning_rate": 4.599276141219967e-05,
6430
+ "loss": 2.8624,
6431
+ "step": 530000
6432
+ },
6433
+ {
6434
+ "epoch": 7.64,
6435
+ "learning_rate": 4.594083467044694e-05,
6436
+ "loss": 2.8658,
6437
+ "step": 530500
6438
+ },
6439
+ {
6440
+ "epoch": 7.64,
6441
+ "learning_rate": 4.58889079286942e-05,
6442
+ "loss": 2.8725,
6443
+ "step": 531000
6444
+ },
6445
+ {
6446
+ "epoch": 7.65,
6447
+ "learning_rate": 4.583698118694146e-05,
6448
+ "loss": 2.8695,
6449
+ "step": 531500
6450
+ },
6451
+ {
6452
+ "epoch": 7.66,
6453
+ "learning_rate": 4.578505444518873e-05,
6454
+ "loss": 2.862,
6455
+ "step": 532000
6456
+ },
6457
+ {
6458
+ "epoch": 7.66,
6459
+ "learning_rate": 4.5733127703436e-05,
6460
+ "loss": 2.8737,
6461
+ "step": 532500
6462
+ },
6463
+ {
6464
+ "epoch": 7.67,
6465
+ "learning_rate": 4.568120096168326e-05,
6466
+ "loss": 2.8715,
6467
+ "step": 533000
6468
+ },
6469
+ {
6470
+ "epoch": 7.68,
6471
+ "learning_rate": 4.562927421993052e-05,
6472
+ "loss": 2.8704,
6473
+ "step": 533500
6474
+ },
6475
+ {
6476
+ "epoch": 7.69,
6477
+ "learning_rate": 4.557734747817779e-05,
6478
+ "loss": 2.8641,
6479
+ "step": 534000
6480
+ },
6481
+ {
6482
+ "epoch": 7.69,
6483
+ "learning_rate": 4.552542073642506e-05,
6484
+ "loss": 2.8709,
6485
+ "step": 534500
6486
+ },
6487
+ {
6488
+ "epoch": 7.7,
6489
+ "learning_rate": 4.547349399467232e-05,
6490
+ "loss": 2.8693,
6491
+ "step": 535000
6492
+ },
6493
+ {
6494
+ "epoch": 7.71,
6495
+ "learning_rate": 4.542156725291958e-05,
6496
+ "loss": 2.8679,
6497
+ "step": 535500
6498
+ },
6499
+ {
6500
+ "epoch": 7.72,
6501
+ "learning_rate": 4.5369744364650354e-05,
6502
+ "loss": 2.8706,
6503
+ "step": 536000
6504
+ },
6505
+ {
6506
+ "epoch": 7.72,
6507
+ "learning_rate": 4.5317817622897616e-05,
6508
+ "loss": 2.8684,
6509
+ "step": 536500
6510
+ },
6511
+ {
6512
+ "epoch": 7.73,
6513
+ "learning_rate": 4.5265890881144884e-05,
6514
+ "loss": 2.8682,
6515
+ "step": 537000
6516
+ },
6517
+ {
6518
+ "epoch": 7.74,
6519
+ "learning_rate": 4.5213964139392146e-05,
6520
+ "loss": 2.8671,
6521
+ "step": 537500
6522
+ },
6523
+ {
6524
+ "epoch": 7.74,
6525
+ "learning_rate": 4.516214125112292e-05,
6526
+ "loss": 2.8749,
6527
+ "step": 538000
6528
+ },
6529
+ {
6530
+ "epoch": 7.75,
6531
+ "learning_rate": 4.511021450937018e-05,
6532
+ "loss": 2.8687,
6533
+ "step": 538500
6534
+ },
6535
+ {
6536
+ "epoch": 7.76,
6537
+ "learning_rate": 4.505828776761745e-05,
6538
+ "loss": 2.8699,
6539
+ "step": 539000
6540
+ },
6541
+ {
6542
+ "epoch": 7.77,
6543
+ "learning_rate": 4.500646487934822e-05,
6544
+ "loss": 2.8659,
6545
+ "step": 539500
6546
+ },
6547
+ {
6548
+ "epoch": 7.77,
6549
+ "learning_rate": 4.495453813759548e-05,
6550
+ "loss": 2.8665,
6551
+ "step": 540000
6552
+ },
6553
+ {
6554
+ "epoch": 7.78,
6555
+ "learning_rate": 4.4902611395842745e-05,
6556
+ "loss": 2.8668,
6557
+ "step": 540500
6558
+ },
6559
+ {
6560
+ "epoch": 7.79,
6561
+ "learning_rate": 4.485068465409001e-05,
6562
+ "loss": 2.872,
6563
+ "step": 541000
6564
+ },
6565
+ {
6566
+ "epoch": 7.79,
6567
+ "learning_rate": 4.4798757912337274e-05,
6568
+ "loss": 2.8627,
6569
+ "step": 541500
6570
+ },
6571
+ {
6572
+ "epoch": 7.8,
6573
+ "learning_rate": 4.474683117058454e-05,
6574
+ "loss": 2.8638,
6575
+ "step": 542000
6576
+ },
6577
+ {
6578
+ "epoch": 7.81,
6579
+ "learning_rate": 4.4694904428831804e-05,
6580
+ "loss": 2.8647,
6581
+ "step": 542500
6582
+ },
6583
+ {
6584
+ "epoch": 7.82,
6585
+ "learning_rate": 4.4642977687079066e-05,
6586
+ "loss": 2.8621,
6587
+ "step": 543000
6588
+ },
6589
+ {
6590
+ "epoch": 7.82,
6591
+ "learning_rate": 4.4591154798809845e-05,
6592
+ "loss": 2.8669,
6593
+ "step": 543500
6594
+ },
6595
+ {
6596
+ "epoch": 7.83,
6597
+ "learning_rate": 4.453922805705711e-05,
6598
+ "loss": 2.8648,
6599
+ "step": 544000
6600
+ },
6601
+ {
6602
+ "epoch": 7.84,
6603
+ "learning_rate": 4.448730131530437e-05,
6604
+ "loss": 2.8584,
6605
+ "step": 544500
6606
+ },
6607
+ {
6608
+ "epoch": 7.84,
6609
+ "learning_rate": 4.443537457355164e-05,
6610
+ "loss": 2.8608,
6611
+ "step": 545000
6612
+ },
6613
+ {
6614
+ "epoch": 7.85,
6615
+ "learning_rate": 4.43835516852824e-05,
6616
+ "loss": 2.8663,
6617
+ "step": 545500
6618
+ },
6619
+ {
6620
+ "epoch": 7.86,
6621
+ "learning_rate": 4.433162494352967e-05,
6622
+ "loss": 2.8612,
6623
+ "step": 546000
6624
+ },
6625
+ {
6626
+ "epoch": 7.87,
6627
+ "learning_rate": 4.427969820177694e-05,
6628
+ "loss": 2.8626,
6629
+ "step": 546500
6630
+ },
6631
+ {
6632
+ "epoch": 7.87,
6633
+ "learning_rate": 4.4227771460024194e-05,
6634
+ "loss": 2.8659,
6635
+ "step": 547000
6636
+ },
6637
+ {
6638
+ "epoch": 7.88,
6639
+ "learning_rate": 4.4175948571754974e-05,
6640
+ "loss": 2.8647,
6641
+ "step": 547500
6642
+ },
6643
+ {
6644
+ "epoch": 7.89,
6645
+ "learning_rate": 4.4124021830002236e-05,
6646
+ "loss": 2.8618,
6647
+ "step": 548000
6648
+ },
6649
+ {
6650
+ "epoch": 7.9,
6651
+ "learning_rate": 4.407219894173301e-05,
6652
+ "loss": 2.8597,
6653
+ "step": 548500
6654
+ },
6655
+ {
6656
+ "epoch": 7.9,
6657
+ "learning_rate": 4.402027219998027e-05,
6658
+ "loss": 2.8669,
6659
+ "step": 549000
6660
+ },
6661
+ {
6662
+ "epoch": 7.91,
6663
+ "learning_rate": 4.396834545822753e-05,
6664
+ "loss": 2.8605,
6665
+ "step": 549500
6666
+ },
6667
+ {
6668
+ "epoch": 7.92,
6669
+ "learning_rate": 4.39164187164748e-05,
6670
+ "loss": 2.8683,
6671
+ "step": 550000
6672
+ },
6673
+ {
6674
+ "epoch": 7.92,
6675
+ "learning_rate": 4.386449197472207e-05,
6676
+ "loss": 2.8648,
6677
+ "step": 550500
6678
+ },
6679
+ {
6680
+ "epoch": 7.93,
6681
+ "learning_rate": 4.381256523296933e-05,
6682
+ "loss": 2.8607,
6683
+ "step": 551000
6684
+ },
6685
+ {
6686
+ "epoch": 7.94,
6687
+ "learning_rate": 4.376063849121659e-05,
6688
+ "loss": 2.8627,
6689
+ "step": 551500
6690
+ },
6691
+ {
6692
+ "epoch": 7.95,
6693
+ "learning_rate": 4.370871174946386e-05,
6694
+ "loss": 2.8654,
6695
+ "step": 552000
6696
+ },
6697
+ {
6698
+ "epoch": 7.95,
6699
+ "learning_rate": 4.3656888861194626e-05,
6700
+ "loss": 2.8593,
6701
+ "step": 552500
6702
+ },
6703
+ {
6704
+ "epoch": 7.96,
6705
+ "learning_rate": 4.36050659729254e-05,
6706
+ "loss": 2.8665,
6707
+ "step": 553000
6708
+ },
6709
+ {
6710
+ "epoch": 7.97,
6711
+ "learning_rate": 4.355313923117266e-05,
6712
+ "loss": 2.8612,
6713
+ "step": 553500
6714
+ },
6715
+ {
6716
+ "epoch": 7.97,
6717
+ "learning_rate": 4.350121248941993e-05,
6718
+ "loss": 2.8664,
6719
+ "step": 554000
6720
+ },
6721
+ {
6722
+ "epoch": 7.98,
6723
+ "learning_rate": 4.34492857476672e-05,
6724
+ "loss": 2.8526,
6725
+ "step": 554500
6726
+ },
6727
+ {
6728
+ "epoch": 7.99,
6729
+ "learning_rate": 4.339735900591446e-05,
6730
+ "loss": 2.8589,
6731
+ "step": 555000
6732
+ },
6733
+ {
6734
+ "epoch": 8.0,
6735
+ "learning_rate": 4.334553611764523e-05,
6736
+ "loss": 2.8609,
6737
+ "step": 555500
6738
+ },
6739
+ {
6740
+ "epoch": 8.0,
6741
+ "eval_accuracy": 0.49625970451459295,
6742
+ "eval_loss": 2.6880686283111572,
6743
+ "eval_runtime": 555.8516,
6744
+ "eval_samples_per_second": 969.57,
6745
+ "eval_steps_per_second": 40.399,
6746
+ "step": 555784
6747
  }
6748
  ],
6749
  "max_steps": 972622,
6750
  "num_train_epochs": 14,
6751
+ "total_flos": 3.418009686270542e+18,
6752
  "trial_name": null,
6753
  "trial_params": null
6754
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6aac616f2daebd51c2a4b6b2899d528ddc70b1135c823a086dc3c0cfcafd543e
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08fa1d11b1e8d17acf511a08375f85229d0f5fbc9ac05460b78b3d6eae608ef7
3
  size 118242180
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e4d9f47cd0de1e9ade974451278e1f71b4d9f6452d123d6940cbc7a4b914674
3
- size 161149
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:066b48db75cbbc3697b8e7f9e071c6b08dc6498bd4e37d936177ef5dfe4b202f
3
+ size 183718