schnell commited on
Commit
b802bbc
·
1 Parent(s): 33833a4

Training in progress, epoch 14

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd2f1419c45952d183434e915bc48855048f7e7221baef55ee75c2ec918ea8a9
3
  size 236469913
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f909ba46f0a48bc129a704d4f24cd179ebf4211739e8aa4c8a085db9561d2870
3
  size 236469913
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ec13bee1191623dd89f547f05ab2d8f9b47f3c9077b8d581674d22ee640388d
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1fe310f1f9e883c23d6725718feca058676978792d4f4de4c8dbbe2df9c17e
3
  size 118242180
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a47ecc6944cdde782b572b7a351e6a250bc225e29461826a5154c96dc4832901
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e4b49c3d9f3fe57b8e1f1b69c51f85df70aed9ab2c7da43cab2725d7c8ce2a3
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b55971e7d5600536d0c32f692c32c0762404f751d6cde4f3d9062910e7ef1f7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ae897e79b48da1251412a7aa5215b19754cf4f6407daccf2cbcc48d6b54e5a8
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68c328438a7d04830fa9147610ceddf4844bd5db4e595b25c7f824689bdaa43d
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d33ec1b8096db043dca5e9616e048b70e27d0805be063f9fe36f9c6a19119ff8
3
  size 14503
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cfbfb8ae404ff126d864dc533080bc6f6ebba58e419973b1342f14752109cb0
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a0937b93f49d7efd83aa7fa86bb2e746d0887803a2bd13587895ce811116ea1
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87bfb5b4b88c1f3652dfbcbd080f3298e694046d1bf7d69c5b0519876a04d3f0
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca9ecaac6bfa92bcf5ac19f559074b5567002f2b1aba65ed23951aa76e1f3154
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 13.0,
5
- "global_step": 903149,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -10959,11 +10959,854 @@
10959
  "eval_samples_per_second": 971.361,
10960
  "eval_steps_per_second": 40.474,
10961
  "step": 903149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10962
  }
10963
  ],
10964
  "max_steps": 972622,
10965
  "num_train_epochs": 14,
10966
- "total_flos": 5.554259622962921e+18,
10967
  "trial_name": null,
10968
  "trial_params": null
10969
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 14.0,
5
+ "global_step": 972622,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
10959
  "eval_samples_per_second": 971.361,
10960
  "eval_steps_per_second": 40.474,
10961
  "step": 903149
10962
+ },
10963
+ {
10964
+ "epoch": 13.01,
10965
+ "learning_rate": 7.218440224531231e-06,
10966
+ "loss": 2.7671,
10967
+ "step": 903500
10968
+ },
10969
+ {
10970
+ "epoch": 13.01,
10971
+ "learning_rate": 7.1665134827784964e-06,
10972
+ "loss": 2.7743,
10973
+ "step": 904000
10974
+ },
10975
+ {
10976
+ "epoch": 13.02,
10977
+ "learning_rate": 7.114586741025761e-06,
10978
+ "loss": 2.7692,
10979
+ "step": 904500
10980
+ },
10981
+ {
10982
+ "epoch": 13.03,
10983
+ "learning_rate": 7.062659999273026e-06,
10984
+ "loss": 2.7681,
10985
+ "step": 905000
10986
+ },
10987
+ {
10988
+ "epoch": 13.03,
10989
+ "learning_rate": 7.010733257520291e-06,
10990
+ "loss": 2.7708,
10991
+ "step": 905500
10992
+ },
10993
+ {
10994
+ "epoch": 13.04,
10995
+ "learning_rate": 6.958910369251061e-06,
10996
+ "loss": 2.7629,
10997
+ "step": 906000
10998
+ },
10999
+ {
11000
+ "epoch": 13.05,
11001
+ "learning_rate": 6.906983627498326e-06,
11002
+ "loss": 2.766,
11003
+ "step": 906500
11004
+ },
11005
+ {
11006
+ "epoch": 13.06,
11007
+ "learning_rate": 6.855056885745591e-06,
11008
+ "loss": 2.7656,
11009
+ "step": 907000
11010
+ },
11011
+ {
11012
+ "epoch": 13.06,
11013
+ "learning_rate": 6.8031301439928555e-06,
11014
+ "loss": 2.7697,
11015
+ "step": 907500
11016
+ },
11017
+ {
11018
+ "epoch": 13.07,
11019
+ "learning_rate": 6.7512034022401195e-06,
11020
+ "loss": 2.7668,
11021
+ "step": 908000
11022
+ },
11023
+ {
11024
+ "epoch": 13.08,
11025
+ "learning_rate": 6.6992766604873845e-06,
11026
+ "loss": 2.769,
11027
+ "step": 908500
11028
+ },
11029
+ {
11030
+ "epoch": 13.08,
11031
+ "learning_rate": 6.647453772218156e-06,
11032
+ "loss": 2.7645,
11033
+ "step": 909000
11034
+ },
11035
+ {
11036
+ "epoch": 13.09,
11037
+ "learning_rate": 6.595527030465419e-06,
11038
+ "loss": 2.7676,
11039
+ "step": 909500
11040
+ },
11041
+ {
11042
+ "epoch": 13.1,
11043
+ "learning_rate": 6.543600288712684e-06,
11044
+ "loss": 2.7681,
11045
+ "step": 910000
11046
+ },
11047
+ {
11048
+ "epoch": 13.11,
11049
+ "learning_rate": 6.491673546959949e-06,
11050
+ "loss": 2.7687,
11051
+ "step": 910500
11052
+ },
11053
+ {
11054
+ "epoch": 13.11,
11055
+ "learning_rate": 6.43985065869072e-06,
11056
+ "loss": 2.7705,
11057
+ "step": 911000
11058
+ },
11059
+ {
11060
+ "epoch": 13.12,
11061
+ "learning_rate": 6.387923916937983e-06,
11062
+ "loss": 2.7645,
11063
+ "step": 911500
11064
+ },
11065
+ {
11066
+ "epoch": 13.13,
11067
+ "learning_rate": 6.335997175185249e-06,
11068
+ "loss": 2.7686,
11069
+ "step": 912000
11070
+ },
11071
+ {
11072
+ "epoch": 13.13,
11073
+ "learning_rate": 6.284070433432514e-06,
11074
+ "loss": 2.7711,
11075
+ "step": 912500
11076
+ },
11077
+ {
11078
+ "epoch": 13.14,
11079
+ "learning_rate": 6.232143691679779e-06,
11080
+ "loss": 2.7682,
11081
+ "step": 913000
11082
+ },
11083
+ {
11084
+ "epoch": 13.15,
11085
+ "learning_rate": 6.1803208034105485e-06,
11086
+ "loss": 2.7709,
11087
+ "step": 913500
11088
+ },
11089
+ {
11090
+ "epoch": 13.16,
11091
+ "learning_rate": 6.128394061657813e-06,
11092
+ "loss": 2.7721,
11093
+ "step": 914000
11094
+ },
11095
+ {
11096
+ "epoch": 13.16,
11097
+ "learning_rate": 6.076467319905078e-06,
11098
+ "loss": 2.7701,
11099
+ "step": 914500
11100
+ },
11101
+ {
11102
+ "epoch": 13.17,
11103
+ "learning_rate": 6.024540578152342e-06,
11104
+ "loss": 2.766,
11105
+ "step": 915000
11106
+ },
11107
+ {
11108
+ "epoch": 13.18,
11109
+ "learning_rate": 5.972717689883113e-06,
11110
+ "loss": 2.7641,
11111
+ "step": 915500
11112
+ },
11113
+ {
11114
+ "epoch": 13.18,
11115
+ "learning_rate": 5.920790948130378e-06,
11116
+ "loss": 2.7665,
11117
+ "step": 916000
11118
+ },
11119
+ {
11120
+ "epoch": 13.19,
11121
+ "learning_rate": 5.868864206377643e-06,
11122
+ "loss": 2.768,
11123
+ "step": 916500
11124
+ },
11125
+ {
11126
+ "epoch": 13.2,
11127
+ "learning_rate": 5.8169374646249076e-06,
11128
+ "loss": 2.7703,
11129
+ "step": 917000
11130
+ },
11131
+ {
11132
+ "epoch": 13.21,
11133
+ "learning_rate": 5.7650107228721725e-06,
11134
+ "loss": 2.7685,
11135
+ "step": 917500
11136
+ },
11137
+ {
11138
+ "epoch": 13.21,
11139
+ "learning_rate": 5.7130839811194365e-06,
11140
+ "loss": 2.7681,
11141
+ "step": 918000
11142
+ },
11143
+ {
11144
+ "epoch": 13.22,
11145
+ "learning_rate": 5.661261092850207e-06,
11146
+ "loss": 2.7664,
11147
+ "step": 918500
11148
+ },
11149
+ {
11150
+ "epoch": 13.23,
11151
+ "learning_rate": 5.609334351097472e-06,
11152
+ "loss": 2.7648,
11153
+ "step": 919000
11154
+ },
11155
+ {
11156
+ "epoch": 13.24,
11157
+ "learning_rate": 5.557511462828242e-06,
11158
+ "loss": 2.7704,
11159
+ "step": 919500
11160
+ },
11161
+ {
11162
+ "epoch": 13.24,
11163
+ "learning_rate": 5.5055847210755064e-06,
11164
+ "loss": 2.7646,
11165
+ "step": 920000
11166
+ },
11167
+ {
11168
+ "epoch": 13.25,
11169
+ "learning_rate": 5.453657979322771e-06,
11170
+ "loss": 2.7614,
11171
+ "step": 920500
11172
+ },
11173
+ {
11174
+ "epoch": 13.26,
11175
+ "learning_rate": 5.401731237570037e-06,
11176
+ "loss": 2.7662,
11177
+ "step": 921000
11178
+ },
11179
+ {
11180
+ "epoch": 13.26,
11181
+ "learning_rate": 5.349804495817301e-06,
11182
+ "loss": 2.7648,
11183
+ "step": 921500
11184
+ },
11185
+ {
11186
+ "epoch": 13.27,
11187
+ "learning_rate": 5.297877754064566e-06,
11188
+ "loss": 2.7652,
11189
+ "step": 922000
11190
+ },
11191
+ {
11192
+ "epoch": 13.28,
11193
+ "learning_rate": 5.245951012311831e-06,
11194
+ "loss": 2.7639,
11195
+ "step": 922500
11196
+ },
11197
+ {
11198
+ "epoch": 13.29,
11199
+ "learning_rate": 5.194024270559095e-06,
11200
+ "loss": 2.7601,
11201
+ "step": 923000
11202
+ },
11203
+ {
11204
+ "epoch": 13.29,
11205
+ "learning_rate": 5.142097528806361e-06,
11206
+ "loss": 2.7665,
11207
+ "step": 923500
11208
+ },
11209
+ {
11210
+ "epoch": 13.3,
11211
+ "learning_rate": 5.090170787053625e-06,
11212
+ "loss": 2.7682,
11213
+ "step": 924000
11214
+ },
11215
+ {
11216
+ "epoch": 13.31,
11217
+ "learning_rate": 5.038347898784395e-06,
11218
+ "loss": 2.7652,
11219
+ "step": 924500
11220
+ },
11221
+ {
11222
+ "epoch": 13.31,
11223
+ "learning_rate": 4.98642115703166e-06,
11224
+ "loss": 2.7607,
11225
+ "step": 925000
11226
+ },
11227
+ {
11228
+ "epoch": 13.32,
11229
+ "learning_rate": 4.934494415278925e-06,
11230
+ "loss": 2.7694,
11231
+ "step": 925500
11232
+ },
11233
+ {
11234
+ "epoch": 13.33,
11235
+ "learning_rate": 4.882567673526189e-06,
11236
+ "loss": 2.7618,
11237
+ "step": 926000
11238
+ },
11239
+ {
11240
+ "epoch": 13.34,
11241
+ "learning_rate": 4.830640931773454e-06,
11242
+ "loss": 2.7679,
11243
+ "step": 926500
11244
+ },
11245
+ {
11246
+ "epoch": 13.34,
11247
+ "learning_rate": 4.778714190020719e-06,
11248
+ "loss": 2.7704,
11249
+ "step": 927000
11250
+ },
11251
+ {
11252
+ "epoch": 13.35,
11253
+ "learning_rate": 4.7268913017514894e-06,
11254
+ "loss": 2.7645,
11255
+ "step": 927500
11256
+ },
11257
+ {
11258
+ "epoch": 13.36,
11259
+ "learning_rate": 4.674964559998754e-06,
11260
+ "loss": 2.7672,
11261
+ "step": 928000
11262
+ },
11263
+ {
11264
+ "epoch": 13.36,
11265
+ "learning_rate": 4.623037818246019e-06,
11266
+ "loss": 2.7705,
11267
+ "step": 928500
11268
+ },
11269
+ {
11270
+ "epoch": 13.37,
11271
+ "learning_rate": 4.571111076493283e-06,
11272
+ "loss": 2.758,
11273
+ "step": 929000
11274
+ },
11275
+ {
11276
+ "epoch": 13.38,
11277
+ "learning_rate": 4.519184334740548e-06,
11278
+ "loss": 2.7695,
11279
+ "step": 929500
11280
+ },
11281
+ {
11282
+ "epoch": 13.39,
11283
+ "learning_rate": 4.467361446471319e-06,
11284
+ "loss": 2.768,
11285
+ "step": 930000
11286
+ },
11287
+ {
11288
+ "epoch": 13.39,
11289
+ "learning_rate": 4.415434704718583e-06,
11290
+ "loss": 2.7683,
11291
+ "step": 930500
11292
+ },
11293
+ {
11294
+ "epoch": 13.4,
11295
+ "learning_rate": 4.363507962965848e-06,
11296
+ "loss": 2.7695,
11297
+ "step": 931000
11298
+ },
11299
+ {
11300
+ "epoch": 13.41,
11301
+ "learning_rate": 4.311581221213113e-06,
11302
+ "loss": 2.7767,
11303
+ "step": 931500
11304
+ },
11305
+ {
11306
+ "epoch": 13.42,
11307
+ "learning_rate": 4.2596544794603774e-06,
11308
+ "loss": 2.7615,
11309
+ "step": 932000
11310
+ },
11311
+ {
11312
+ "epoch": 13.42,
11313
+ "learning_rate": 4.207727737707642e-06,
11314
+ "loss": 2.761,
11315
+ "step": 932500
11316
+ },
11317
+ {
11318
+ "epoch": 13.43,
11319
+ "learning_rate": 4.155800995954906e-06,
11320
+ "loss": 2.767,
11321
+ "step": 933000
11322
+ },
11323
+ {
11324
+ "epoch": 13.44,
11325
+ "learning_rate": 4.103978107685677e-06,
11326
+ "loss": 2.7671,
11327
+ "step": 933500
11328
+ },
11329
+ {
11330
+ "epoch": 13.44,
11331
+ "learning_rate": 4.052051365932942e-06,
11332
+ "loss": 2.7686,
11333
+ "step": 934000
11334
+ },
11335
+ {
11336
+ "epoch": 13.45,
11337
+ "learning_rate": 4.000124624180207e-06,
11338
+ "loss": 2.7656,
11339
+ "step": 934500
11340
+ },
11341
+ {
11342
+ "epoch": 13.46,
11343
+ "learning_rate": 3.948197882427472e-06,
11344
+ "loss": 2.7653,
11345
+ "step": 935000
11346
+ },
11347
+ {
11348
+ "epoch": 13.47,
11349
+ "learning_rate": 3.8962711406747365e-06,
11350
+ "loss": 2.7629,
11351
+ "step": 935500
11352
+ },
11353
+ {
11354
+ "epoch": 13.47,
11355
+ "learning_rate": 3.844448252405507e-06,
11356
+ "loss": 2.7645,
11357
+ "step": 936000
11358
+ },
11359
+ {
11360
+ "epoch": 13.48,
11361
+ "learning_rate": 3.792521510652771e-06,
11362
+ "loss": 2.7672,
11363
+ "step": 936500
11364
+ },
11365
+ {
11366
+ "epoch": 13.49,
11367
+ "learning_rate": 3.740594768900036e-06,
11368
+ "loss": 2.7698,
11369
+ "step": 937000
11370
+ },
11371
+ {
11372
+ "epoch": 13.49,
11373
+ "learning_rate": 3.6886680271473013e-06,
11374
+ "loss": 2.7685,
11375
+ "step": 937500
11376
+ },
11377
+ {
11378
+ "epoch": 13.5,
11379
+ "learning_rate": 3.6367412853945653e-06,
11380
+ "loss": 2.7602,
11381
+ "step": 938000
11382
+ },
11383
+ {
11384
+ "epoch": 13.51,
11385
+ "learning_rate": 3.5849183971253358e-06,
11386
+ "loss": 2.7635,
11387
+ "step": 938500
11388
+ },
11389
+ {
11390
+ "epoch": 13.52,
11391
+ "learning_rate": 3.5329916553726007e-06,
11392
+ "loss": 2.7602,
11393
+ "step": 939000
11394
+ },
11395
+ {
11396
+ "epoch": 13.52,
11397
+ "learning_rate": 3.481064913619865e-06,
11398
+ "loss": 2.7568,
11399
+ "step": 939500
11400
+ },
11401
+ {
11402
+ "epoch": 13.53,
11403
+ "learning_rate": 3.42913817186713e-06,
11404
+ "loss": 2.7642,
11405
+ "step": 940000
11406
+ },
11407
+ {
11408
+ "epoch": 13.54,
11409
+ "learning_rate": 3.377211430114395e-06,
11410
+ "loss": 2.7657,
11411
+ "step": 940500
11412
+ },
11413
+ {
11414
+ "epoch": 13.54,
11415
+ "learning_rate": 3.3252846883616595e-06,
11416
+ "loss": 2.7644,
11417
+ "step": 941000
11418
+ },
11419
+ {
11420
+ "epoch": 13.55,
11421
+ "learning_rate": 3.2734618000924295e-06,
11422
+ "loss": 2.765,
11423
+ "step": 941500
11424
+ },
11425
+ {
11426
+ "epoch": 13.56,
11427
+ "learning_rate": 3.221535058339695e-06,
11428
+ "loss": 2.7656,
11429
+ "step": 942000
11430
+ },
11431
+ {
11432
+ "epoch": 13.57,
11433
+ "learning_rate": 3.169608316586959e-06,
11434
+ "loss": 2.7593,
11435
+ "step": 942500
11436
+ },
11437
+ {
11438
+ "epoch": 13.57,
11439
+ "learning_rate": 3.1176815748342242e-06,
11440
+ "loss": 2.7662,
11441
+ "step": 943000
11442
+ },
11443
+ {
11444
+ "epoch": 13.58,
11445
+ "learning_rate": 3.0657548330814887e-06,
11446
+ "loss": 2.7595,
11447
+ "step": 943500
11448
+ },
11449
+ {
11450
+ "epoch": 13.59,
11451
+ "learning_rate": 3.013931944812259e-06,
11452
+ "loss": 2.7605,
11453
+ "step": 944000
11454
+ },
11455
+ {
11456
+ "epoch": 13.6,
11457
+ "learning_rate": 2.9620052030595237e-06,
11458
+ "loss": 2.7638,
11459
+ "step": 944500
11460
+ },
11461
+ {
11462
+ "epoch": 13.6,
11463
+ "learning_rate": 2.9100784613067886e-06,
11464
+ "loss": 2.7634,
11465
+ "step": 945000
11466
+ },
11467
+ {
11468
+ "epoch": 13.61,
11469
+ "learning_rate": 2.8581517195540535e-06,
11470
+ "loss": 2.7629,
11471
+ "step": 945500
11472
+ },
11473
+ {
11474
+ "epoch": 13.62,
11475
+ "learning_rate": 2.806224977801318e-06,
11476
+ "loss": 2.7662,
11477
+ "step": 946000
11478
+ },
11479
+ {
11480
+ "epoch": 13.62,
11481
+ "learning_rate": 2.754298236048583e-06,
11482
+ "loss": 2.7636,
11483
+ "step": 946500
11484
+ },
11485
+ {
11486
+ "epoch": 13.63,
11487
+ "learning_rate": 2.7023714942958473e-06,
11488
+ "loss": 2.7625,
11489
+ "step": 947000
11490
+ },
11491
+ {
11492
+ "epoch": 13.64,
11493
+ "learning_rate": 2.650548606026618e-06,
11494
+ "loss": 2.7606,
11495
+ "step": 947500
11496
+ },
11497
+ {
11498
+ "epoch": 13.65,
11499
+ "learning_rate": 2.5986218642738823e-06,
11500
+ "loss": 2.7685,
11501
+ "step": 948000
11502
+ },
11503
+ {
11504
+ "epoch": 13.65,
11505
+ "learning_rate": 2.546695122521147e-06,
11506
+ "loss": 2.7667,
11507
+ "step": 948500
11508
+ },
11509
+ {
11510
+ "epoch": 13.66,
11511
+ "learning_rate": 2.494768380768412e-06,
11512
+ "loss": 2.7613,
11513
+ "step": 949000
11514
+ },
11515
+ {
11516
+ "epoch": 13.67,
11517
+ "learning_rate": 2.4428416390156766e-06,
11518
+ "loss": 2.7675,
11519
+ "step": 949500
11520
+ },
11521
+ {
11522
+ "epoch": 13.67,
11523
+ "learning_rate": 2.391018750746447e-06,
11524
+ "loss": 2.766,
11525
+ "step": 950000
11526
+ },
11527
+ {
11528
+ "epoch": 13.68,
11529
+ "learning_rate": 2.3390920089937115e-06,
11530
+ "loss": 2.7623,
11531
+ "step": 950500
11532
+ },
11533
+ {
11534
+ "epoch": 13.69,
11535
+ "learning_rate": 2.287165267240977e-06,
11536
+ "loss": 2.7623,
11537
+ "step": 951000
11538
+ },
11539
+ {
11540
+ "epoch": 13.7,
11541
+ "learning_rate": 2.2352385254882413e-06,
11542
+ "loss": 2.7631,
11543
+ "step": 951500
11544
+ },
11545
+ {
11546
+ "epoch": 13.7,
11547
+ "learning_rate": 2.1833117837355062e-06,
11548
+ "loss": 2.7559,
11549
+ "step": 952000
11550
+ },
11551
+ {
11552
+ "epoch": 13.71,
11553
+ "learning_rate": 2.1313850419827707e-06,
11554
+ "loss": 2.7593,
11555
+ "step": 952500
11556
+ },
11557
+ {
11558
+ "epoch": 13.72,
11559
+ "learning_rate": 2.0794583002300356e-06,
11560
+ "loss": 2.7603,
11561
+ "step": 953000
11562
+ },
11563
+ {
11564
+ "epoch": 13.72,
11565
+ "learning_rate": 2.0276354119608057e-06,
11566
+ "loss": 2.7611,
11567
+ "step": 953500
11568
+ },
11569
+ {
11570
+ "epoch": 13.73,
11571
+ "learning_rate": 1.9757086702080706e-06,
11572
+ "loss": 2.7659,
11573
+ "step": 954000
11574
+ },
11575
+ {
11576
+ "epoch": 13.74,
11577
+ "learning_rate": 1.9237819284553355e-06,
11578
+ "loss": 2.7671,
11579
+ "step": 954500
11580
+ },
11581
+ {
11582
+ "epoch": 13.75,
11583
+ "learning_rate": 1.8718551867026e-06,
11584
+ "loss": 2.7695,
11585
+ "step": 955000
11586
+ },
11587
+ {
11588
+ "epoch": 13.75,
11589
+ "learning_rate": 1.8199284449498646e-06,
11590
+ "loss": 2.7619,
11591
+ "step": 955500
11592
+ },
11593
+ {
11594
+ "epoch": 13.76,
11595
+ "learning_rate": 1.7681055566806351e-06,
11596
+ "loss": 2.765,
11597
+ "step": 956000
11598
+ },
11599
+ {
11600
+ "epoch": 13.77,
11601
+ "learning_rate": 1.7161788149278996e-06,
11602
+ "loss": 2.7589,
11603
+ "step": 956500
11604
+ },
11605
+ {
11606
+ "epoch": 13.78,
11607
+ "learning_rate": 1.6642520731751647e-06,
11608
+ "loss": 2.7621,
11609
+ "step": 957000
11610
+ },
11611
+ {
11612
+ "epoch": 13.78,
11613
+ "learning_rate": 1.6123253314224294e-06,
11614
+ "loss": 2.7618,
11615
+ "step": 957500
11616
+ },
11617
+ {
11618
+ "epoch": 13.79,
11619
+ "learning_rate": 1.560398589669694e-06,
11620
+ "loss": 2.7611,
11621
+ "step": 958000
11622
+ },
11623
+ {
11624
+ "epoch": 13.8,
11625
+ "learning_rate": 1.5084718479169588e-06,
11626
+ "loss": 2.7601,
11627
+ "step": 958500
11628
+ },
11629
+ {
11630
+ "epoch": 13.8,
11631
+ "learning_rate": 1.456648959647729e-06,
11632
+ "loss": 2.7613,
11633
+ "step": 959000
11634
+ },
11635
+ {
11636
+ "epoch": 13.81,
11637
+ "learning_rate": 1.404722217894994e-06,
11638
+ "loss": 2.76,
11639
+ "step": 959500
11640
+ },
11641
+ {
11642
+ "epoch": 13.82,
11643
+ "learning_rate": 1.3527954761422584e-06,
11644
+ "loss": 2.7567,
11645
+ "step": 960000
11646
+ },
11647
+ {
11648
+ "epoch": 13.83,
11649
+ "learning_rate": 1.3008687343895233e-06,
11650
+ "loss": 2.7568,
11651
+ "step": 960500
11652
+ },
11653
+ {
11654
+ "epoch": 13.83,
11655
+ "learning_rate": 1.248941992636788e-06,
11656
+ "loss": 2.7613,
11657
+ "step": 961000
11658
+ },
11659
+ {
11660
+ "epoch": 13.84,
11661
+ "learning_rate": 1.197015250884053e-06,
11662
+ "loss": 2.7679,
11663
+ "step": 961500
11664
+ },
11665
+ {
11666
+ "epoch": 13.85,
11667
+ "learning_rate": 1.1450885091313176e-06,
11668
+ "loss": 2.7643,
11669
+ "step": 962000
11670
+ },
11671
+ {
11672
+ "epoch": 13.85,
11673
+ "learning_rate": 1.0931617673785823e-06,
11674
+ "loss": 2.7615,
11675
+ "step": 962500
11676
+ },
11677
+ {
11678
+ "epoch": 13.86,
11679
+ "learning_rate": 1.0413388791093526e-06,
11680
+ "loss": 2.7609,
11681
+ "step": 963000
11682
+ },
11683
+ {
11684
+ "epoch": 13.87,
11685
+ "learning_rate": 9.894121373566175e-07,
11686
+ "loss": 2.7618,
11687
+ "step": 963500
11688
+ },
11689
+ {
11690
+ "epoch": 13.88,
11691
+ "learning_rate": 9.375892490873875e-07,
11692
+ "loss": 2.7576,
11693
+ "step": 964000
11694
+ },
11695
+ {
11696
+ "epoch": 13.88,
11697
+ "learning_rate": 8.856625073346523e-07,
11698
+ "loss": 2.7619,
11699
+ "step": 964500
11700
+ },
11701
+ {
11702
+ "epoch": 13.89,
11703
+ "learning_rate": 8.33735765581917e-07,
11704
+ "loss": 2.7564,
11705
+ "step": 965000
11706
+ },
11707
+ {
11708
+ "epoch": 13.9,
11709
+ "learning_rate": 7.818090238291818e-07,
11710
+ "loss": 2.7631,
11711
+ "step": 965500
11712
+ },
11713
+ {
11714
+ "epoch": 13.9,
11715
+ "learning_rate": 7.298822820764466e-07,
11716
+ "loss": 2.7709,
11717
+ "step": 966000
11718
+ },
11719
+ {
11720
+ "epoch": 13.91,
11721
+ "learning_rate": 6.779555403237114e-07,
11722
+ "loss": 2.7624,
11723
+ "step": 966500
11724
+ },
11725
+ {
11726
+ "epoch": 13.92,
11727
+ "learning_rate": 6.261326520544816e-07,
11728
+ "loss": 2.7607,
11729
+ "step": 967000
11730
+ },
11731
+ {
11732
+ "epoch": 13.93,
11733
+ "learning_rate": 5.742059103017464e-07,
11734
+ "loss": 2.7559,
11735
+ "step": 967500
11736
+ },
11737
+ {
11738
+ "epoch": 13.93,
11739
+ "learning_rate": 5.222791685490112e-07,
11740
+ "loss": 2.7601,
11741
+ "step": 968000
11742
+ },
11743
+ {
11744
+ "epoch": 13.94,
11745
+ "learning_rate": 4.7035242679627586e-07,
11746
+ "loss": 2.7625,
11747
+ "step": 968500
11748
+ },
11749
+ {
11750
+ "epoch": 13.95,
11751
+ "learning_rate": 4.184256850435406e-07,
11752
+ "loss": 2.7585,
11753
+ "step": 969000
11754
+ },
11755
+ {
11756
+ "epoch": 13.96,
11757
+ "learning_rate": 3.666027967743108e-07,
11758
+ "loss": 2.7573,
11759
+ "step": 969500
11760
+ },
11761
+ {
11762
+ "epoch": 13.96,
11763
+ "learning_rate": 3.1467605502157556e-07,
11764
+ "loss": 2.7586,
11765
+ "step": 970000
11766
+ },
11767
+ {
11768
+ "epoch": 13.97,
11769
+ "learning_rate": 2.627493132688403e-07,
11770
+ "loss": 2.7611,
11771
+ "step": 970500
11772
+ },
11773
+ {
11774
+ "epoch": 13.98,
11775
+ "learning_rate": 2.1082257151610508e-07,
11776
+ "loss": 2.7612,
11777
+ "step": 971000
11778
+ },
11779
+ {
11780
+ "epoch": 13.98,
11781
+ "learning_rate": 1.5899968324687532e-07,
11782
+ "loss": 2.7539,
11783
+ "step": 971500
11784
+ },
11785
+ {
11786
+ "epoch": 13.99,
11787
+ "learning_rate": 1.0707294149414008e-07,
11788
+ "loss": 2.7624,
11789
+ "step": 972000
11790
+ },
11791
+ {
11792
+ "epoch": 14.0,
11793
+ "learning_rate": 5.5146199741404825e-08,
11794
+ "loss": 2.761,
11795
+ "step": 972500
11796
+ },
11797
+ {
11798
+ "epoch": 14.0,
11799
+ "eval_accuracy": 0.509524975254837,
11800
+ "eval_loss": 2.5919222831726074,
11801
+ "eval_runtime": 555.1881,
11802
+ "eval_samples_per_second": 970.729,
11803
+ "eval_steps_per_second": 40.448,
11804
+ "step": 972622
11805
  }
11806
  ],
11807
  "max_steps": 972622,
11808
  "num_train_epochs": 14,
11809
+ "total_flos": 5.9815806927215e+18,
11810
  "trial_name": null,
11811
  "trial_params": null
11812
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ec13bee1191623dd89f547f05ab2d8f9b47f3c9077b8d581674d22ee640388d
3
  size 118242180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae1fe310f1f9e883c23d6725718feca058676978792d4f4de4c8dbbe2df9c17e
3
  size 118242180
runs/Jul30_01-06-41_user-SYS-5049A-TR/events.out.tfevents.1659110814.user-SYS-5049A-TR.3945016.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8ec72c1e2fe2a362e45c53d1f17f2bc66e75e52404b7a8c7dc1900a55a00db23
3
- size 296563
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3beebb212def5ed2a93cacef0a8f337665578f45993672c54eb6b8189a7b4fc
3
+ size 319132