Nadav committed on
Commit
b0cea72
·
1 Parent(s): 9629aa1

Training in progress, step 65000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abf4891b4d7d549b998893b34a4d7fda74fe4d926dd8829b0fd1f3ebe9e05331
3
  size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:988099a377e3ae6ef89fd2f1f761be64fa6a19032354dab4bc5333d2740798f7
3
  size 893438545
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18a2bbc1e1a7ffff89dca088c920d657df7faf45dedf18ccd7ee5e4d0208668c
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f03d0e43e34a2e0bd81bc161f8b22fc6c59b8c9b7c6cda78db789b3b576cc9
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6674133d74344ac7df609ac13da3436a0f3992a815e6582851a68fd5a7ad5e18
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d496ee2298f65cd4dad9053d5676d64850869fdb37f1b20e4f79d1c4026aca1
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:337ac944c1300b5fad5b6313f7b0d4a29d6bf7750594a32866e96847ec6ec635
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9941c77ea6765c024840da9e5a9b406fef84b8e5ef3a55221f4dca4db4e76a10
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19d585522f5a4f4e47c69e3dfd034779da400358487b840d5af1cf4b0f318c1e
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de3c7b468f562a170fe98313b8778b6d4fc7ff5fb03b102a017eca4a43908ee2
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.103776794828173,
5
- "global_step": 60000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1206,11 +1206,111 @@
1206
  "eval_samples_per_second": 25.283,
1207
  "eval_steps_per_second": 0.809,
1208
  "step": 60000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1209
  }
1210
  ],
1211
  "max_steps": 1000000,
1212
  "num_train_epochs": 86,
1213
- "total_flos": 2.7601170332347956e+21,
1214
  "trial_name": null,
1215
  "trial_params": null
1216
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.529091527730521,
5
+ "global_step": 65000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1206
  "eval_samples_per_second": 25.283,
1207
  "eval_steps_per_second": 0.809,
1208
  "step": 60000
1209
+ },
1210
+ {
1211
+ "epoch": 5.15,
1212
+ "learning_rate": 9.999999999999999e-06,
1213
+ "loss": 0.4108,
1214
+ "step": 60500
1215
+ },
1216
+ {
1217
+ "epoch": 5.19,
1218
+ "learning_rate": 9.999999999999999e-06,
1219
+ "loss": 0.4094,
1220
+ "step": 61000
1221
+ },
1222
+ {
1223
+ "epoch": 5.19,
1224
+ "eval_loss": 0.3796501159667969,
1225
+ "eval_runtime": 18.1293,
1226
+ "eval_samples_per_second": 27.58,
1227
+ "eval_steps_per_second": 0.883,
1228
+ "step": 61000
1229
+ },
1230
+ {
1231
+ "epoch": 5.23,
1232
+ "learning_rate": 9.999999999999999e-06,
1233
+ "loss": 0.4092,
1234
+ "step": 61500
1235
+ },
1236
+ {
1237
+ "epoch": 5.27,
1238
+ "learning_rate": 9.999999999999999e-06,
1239
+ "loss": 0.4091,
1240
+ "step": 62000
1241
+ },
1242
+ {
1243
+ "epoch": 5.27,
1244
+ "eval_loss": 0.3790924549102783,
1245
+ "eval_runtime": 20.9048,
1246
+ "eval_samples_per_second": 23.918,
1247
+ "eval_steps_per_second": 0.765,
1248
+ "step": 62000
1249
+ },
1250
+ {
1251
+ "epoch": 5.32,
1252
+ "learning_rate": 9.999999999999999e-06,
1253
+ "loss": 0.408,
1254
+ "step": 62500
1255
+ },
1256
+ {
1257
+ "epoch": 5.36,
1258
+ "learning_rate": 9.999999999999999e-06,
1259
+ "loss": 0.4102,
1260
+ "step": 63000
1261
+ },
1262
+ {
1263
+ "epoch": 5.36,
1264
+ "eval_loss": 0.3805426061153412,
1265
+ "eval_runtime": 27.4404,
1266
+ "eval_samples_per_second": 18.221,
1267
+ "eval_steps_per_second": 0.583,
1268
+ "step": 63000
1269
+ },
1270
+ {
1271
+ "epoch": 5.4,
1272
+ "learning_rate": 9.999999999999999e-06,
1273
+ "loss": 0.4086,
1274
+ "step": 63500
1275
+ },
1276
+ {
1277
+ "epoch": 5.44,
1278
+ "learning_rate": 9.999999999999999e-06,
1279
+ "loss": 0.4087,
1280
+ "step": 64000
1281
+ },
1282
+ {
1283
+ "epoch": 5.44,
1284
+ "eval_loss": 0.37830984592437744,
1285
+ "eval_runtime": 14.8851,
1286
+ "eval_samples_per_second": 33.591,
1287
+ "eval_steps_per_second": 1.075,
1288
+ "step": 64000
1289
+ },
1290
+ {
1291
+ "epoch": 5.49,
1292
+ "learning_rate": 9.999999999999999e-06,
1293
+ "loss": 0.4081,
1294
+ "step": 64500
1295
+ },
1296
+ {
1297
+ "epoch": 5.53,
1298
+ "learning_rate": 9.999999999999999e-06,
1299
+ "loss": 0.4083,
1300
+ "step": 65000
1301
+ },
1302
+ {
1303
+ "epoch": 5.53,
1304
+ "eval_loss": 0.3796636164188385,
1305
+ "eval_runtime": 17.3567,
1306
+ "eval_samples_per_second": 28.807,
1307
+ "eval_steps_per_second": 0.922,
1308
+ "step": 65000
1309
  }
1310
  ],
1311
  "max_steps": 1000000,
1312
  "num_train_epochs": 86,
1313
+ "total_flos": 2.990133517637586e+21,
1314
  "trial_name": null,
1315
  "trial_params": null
1316
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18a2bbc1e1a7ffff89dca088c920d657df7faf45dedf18ccd7ee5e4d0208668c
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f03d0e43e34a2e0bd81bc161f8b22fc6c59b8c9b7c6cda78db789b3b576cc9
3
  size 449471589