Nadav commited on
Commit
a01686a
·
1 Parent(s): 6a219ab

Training in progress, step 100000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0946b4af042d0003be776279dd8dec310d85422ca12c683daea7163fdc14f58f
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10ce2f9aa9d5627458e5c26c0c8c1f447307afa57aad3f361bb6eeb7f577533f
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:886c14bc146e35ddb8b43369943fdd5ae148eef1a89bed83206fbfc7c2a327ef
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d6ee7a4f983bd49941cf45454fc26eedf52c2df44891b73ef7378899f037b91
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3ab37f55295b05256b0aeab0c9118d1e91676146a55f40c6459b8bea2a5ccfb
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40e63f9b2799078475c6fffe20b9cb0c8afc8f9b68799aea89478ba54d8b5f9a
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fd1d30ec6a85b35de8f6aedadaea7484a7f1c0d94891107a63ad388e3781007
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96c80f44742a8b293db0b2ae4807850db50b76531a4a9c1e49d9708ec4e2c11e
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f61de41cc564c25462ca0290993ffba1d92f72b28f091680e90a006d4be7a958
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbda746b03062a6d3e95bcd90c26d65c91a4557dd86831395f573a0b3bba8af0
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 7.8376730819472264,
5
- "global_step": 90000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1230,11 +1230,147 @@
1230
  "eval_samples_per_second": 64.646,
1231
  "eval_steps_per_second": 1.021,
1232
  "step": 90000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1233
  }
1234
  ],
1235
  "max_steps": 100000,
1236
  "num_train_epochs": 9,
1237
- "total_flos": 4.238631778453049e+21,
1238
  "trial_name": null,
1239
  "trial_params": null
1240
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.70852564660803,
5
+ "global_step": 100000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1230
  "eval_samples_per_second": 64.646,
1231
  "eval_steps_per_second": 1.021,
1232
  "step": 90000
1233
+ },
1234
+ {
1235
+ "epoch": 7.88,
1236
+ "learning_rate": 1.20196015019827e-05,
1237
+ "loss": 0.3811,
1238
+ "step": 90500
1239
+ },
1240
+ {
1241
+ "epoch": 7.92,
1242
+ "learning_rate": 1.1814648074431392e-05,
1243
+ "loss": 0.3802,
1244
+ "step": 91000
1245
+ },
1246
+ {
1247
+ "epoch": 7.97,
1248
+ "learning_rate": 1.1620446367145134e-05,
1249
+ "loss": 0.3808,
1250
+ "step": 91500
1251
+ },
1252
+ {
1253
+ "epoch": 8.01,
1254
+ "learning_rate": 1.1437400292071077e-05,
1255
+ "loss": 0.3807,
1256
+ "step": 92000
1257
+ },
1258
+ {
1259
+ "epoch": 8.06,
1260
+ "learning_rate": 1.1265155682572145e-05,
1261
+ "loss": 0.3799,
1262
+ "step": 92500
1263
+ },
1264
+ {
1265
+ "epoch": 8.1,
1266
+ "learning_rate": 1.1103442333401839e-05,
1267
+ "loss": 0.3794,
1268
+ "step": 93000
1269
+ },
1270
+ {
1271
+ "epoch": 8.14,
1272
+ "learning_rate": 1.0952656183871172e-05,
1273
+ "loss": 0.3796,
1274
+ "step": 93500
1275
+ },
1276
+ {
1277
+ "epoch": 8.19,
1278
+ "learning_rate": 1.0812834438206289e-05,
1279
+ "loss": 0.3805,
1280
+ "step": 94000
1281
+ },
1282
+ {
1283
+ "epoch": 8.23,
1284
+ "learning_rate": 1.0684011595330725e-05,
1285
+ "loss": 0.3814,
1286
+ "step": 94500
1287
+ },
1288
+ {
1289
+ "epoch": 8.27,
1290
+ "learning_rate": 1.0566443996265343e-05,
1291
+ "loss": 0.3809,
1292
+ "step": 95000
1293
+ },
1294
+ {
1295
+ "epoch": 8.27,
1296
+ "eval_loss": 0.3615710437297821,
1297
+ "eval_runtime": 63.8854,
1298
+ "eval_samples_per_second": 78.265,
1299
+ "eval_steps_per_second": 1.237,
1300
+ "step": 95000
1301
+ },
1302
+ {
1303
+ "epoch": 8.32,
1304
+ "learning_rate": 1.0459689445942448e-05,
1305
+ "loss": 0.3801,
1306
+ "step": 95500
1307
+ },
1308
+ {
1309
+ "epoch": 8.36,
1310
+ "learning_rate": 1.036402093165149e-05,
1311
+ "loss": 0.3808,
1312
+ "step": 96000
1313
+ },
1314
+ {
1315
+ "epoch": 8.4,
1316
+ "learning_rate": 1.0279462058166865e-05,
1317
+ "loss": 0.3796,
1318
+ "step": 96500
1319
+ },
1320
+ {
1321
+ "epoch": 8.45,
1322
+ "learning_rate": 1.0206033689125313e-05,
1323
+ "loss": 0.379,
1324
+ "step": 97000
1325
+ },
1326
+ {
1327
+ "epoch": 8.49,
1328
+ "learning_rate": 1.0143753941878168e-05,
1329
+ "loss": 0.3782,
1330
+ "step": 97500
1331
+ },
1332
+ {
1333
+ "epoch": 8.53,
1334
+ "learning_rate": 1.0092638183021144e-05,
1335
+ "loss": 0.3797,
1336
+ "step": 98000
1337
+ },
1338
+ {
1339
+ "epoch": 8.58,
1340
+ "learning_rate": 1.0052699024602892e-05,
1341
+ "loss": 0.3793,
1342
+ "step": 98500
1343
+ },
1344
+ {
1345
+ "epoch": 8.62,
1346
+ "learning_rate": 1.0023946321013112e-05,
1347
+ "loss": 0.3799,
1348
+ "step": 99000
1349
+ },
1350
+ {
1351
+ "epoch": 8.66,
1352
+ "learning_rate": 1.0006387166551193e-05,
1353
+ "loss": 0.3795,
1354
+ "step": 99500
1355
+ },
1356
+ {
1357
+ "epoch": 8.71,
1358
+ "learning_rate": 1.0000025893675746e-05,
1359
+ "loss": 0.3796,
1360
+ "step": 100000
1361
+ },
1362
+ {
1363
+ "epoch": 8.71,
1364
+ "eval_loss": 0.36024102568626404,
1365
+ "eval_runtime": 72.9389,
1366
+ "eval_samples_per_second": 68.551,
1367
+ "eval_steps_per_second": 1.083,
1368
+ "step": 100000
1369
  }
1370
  ],
1371
  "max_steps": 100000,
1372
  "num_train_epochs": 9,
1373
+ "total_flos": 4.7095831368807633e+21,
1374
  "trial_name": null,
1375
  "trial_params": null
1376
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:886c14bc146e35ddb8b43369943fdd5ae148eef1a89bed83206fbfc7c2a327ef
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d6ee7a4f983bd49941cf45454fc26eedf52c2df44891b73ef7378899f037b91
3
  size 449471589