Nadav commited on
Commit
0906fc4
·
1 Parent(s): b0cea72

Training in progress, step 70000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:988099a377e3ae6ef89fd2f1f761be64fa6a19032354dab4bc5333d2740798f7
3
- size 893438545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:058e818841dd5d8551bdb702bf79c2919a18608fe8d04e481344ab6a9d437053
3
+ size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65f03d0e43e34a2e0bd81bc161f8b22fc6c59b8c9b7c6cda78db789b3b576cc9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2aad4c0bf3fb0ed60689c461b583204b374f775ac8f21901c8280987af4fc28
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d496ee2298f65cd4dad9053d5676d64850869fdb37f1b20e4f79d1c4026aca1
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4674bdfc2111c4be2d53fed27a1a59d3f54df49666b8123e70941223006d6b1b
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9941c77ea6765c024840da9e5a9b406fef84b8e5ef3a55221f4dca4db4e76a10
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62e6a5cc3a60f7141da7f18b69a3a13de9f897b3f440b905c0f70fda71c091a5
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de3c7b468f562a170fe98313b8778b6d4fc7ff5fb03b102a017eca4a43908ee2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2a44e614402fafbcde75b77e1a195abb137e3c2b1958a6d75361c958ced8160
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.529091527730521,
5
- "global_step": 65000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1306,11 +1306,111 @@
1306
  "eval_samples_per_second": 28.807,
1307
  "eval_steps_per_second": 0.922,
1308
  "step": 65000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1309
  }
1310
  ],
1311
  "max_steps": 1000000,
1312
  "num_train_epochs": 86,
1313
- "total_flos": 2.990133517637586e+21,
1314
  "trial_name": null,
1315
  "trial_params": null
1316
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.954406260632869,
5
+ "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1306
  "eval_samples_per_second": 28.807,
1307
  "eval_steps_per_second": 0.922,
1308
  "step": 65000
1309
+ },
1310
+ {
1311
+ "epoch": 5.57,
1312
+ "learning_rate": 9.999999999999999e-06,
1313
+ "loss": 0.4078,
1314
+ "step": 65500
1315
+ },
1316
+ {
1317
+ "epoch": 5.61,
1318
+ "learning_rate": 9.999999999999999e-06,
1319
+ "loss": 0.4078,
1320
+ "step": 66000
1321
+ },
1322
+ {
1323
+ "epoch": 5.61,
1324
+ "eval_loss": 0.3783106803894043,
1325
+ "eval_runtime": 29.6676,
1326
+ "eval_samples_per_second": 16.853,
1327
+ "eval_steps_per_second": 0.539,
1328
+ "step": 66000
1329
+ },
1330
+ {
1331
+ "epoch": 5.66,
1332
+ "learning_rate": 9.999999999999999e-06,
1333
+ "loss": 0.4067,
1334
+ "step": 66500
1335
+ },
1336
+ {
1337
+ "epoch": 5.7,
1338
+ "learning_rate": 9.999999999999999e-06,
1339
+ "loss": 0.4072,
1340
+ "step": 67000
1341
+ },
1342
+ {
1343
+ "epoch": 5.7,
1344
+ "eval_loss": 0.3780921399593353,
1345
+ "eval_runtime": 15.2739,
1346
+ "eval_samples_per_second": 32.736,
1347
+ "eval_steps_per_second": 1.048,
1348
+ "step": 67000
1349
+ },
1350
+ {
1351
+ "epoch": 5.74,
1352
+ "learning_rate": 9.999999999999999e-06,
1353
+ "loss": 0.4079,
1354
+ "step": 67500
1355
+ },
1356
+ {
1357
+ "epoch": 5.78,
1358
+ "learning_rate": 9.999999999999999e-06,
1359
+ "loss": 0.4057,
1360
+ "step": 68000
1361
+ },
1362
+ {
1363
+ "epoch": 5.78,
1364
+ "eval_loss": 0.37833890318870544,
1365
+ "eval_runtime": 17.1263,
1366
+ "eval_samples_per_second": 29.195,
1367
+ "eval_steps_per_second": 0.934,
1368
+ "step": 68000
1369
+ },
1370
+ {
1371
+ "epoch": 5.83,
1372
+ "learning_rate": 9.999999999999999e-06,
1373
+ "loss": 0.406,
1374
+ "step": 68500
1375
+ },
1376
+ {
1377
+ "epoch": 5.87,
1378
+ "learning_rate": 9.999999999999999e-06,
1379
+ "loss": 0.4065,
1380
+ "step": 69000
1381
+ },
1382
+ {
1383
+ "epoch": 5.87,
1384
+ "eval_loss": 0.37815991044044495,
1385
+ "eval_runtime": 19.0772,
1386
+ "eval_samples_per_second": 26.209,
1387
+ "eval_steps_per_second": 0.839,
1388
+ "step": 69000
1389
+ },
1390
+ {
1391
+ "epoch": 5.91,
1392
+ "learning_rate": 9.999999999999999e-06,
1393
+ "loss": 0.4063,
1394
+ "step": 69500
1395
+ },
1396
+ {
1397
+ "epoch": 5.95,
1398
+ "learning_rate": 9.999999999999999e-06,
1399
+ "loss": 0.4062,
1400
+ "step": 70000
1401
+ },
1402
+ {
1403
+ "epoch": 5.95,
1404
+ "eval_loss": 0.3770570158958435,
1405
+ "eval_runtime": 15.6266,
1406
+ "eval_samples_per_second": 31.997,
1407
+ "eval_steps_per_second": 1.024,
1408
+ "step": 70000
1409
  }
1410
  ],
1411
  "max_steps": 1000000,
1412
  "num_train_epochs": 86,
1413
+ "total_flos": 3.2201500020403764e+21,
1414
  "trial_name": null,
1415
  "trial_params": null
1416
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65f03d0e43e34a2e0bd81bc161f8b22fc6c59b8c9b7c6cda78db789b3b576cc9
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2aad4c0bf3fb0ed60689c461b583204b374f775ac8f21901c8280987af4fc28
3
  size 449471589