Nadav commited on
Commit
05c83f2
·
1 Parent(s): 0906fc4

Training in progress, step 75000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:058e818841dd5d8551bdb702bf79c2919a18608fe8d04e481344ab6a9d437053
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94c42a01b552543e62a3944c3187b9f2d5e5e9bc4078dc1a017356ad2f7dc004
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2aad4c0bf3fb0ed60689c461b583204b374f775ac8f21901c8280987af4fc28
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad209d089c4737fe935a630c9bedf910da9516f82f0d1d863ad0c840e926ffa
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4674bdfc2111c4be2d53fed27a1a59d3f54df49666b8123e70941223006d6b1b
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19a6bae5be3d123d40565035db3477df4ba95d519abfda9013e7b522c94cbc53
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:62e6a5cc3a60f7141da7f18b69a3a13de9f897b3f440b905c0f70fda71c091a5
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:134046d1fefbeeafed77a6a1deaac9436322f5bf920a26c3d8ece634cccfe3e9
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2a44e614402fafbcde75b77e1a195abb137e3c2b1958a6d75361c958ced8160
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be8308decf673b01fae26328e1125d69008a8fd59ea958321bb3e87e2ca217de
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.954406260632869,
5
- "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1406,11 +1406,111 @@
1406
  "eval_samples_per_second": 31.997,
1407
  "eval_steps_per_second": 1.024,
1408
  "step": 70000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1409
  }
1410
  ],
1411
  "max_steps": 1000000,
1412
  "num_train_epochs": 86,
1413
- "total_flos": 3.2201500020403764e+21,
1414
  "trial_name": null,
1415
  "trial_params": null
1416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.379720993535216,
5
+ "global_step": 75000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1406
  "eval_samples_per_second": 31.997,
1407
  "eval_steps_per_second": 1.024,
1408
  "step": 70000
1409
+ },
1410
+ {
1411
+ "epoch": 6.0,
1412
+ "learning_rate": 9.999999999999999e-06,
1413
+ "loss": 0.4073,
1414
+ "step": 70500
1415
+ },
1416
+ {
1417
+ "epoch": 6.04,
1418
+ "learning_rate": 9.999999999999999e-06,
1419
+ "loss": 0.4051,
1420
+ "step": 71000
1421
+ },
1422
+ {
1423
+ "epoch": 6.04,
1424
+ "eval_loss": 0.3775251507759094,
1425
+ "eval_runtime": 16.0318,
1426
+ "eval_samples_per_second": 31.188,
1427
+ "eval_steps_per_second": 0.998,
1428
+ "step": 71000
1429
+ },
1430
+ {
1431
+ "epoch": 6.08,
1432
+ "learning_rate": 9.999999999999999e-06,
1433
+ "loss": 0.4066,
1434
+ "step": 71500
1435
+ },
1436
+ {
1437
+ "epoch": 6.12,
1438
+ "learning_rate": 9.999999999999999e-06,
1439
+ "loss": 0.4057,
1440
+ "step": 72000
1441
+ },
1442
+ {
1443
+ "epoch": 6.12,
1444
+ "eval_loss": 0.37701237201690674,
1445
+ "eval_runtime": 15.6982,
1446
+ "eval_samples_per_second": 31.851,
1447
+ "eval_steps_per_second": 1.019,
1448
+ "step": 72000
1449
+ },
1450
+ {
1451
+ "epoch": 6.17,
1452
+ "learning_rate": 9.999999999999999e-06,
1453
+ "loss": 0.4067,
1454
+ "step": 72500
1455
+ },
1456
+ {
1457
+ "epoch": 6.21,
1458
+ "learning_rate": 9.999999999999999e-06,
1459
+ "loss": 0.4061,
1460
+ "step": 73000
1461
+ },
1462
+ {
1463
+ "epoch": 6.21,
1464
+ "eval_loss": 0.37806421518325806,
1465
+ "eval_runtime": 15.7852,
1466
+ "eval_samples_per_second": 31.675,
1467
+ "eval_steps_per_second": 1.014,
1468
+ "step": 73000
1469
+ },
1470
+ {
1471
+ "epoch": 6.25,
1472
+ "learning_rate": 9.999999999999999e-06,
1473
+ "loss": 0.4053,
1474
+ "step": 73500
1475
+ },
1476
+ {
1477
+ "epoch": 6.29,
1478
+ "learning_rate": 9.999999999999999e-06,
1479
+ "loss": 0.405,
1480
+ "step": 74000
1481
+ },
1482
+ {
1483
+ "epoch": 6.29,
1484
+ "eval_loss": 0.3771826922893524,
1485
+ "eval_runtime": 15.5158,
1486
+ "eval_samples_per_second": 32.225,
1487
+ "eval_steps_per_second": 1.031,
1488
+ "step": 74000
1489
+ },
1490
+ {
1491
+ "epoch": 6.34,
1492
+ "learning_rate": 9.999999999999999e-06,
1493
+ "loss": 0.4064,
1494
+ "step": 74500
1495
+ },
1496
+ {
1497
+ "epoch": 6.38,
1498
+ "learning_rate": 9.999999999999999e-06,
1499
+ "loss": 0.4053,
1500
+ "step": 75000
1501
+ },
1502
+ {
1503
+ "epoch": 6.38,
1504
+ "eval_loss": 0.377290278673172,
1505
+ "eval_runtime": 23.3698,
1506
+ "eval_samples_per_second": 21.395,
1507
+ "eval_steps_per_second": 0.685,
1508
+ "step": 75000
1509
  }
1510
  ],
1511
  "max_steps": 1000000,
1512
  "num_train_epochs": 86,
1513
+ "total_flos": 3.450150330523429e+21,
1514
  "trial_name": null,
1515
  "trial_params": null
1516
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2aad4c0bf3fb0ed60689c461b583204b374f775ac8f21901c8280987af4fc28
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ad209d089c4737fe935a630c9bedf910da9516f82f0d1d863ad0c840e926ffa
3
  size 449471589