jflotz committed on
Commit
93b6b1f
·
1 Parent(s): 772cf88

Training in progress, step 80000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c390c19518ce2505f6a86ec1e2f47d0d41f2396b4d8d3b3070b634f49cd1065d
3
  size 50044689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a30131a362734b1f942b271b7aca1732aab5c321492d56bcb252385a39f1b68d
3
  size 50044689
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fec60f3309de39871877c86c47238ea77b026c2a586d001d07ccd4e052fc5ce
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcbde3d756560955fbfc66085573e8eef27e478ece9f50dd82562f9061c96fc6
3
  size 25761253
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79323be1553d545e96629f1619fb64d1cbed0d647260d3d32b2ad240f882494f
3
  size 14503
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79323be1553d545e96629f1619fb64d1cbed0d647260d3d32b2ad240f882494f
3
  size 14503
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79323be1553d545e96629f1619fb64d1cbed0d647260d3d32b2ad240f882494f
3
  size 14503
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79323be1553d545e96629f1619fb64d1cbed0d647260d3d32b2ad240f882494f
3
  size 14503
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79323be1553d545e96629f1619fb64d1cbed0d647260d3d32b2ad240f882494f
3
  size 14503
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79323be1553d545e96629f1619fb64d1cbed0d647260d3d32b2ad240f882494f
3
  size 14503
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79323be1553d545e96629f1619fb64d1cbed0d647260d3d32b2ad240f882494f
3
  size 14503
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:456f6e75773c2103858da7be2712eb3dc4e020101d1bbc59dc5323b907e2cfa7
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79323be1553d545e96629f1619fb64d1cbed0d647260d3d32b2ad240f882494f
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d89de13f8c844c8306762a31aff6dac5ffaadd95c6501762d83dc7939ed9eace
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a3f000e1c939912bb1fea4c3fda9ca6e80fe563342ccf4a59cb3d0b46d78187
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.122769450392577,
5
- "global_step": 70000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1406,11 +1406,211 @@
1406
  "eval_samples_per_second": 1067.405,
1407
  "eval_steps_per_second": 16.729,
1408
  "step": 70000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1409
  }
1410
  ],
1411
  "max_steps": 250000,
1412
  "num_train_epochs": 12,
1413
- "total_flos": 1.1211546241141079e+21,
1414
  "trial_name": null,
1415
  "trial_params": null
1416
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.5688793718772307,
5
+ "global_step": 80000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1406
  "eval_samples_per_second": 1067.405,
1407
  "eval_steps_per_second": 16.729,
1408
  "step": 70000
1409
+ },
1410
+ {
1411
+ "epoch": 3.15,
1412
+ "learning_rate": 0.0005173557775693715,
1413
+ "loss": 0.4528,
1414
+ "step": 70500
1415
+ },
1416
+ {
1417
+ "epoch": 3.17,
1418
+ "learning_rate": 0.0005159968305957235,
1419
+ "loss": 0.4507,
1420
+ "step": 71000
1421
+ },
1422
+ {
1423
+ "epoch": 3.17,
1424
+ "eval_loss": 0.42597973346710205,
1425
+ "eval_runtime": 2.1768,
1426
+ "eval_samples_per_second": 1055.242,
1427
+ "eval_steps_per_second": 16.538,
1428
+ "step": 71000
1429
+ },
1430
+ {
1431
+ "epoch": 3.19,
1432
+ "learning_rate": 0.0005146286539371703,
1433
+ "loss": 0.4489,
1434
+ "step": 71500
1435
+ },
1436
+ {
1437
+ "epoch": 3.21,
1438
+ "learning_rate": 0.0005132513074421913,
1439
+ "loss": 0.4468,
1440
+ "step": 72000
1441
+ },
1442
+ {
1443
+ "epoch": 3.21,
1444
+ "eval_loss": 0.4192256033420563,
1445
+ "eval_runtime": 2.2165,
1446
+ "eval_samples_per_second": 1036.301,
1447
+ "eval_steps_per_second": 16.242,
1448
+ "step": 72000
1449
+ },
1450
+ {
1451
+ "epoch": 3.23,
1452
+ "learning_rate": 0.0005118648513603841,
1453
+ "loss": 0.445,
1454
+ "step": 72500
1455
+ },
1456
+ {
1457
+ "epoch": 3.26,
1458
+ "learning_rate": 0.0005104693463398293,
1459
+ "loss": 0.4432,
1460
+ "step": 73000
1461
+ },
1462
+ {
1463
+ "epoch": 3.26,
1464
+ "eval_loss": 0.4161074459552765,
1465
+ "eval_runtime": 2.2735,
1466
+ "eval_samples_per_second": 1010.341,
1467
+ "eval_steps_per_second": 15.835,
1468
+ "step": 73000
1469
+ },
1470
+ {
1471
+ "epoch": 3.28,
1472
+ "learning_rate": 0.0005090648534244371,
1473
+ "loss": 0.4415,
1474
+ "step": 73500
1475
+ },
1476
+ {
1477
+ "epoch": 3.3,
1478
+ "learning_rate": 0.0005076514340512776,
1479
+ "loss": 0.44,
1480
+ "step": 74000
1481
+ },
1482
+ {
1483
+ "epoch": 3.3,
1484
+ "eval_loss": 0.41528481245040894,
1485
+ "eval_runtime": 2.1341,
1486
+ "eval_samples_per_second": 1076.31,
1487
+ "eval_steps_per_second": 16.869,
1488
+ "step": 74000
1489
+ },
1490
+ {
1491
+ "epoch": 3.32,
1492
+ "learning_rate": 0.0005062291500478931,
1493
+ "loss": 0.4386,
1494
+ "step": 74500
1495
+ },
1496
+ {
1497
+ "epoch": 3.35,
1498
+ "learning_rate": 0.0005047980636295937,
1499
+ "loss": 0.4367,
1500
+ "step": 75000
1501
+ },
1502
+ {
1503
+ "epoch": 3.35,
1504
+ "eval_loss": 0.4101610779762268,
1505
+ "eval_runtime": 2.2225,
1506
+ "eval_samples_per_second": 1033.52,
1507
+ "eval_steps_per_second": 16.198,
1508
+ "step": 75000
1509
+ },
1510
+ {
1511
+ "epoch": 3.37,
1512
+ "learning_rate": 0.0005033582373967348,
1513
+ "loss": 0.4351,
1514
+ "step": 75500
1515
+ },
1516
+ {
1517
+ "epoch": 3.39,
1518
+ "learning_rate": 0.0005019097343319809,
1519
+ "loss": 0.4337,
1520
+ "step": 76000
1521
+ },
1522
+ {
1523
+ "epoch": 3.39,
1524
+ "eval_loss": 0.40619799494743347,
1525
+ "eval_runtime": 2.1631,
1526
+ "eval_samples_per_second": 1061.919,
1527
+ "eval_steps_per_second": 16.643,
1528
+ "step": 76000
1529
+ },
1530
+ {
1531
+ "epoch": 3.41,
1532
+ "learning_rate": 0.0005004526177975481,
1533
+ "loss": 0.4322,
1534
+ "step": 76500
1535
+ },
1536
+ {
1537
+ "epoch": 3.44,
1538
+ "learning_rate": 0.0004989869515324342,
1539
+ "loss": 0.4311,
1540
+ "step": 77000
1541
+ },
1542
+ {
1543
+ "epoch": 3.44,
1544
+ "eval_loss": 0.4019022583961487,
1545
+ "eval_runtime": 2.2694,
1546
+ "eval_samples_per_second": 1012.161,
1547
+ "eval_steps_per_second": 15.863,
1548
+ "step": 77000
1549
+ },
1550
+ {
1551
+ "epoch": 3.46,
1552
+ "learning_rate": 0.0004975127996496297,
1553
+ "loss": 0.4294,
1554
+ "step": 77500
1555
+ },
1556
+ {
1557
+ "epoch": 3.48,
1558
+ "learning_rate": 0.0004960302266333135,
1559
+ "loss": 0.4286,
1560
+ "step": 78000
1561
+ },
1562
+ {
1563
+ "epoch": 3.48,
1564
+ "eval_loss": 0.40067020058631897,
1565
+ "eval_runtime": 2.31,
1566
+ "eval_samples_per_second": 994.37,
1567
+ "eval_steps_per_second": 15.584,
1568
+ "step": 78000
1569
+ },
1570
+ {
1571
+ "epoch": 3.5,
1572
+ "learning_rate": 0.0004945392973360323,
1573
+ "loss": 0.427,
1574
+ "step": 78500
1575
+ },
1576
+ {
1577
+ "epoch": 3.52,
1578
+ "learning_rate": 0.0004930400769758634,
1579
+ "loss": 0.4259,
1580
+ "step": 79000
1581
+ },
1582
+ {
1583
+ "epoch": 3.52,
1584
+ "eval_loss": 0.3996644616127014,
1585
+ "eval_runtime": 2.1788,
1586
+ "eval_samples_per_second": 1054.255,
1587
+ "eval_steps_per_second": 16.523,
1588
+ "step": 79000
1589
+ },
1590
+ {
1591
+ "epoch": 3.55,
1592
+ "learning_rate": 0.0004915326311335622,
1593
+ "loss": 0.425,
1594
+ "step": 79500
1595
+ },
1596
+ {
1597
+ "epoch": 3.57,
1598
+ "learning_rate": 0.0004900170257496933,
1599
+ "loss": 0.4239,
1600
+ "step": 80000
1601
+ },
1602
+ {
1603
+ "epoch": 3.57,
1604
+ "eval_loss": 0.3968483507633209,
1605
+ "eval_runtime": 2.2257,
1606
+ "eval_samples_per_second": 1032.02,
1607
+ "eval_steps_per_second": 16.174,
1608
+ "step": 80000
1609
  }
1610
  ],
1611
  "max_steps": 250000,
1612
  "num_train_epochs": 12,
1613
+ "total_flos": 1.281322626979912e+21,
1614
  "trial_name": null,
1615
  "trial_params": null
1616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fec60f3309de39871877c86c47238ea77b026c2a586d001d07ccd4e052fc5ce
3
  size 25761253
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcbde3d756560955fbfc66085573e8eef27e478ece9f50dd82562f9061c96fc6
3
  size 25761253