Nadav committed on
Commit
6c68d02
·
1 Parent(s): 05c83f2

Training in progress, step 80000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:94c42a01b552543e62a3944c3187b9f2d5e5e9bc4078dc1a017356ad2f7dc004
3
  size 893439185
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ddeabb9ead685c5e5b416b4a981e11a787d94773db5c89384835f8ea6b2e1c4
3
  size 893439185
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ad209d089c4737fe935a630c9bedf910da9516f82f0d1d863ad0c840e926ffa
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98dacb4579cc2ad8d273f8bac5c3977b66490dafa2ef002d312807df6670d4cd
3
  size 449471589
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19a6bae5be3d123d40565035db3477df4ba95d519abfda9013e7b522c94cbc53
3
  size 15523
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79b99893c9b02fa371856f47a0ec288962436c435769873b9db7532898348d23
3
  size 15523
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:134046d1fefbeeafed77a6a1deaac9436322f5bf920a26c3d8ece634cccfe3e9
3
  size 559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e8d48d0939e6a173efa67076df748028040c8480b0133ac53f27544c88363a
3
  size 559
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be8308decf673b01fae26328e1125d69008a8fd59ea958321bb3e87e2ca217de
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71d5e106c9d23676ccfa26cba844ba11ee123c667bd6da5c807ecc94bb13e886
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.379720993535216,
5
- "global_step": 75000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1506,11 +1506,111 @@
1506
  "eval_samples_per_second": 21.395,
1507
  "eval_steps_per_second": 0.685,
1508
  "step": 75000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1509
  }
1510
  ],
1511
  "max_steps": 1000000,
1512
  "num_train_epochs": 86,
1513
- "total_flos": 3.450150330523429e+21,
1514
  "trial_name": null,
1515
  "trial_params": null
1516
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.805035726437564,
5
+ "global_step": 80000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1506
  "eval_samples_per_second": 21.395,
1507
  "eval_steps_per_second": 0.685,
1508
  "step": 75000
1509
+ },
1510
+ {
1511
+ "epoch": 6.42,
1512
+ "learning_rate": 9.999999999999999e-06,
1513
+ "loss": 0.406,
1514
+ "step": 75500
1515
+ },
1516
+ {
1517
+ "epoch": 6.46,
1518
+ "learning_rate": 9.999999999999999e-06,
1519
+ "loss": 0.4054,
1520
+ "step": 76000
1521
+ },
1522
+ {
1523
+ "epoch": 6.46,
1524
+ "eval_loss": 0.3762701749801636,
1525
+ "eval_runtime": 15.2662,
1526
+ "eval_samples_per_second": 32.752,
1527
+ "eval_steps_per_second": 1.048,
1528
+ "step": 76000
1529
+ },
1530
+ {
1531
+ "epoch": 6.51,
1532
+ "learning_rate": 9.999999999999999e-06,
1533
+ "loss": 0.4047,
1534
+ "step": 76500
1535
+ },
1536
+ {
1537
+ "epoch": 6.55,
1538
+ "learning_rate": 9.999999999999999e-06,
1539
+ "loss": 0.4043,
1540
+ "step": 77000
1541
+ },
1542
+ {
1543
+ "epoch": 6.55,
1544
+ "eval_loss": 0.3773665130138397,
1545
+ "eval_runtime": 23.0339,
1546
+ "eval_samples_per_second": 21.707,
1547
+ "eval_steps_per_second": 0.695,
1548
+ "step": 77000
1549
+ },
1550
+ {
1551
+ "epoch": 6.59,
1552
+ "learning_rate": 9.999999999999999e-06,
1553
+ "loss": 0.4041,
1554
+ "step": 77500
1555
+ },
1556
+ {
1557
+ "epoch": 6.63,
1558
+ "learning_rate": 9.999999999999999e-06,
1559
+ "loss": 0.4044,
1560
+ "step": 78000
1561
+ },
1562
+ {
1563
+ "epoch": 6.63,
1564
+ "eval_loss": 0.3738757371902466,
1565
+ "eval_runtime": 16.5496,
1566
+ "eval_samples_per_second": 30.212,
1567
+ "eval_steps_per_second": 0.967,
1568
+ "step": 78000
1569
+ },
1570
+ {
1571
+ "epoch": 6.68,
1572
+ "learning_rate": 9.999999999999999e-06,
1573
+ "loss": 0.4038,
1574
+ "step": 78500
1575
+ },
1576
+ {
1577
+ "epoch": 6.72,
1578
+ "learning_rate": 9.999999999999999e-06,
1579
+ "loss": 0.4038,
1580
+ "step": 79000
1581
+ },
1582
+ {
1583
+ "epoch": 6.72,
1584
+ "eval_loss": 0.37452879548072815,
1585
+ "eval_runtime": 16.7684,
1586
+ "eval_samples_per_second": 29.818,
1587
+ "eval_steps_per_second": 0.954,
1588
+ "step": 79000
1589
+ },
1590
+ {
1591
+ "epoch": 6.76,
1592
+ "learning_rate": 9.999999999999999e-06,
1593
+ "loss": 0.4039,
1594
+ "step": 79500
1595
+ },
1596
+ {
1597
+ "epoch": 6.81,
1598
+ "learning_rate": 9.999999999999999e-06,
1599
+ "loss": 0.4045,
1600
+ "step": 80000
1601
+ },
1602
+ {
1603
+ "epoch": 6.81,
1604
+ "eval_loss": 0.3761942684650421,
1605
+ "eval_runtime": 16.6694,
1606
+ "eval_samples_per_second": 29.995,
1607
+ "eval_steps_per_second": 0.96,
1608
+ "step": 80000
1609
  }
1610
  ],
1611
  "max_steps": 1000000,
1612
  "num_train_epochs": 86,
1613
+ "total_flos": 3.680166814926219e+21,
1614
  "trial_name": null,
1615
  "trial_params": null
1616
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9ad209d089c4737fe935a630c9bedf910da9516f82f0d1d863ad0c840e926ffa
3
  size 449471589
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98dacb4579cc2ad8d273f8bac5c3977b66490dafa2ef002d312807df6670d4cd
3
  size 449471589