WellDunDun committed on
Commit
41e7827
·
verified ·
1 Parent(s): 8cf3c12

Training in progress, step 21000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:577e79f060d5a963f3cc3b4b34f8a7dd570497cde46b54c16168d2da07cf5ef7
3
  size 308136760
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2135ef466dfb3e2d5d3572e292dd4324ec591f57d775172ef8b6840f75dda4e8
3
  size 308136760
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7847e554bd29f7803bb720457749f96de17d2b67c9cd349261abd77a59ce34e
3
  size 615918027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe53cb029e96cca574386603a28d4ac8e28031f8553b1bac9f9faad9460286ab
3
  size 615918027
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:447ada4c90322dccefeca75496b1425410ec29eaffe6caf9584c9a488c113263
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7733a2f92829c6f2affe6fd0ac9e139536c282e141c026aa754b405cc7953510
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53bd263440b7410128d68768fe66b30a197c8b545827e425eb9c838fa3f9f999
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42c3ceb1cd051682bc26b7265f7ab65390e13753ebfee8b08559c90f7024d3cf
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:76059481ee20fa0728a23bc290f1f2838e40eb8c9fd187e27811a2ba0a738bd3
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6825e6420671b3b2ba9ae06d59973a9cb825625a5cc4a35c14b31b038dd1ca67
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "best_global_step": 20000,
3
- "best_metric": 0.3491455018520355,
4
- "best_model_checkpoint": "./en-wal-checkpoints/checkpoint-20000",
5
- "epoch": 2.7925160569673277,
6
  "eval_steps": 1000,
7
- "global_step": 20000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1568,6 +1568,84 @@
1568
  "eval_samples_per_second": 359.865,
1569
  "eval_steps_per_second": 22.495,
1570
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1571
  }
1572
  ],
1573
  "logging_steps": 100,
@@ -1587,7 +1665,7 @@
1587
  "attributes": {}
1588
  }
1589
  },
1590
- "total_flos": 1.084645982601216e+16,
1591
  "train_batch_size": 16,
1592
  "trial_name": null,
1593
  "trial_params": null
 
1
  {
2
+ "best_global_step": 21000,
3
+ "best_metric": 0.3484770655632019,
4
+ "best_model_checkpoint": "./en-wal-checkpoints/checkpoint-21000",
5
+ "epoch": 2.932141859815694,
6
  "eval_steps": 1000,
7
+ "global_step": 21000,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1568
  "eval_samples_per_second": 359.865,
1569
  "eval_steps_per_second": 22.495,
1570
  "step": 20000
1571
+ },
1572
+ {
1573
+ "epoch": 2.806478637252164,
1574
+ "grad_norm": 1.2699772119522095,
1575
+ "learning_rate": 1.2910732570045612e-06,
1576
+ "loss": 0.3734,
1577
+ "step": 20100
1578
+ },
1579
+ {
1580
+ "epoch": 2.8204412175370006,
1581
+ "grad_norm": 1.3147704601287842,
1582
+ "learning_rate": 1.1979893884389836e-06,
1583
+ "loss": 0.3733,
1584
+ "step": 20200
1585
+ },
1586
+ {
1587
+ "epoch": 2.8344037978218375,
1588
+ "grad_norm": 1.1118375062942505,
1589
+ "learning_rate": 1.104905519873406e-06,
1590
+ "loss": 0.3726,
1591
+ "step": 20300
1592
+ },
1593
+ {
1594
+ "epoch": 2.8483663781066744,
1595
+ "grad_norm": 1.2976515293121338,
1596
+ "learning_rate": 1.0118216513078284e-06,
1597
+ "loss": 0.3748,
1598
+ "step": 20400
1599
+ },
1600
+ {
1601
+ "epoch": 2.862328958391511,
1602
+ "grad_norm": 1.276545763015747,
1603
+ "learning_rate": 9.187377827422509e-07,
1604
+ "loss": 0.3701,
1605
+ "step": 20500
1606
+ },
1607
+ {
1608
+ "epoch": 2.8762915386763472,
1609
+ "grad_norm": 1.0961774587631226,
1610
+ "learning_rate": 8.256539141766733e-07,
1611
+ "loss": 0.3764,
1612
+ "step": 20600
1613
+ },
1614
+ {
1615
+ "epoch": 2.890254118961184,
1616
+ "grad_norm": 0.902900755405426,
1617
+ "learning_rate": 7.325700456110957e-07,
1618
+ "loss": 0.3648,
1619
+ "step": 20700
1620
+ },
1621
+ {
1622
+ "epoch": 2.9042166992460205,
1623
+ "grad_norm": 1.2065845727920532,
1624
+ "learning_rate": 6.394861770455181e-07,
1625
+ "loss": 0.3549,
1626
+ "step": 20800
1627
+ },
1628
+ {
1629
+ "epoch": 2.9181792795308574,
1630
+ "grad_norm": 1.2781444787979126,
1631
+ "learning_rate": 5.464023084799405e-07,
1632
+ "loss": 0.3803,
1633
+ "step": 20900
1634
+ },
1635
+ {
1636
+ "epoch": 2.932141859815694,
1637
+ "grad_norm": 1.297359585762024,
1638
+ "learning_rate": 4.533184399143629e-07,
1639
+ "loss": 0.368,
1640
+ "step": 21000
1641
+ },
1642
+ {
1643
+ "epoch": 2.932141859815694,
1644
+ "eval_loss": 0.3484770655632019,
1645
+ "eval_runtime": 16.7265,
1646
+ "eval_samples_per_second": 360.567,
1647
+ "eval_steps_per_second": 22.539,
1648
+ "step": 21000
1649
  }
1650
  ],
1651
  "logging_steps": 100,
 
1665
  "attributes": {}
1666
  }
1667
  },
1668
+ "total_flos": 1.138883366486016e+16,
1669
  "train_batch_size": 16,
1670
  "trial_name": null,
1671
  "trial_params": null