ljcamargo commited on
Commit
5d206af
·
verified ·
1 Parent(s): d6c7989

Training in progress, step 2300, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dc14ab8f69ec65e7855224cd4b5b2dac50c96b2c4e24065af51a27d34a401e3
3
  size 3826461296
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dfeffd5b70b240cf2e2d2863da88eee1aa83f787540436f9ab733c8f7de2bec
3
  size 3826461296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:befb290c48abb8a194408bc7e0220009e932a229a7b61920a999600f5d482797
3
  size 2479955235
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40948a2d1802a19363fb766e414eb283cbde5c248e2166918a98f5c11d88b13c
3
  size 2479955235
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a1d273ddb38f082f32c6d628e3728d6e9b70d01320b1dc05757751f2141f652
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c615193f9ae8a09e4b00ab7eadeaae7a871753d733db99177a1dcef6ff1f33b3
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95ba5207589489cdf07784253d958ae087f570555b90d46b2edd73dcec34d4f5
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b73cf65e0ad93ae93850e87c8976f968fc0604f876f55ae715db103a1834c6d
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.88,
6
  "eval_steps": 500,
7
- "global_step": 2200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1548,6 +1548,76 @@
1548
  "learning_rate": 6.129421221864952e-06,
1549
  "loss": 0.2943,
1550
  "step": 2200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1551
  }
1552
  ],
1553
  "logging_steps": 10,
@@ -1567,7 +1637,7 @@
1567
  "attributes": {}
1568
  }
1569
  },
1570
- "total_flos": 3.972002478026342e+16,
1571
  "train_batch_size": 2,
1572
  "trial_name": null,
1573
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.92,
6
  "eval_steps": 500,
7
+ "global_step": 2300,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1548
  "learning_rate": 6.129421221864952e-06,
1549
  "loss": 0.2943,
1550
  "step": 2200
1551
+ },
1552
+ {
1553
+ "epoch": 0.884,
1554
+ "grad_norm": 5.587230682373047,
1555
+ "learning_rate": 5.928456591639871e-06,
1556
+ "loss": 0.2124,
1557
+ "step": 2210
1558
+ },
1559
+ {
1560
+ "epoch": 0.888,
1561
+ "grad_norm": 12.111313819885254,
1562
+ "learning_rate": 5.727491961414791e-06,
1563
+ "loss": 0.4733,
1564
+ "step": 2220
1565
+ },
1566
+ {
1567
+ "epoch": 0.892,
1568
+ "grad_norm": 13.287424087524414,
1569
+ "learning_rate": 5.526527331189711e-06,
1570
+ "loss": 0.3033,
1571
+ "step": 2230
1572
+ },
1573
+ {
1574
+ "epoch": 0.896,
1575
+ "grad_norm": 16.977447509765625,
1576
+ "learning_rate": 5.325562700964631e-06,
1577
+ "loss": 0.3256,
1578
+ "step": 2240
1579
+ },
1580
+ {
1581
+ "epoch": 0.9,
1582
+ "grad_norm": 6.201746463775635,
1583
+ "learning_rate": 5.12459807073955e-06,
1584
+ "loss": 0.1995,
1585
+ "step": 2250
1586
+ },
1587
+ {
1588
+ "epoch": 0.904,
1589
+ "grad_norm": 10.706707954406738,
1590
+ "learning_rate": 4.92363344051447e-06,
1591
+ "loss": 0.3303,
1592
+ "step": 2260
1593
+ },
1594
+ {
1595
+ "epoch": 0.908,
1596
+ "grad_norm": 14.989971160888672,
1597
+ "learning_rate": 4.7226688102893895e-06,
1598
+ "loss": 0.2842,
1599
+ "step": 2270
1600
+ },
1601
+ {
1602
+ "epoch": 0.912,
1603
+ "grad_norm": 15.570676803588867,
1604
+ "learning_rate": 4.521704180064309e-06,
1605
+ "loss": 0.212,
1606
+ "step": 2280
1607
+ },
1608
+ {
1609
+ "epoch": 0.916,
1610
+ "grad_norm": 3.9141690731048584,
1611
+ "learning_rate": 4.320739549839229e-06,
1612
+ "loss": 0.263,
1613
+ "step": 2290
1614
+ },
1615
+ {
1616
+ "epoch": 0.92,
1617
+ "grad_norm": 6.531361103057861,
1618
+ "learning_rate": 4.119774919614148e-06,
1619
+ "loss": 0.2503,
1620
+ "step": 2300
1621
  }
1622
  ],
1623
  "logging_steps": 10,
 
1637
  "attributes": {}
1638
  }
1639
  },
1640
+ "total_flos": 4.150543064863334e+16,
1641
  "train_batch_size": 2,
1642
  "trial_name": null,
1643
  "trial_params": null