ljcamargo commited on
Commit
4a7702d
·
verified ·
1 Parent(s): fafbd62

Training in progress, step 2250, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f413611c4d62c42b34cb3c35bf307f1f004d85a195b49625f53cb045532d3640
3
  size 3237818848
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:806718b1eb03b182c4243b5852d730ab0623fae273359ceac4fe27a42cd0a775
3
  size 3237818848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3f4fe7007d84dd8d6957cbfe09cbddc20609770c2a83ca6a81bf721da4dd27d
3
  size 2062251569
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db7a8720fe6eca8fdfeb84875333805ad66f043fc4a4f42dec7e8bfbf28079eb
3
  size 2062251569
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e677751a73856bc26d9944f5cb47e64e748585d8eec5698550c17847fccdcc5
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcd869f950beffeea433b31c1771e6c791cdf18f5aa8f339dfd15edc2ed1949
3
  size 14645
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f13dd54935d4d1876d05824ed5aab8e787b691f2aec583b5a7e328fd2bead633
3
  size 1383
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a918540d48e856372c74bc697e7d28a86a7fdf24471d9a4adf2ffeeae4217207
3
  size 1383
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecb7b6032736995012c1953174826b2661afb8ec0cbb267bc342afa03a626fe2
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:867aa6395462cd9ef6621775b2ee589702b29a904be428c06029ad86f199f393
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.9333333333333333,
6
  "eval_steps": 300,
7
- "global_step": 2100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1490,6 +1490,111 @@
1490
  "learning_rate": 2.3952935202260608e-06,
1491
  "loss": 0.8618,
1492
  "step": 2100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1493
  }
1494
  ],
1495
  "logging_steps": 10,
@@ -1504,12 +1609,12 @@
1504
  "should_evaluate": false,
1505
  "should_log": false,
1506
  "should_save": true,
1507
- "should_training_stop": false
1508
  },
1509
  "attributes": {}
1510
  }
1511
  },
1512
- "total_flos": 5.7286358728704e+19,
1513
  "train_batch_size": 4,
1514
  "trial_name": null,
1515
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0,
6
  "eval_steps": 300,
7
+ "global_step": 2250,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1490
  "learning_rate": 2.3952935202260608e-06,
1491
  "loss": 0.8618,
1492
  "step": 2100
1493
+ },
1494
+ {
1495
+ "epoch": 0.9377777777777778,
1496
+ "grad_norm": 7.140369892120361,
1497
+ "learning_rate": 2.095372194305578e-06,
1498
+ "loss": 0.8829,
1499
+ "step": 2110
1500
+ },
1501
+ {
1502
+ "epoch": 0.9422222222222222,
1503
+ "grad_norm": 8.534485816955566,
1504
+ "learning_rate": 1.815306541298023e-06,
1505
+ "loss": 0.8757,
1506
+ "step": 2120
1507
+ },
1508
+ {
1509
+ "epoch": 0.9466666666666667,
1510
+ "grad_norm": 10.03408145904541,
1511
+ "learning_rate": 1.55515336027533e-06,
1512
+ "loss": 0.8699,
1513
+ "step": 2130
1514
+ },
1515
+ {
1516
+ "epoch": 0.9511111111111111,
1517
+ "grad_norm": 10.744848251342773,
1518
+ "learning_rate": 1.3149654119351874e-06,
1519
+ "loss": 0.9025,
1520
+ "step": 2140
1521
+ },
1522
+ {
1523
+ "epoch": 0.9555555555555556,
1524
+ "grad_norm": 12.541043281555176,
1525
+ "learning_rate": 1.0947914079008193e-06,
1526
+ "loss": 0.9111,
1527
+ "step": 2150
1528
+ },
1529
+ {
1530
+ "epoch": 0.96,
1531
+ "grad_norm": 9.694021224975586,
1532
+ "learning_rate": 8.946760008419208e-07,
1533
+ "loss": 0.8739,
1534
+ "step": 2160
1535
+ },
1536
+ {
1537
+ "epoch": 0.9644444444444444,
1538
+ "grad_norm": 8.234989166259766,
1539
+ "learning_rate": 7.146597754188578e-07,
1540
+ "loss": 0.8864,
1541
+ "step": 2170
1542
+ },
1543
+ {
1544
+ "epoch": 0.9688888888888889,
1545
+ "grad_norm": 8.109750747680664,
1546
+ "learning_rate": 5.547792400518171e-07,
1547
+ "loss": 0.8951,
1548
+ "step": 2180
1549
+ },
1550
+ {
1551
+ "epoch": 0.9733333333333334,
1552
+ "grad_norm": 5.8855180740356445,
1553
+ "learning_rate": 4.150668195166962e-07,
1554
+ "loss": 0.9224,
1555
+ "step": 2190
1556
+ },
1557
+ {
1558
+ "epoch": 0.9777777777777777,
1559
+ "grad_norm": 6.274494171142578,
1560
+ "learning_rate": 2.955508483691416e-07,
1561
+ "loss": 0.867,
1562
+ "step": 2200
1563
+ },
1564
+ {
1565
+ "epoch": 0.9822222222222222,
1566
+ "grad_norm": 9.63640022277832,
1567
+ "learning_rate": 1.9625556519811128e-07,
1568
+ "loss": 0.8781,
1569
+ "step": 2210
1570
+ },
1571
+ {
1572
+ "epoch": 0.9866666666666667,
1573
+ "grad_norm": 8.013216018676758,
1574
+ "learning_rate": 1.1720110771015202e-07,
1575
+ "loss": 0.9555,
1576
+ "step": 2220
1577
+ },
1578
+ {
1579
+ "epoch": 0.9911111111111112,
1580
+ "grad_norm": 11.773499488830566,
1581
+ "learning_rate": 5.840350864533273e-08,
1582
+ "loss": 0.8768,
1583
+ "step": 2230
1584
+ },
1585
+ {
1586
+ "epoch": 0.9955555555555555,
1587
+ "grad_norm": 6.226689338684082,
1588
+ "learning_rate": 1.9874692525745365e-08,
1589
+ "loss": 0.8743,
1590
+ "step": 2240
1591
+ },
1592
+ {
1593
+ "epoch": 1.0,
1594
+ "grad_norm": 6.99469518661499,
1595
+ "learning_rate": 1.6224732370728746e-09,
1596
+ "loss": 0.8964,
1597
+ "step": 2250
1598
  }
1599
  ],
1600
  "logging_steps": 10,
 
1609
  "should_evaluate": false,
1610
  "should_log": false,
1611
  "should_save": true,
1612
+ "should_training_stop": true
1613
  },
1614
  "attributes": {}
1615
  }
1616
  },
1617
+ "total_flos": 6.137653654388736e+19,
1618
  "train_batch_size": 4,
1619
  "trial_name": null,
1620
  "trial_params": null