irishprancer commited on
Commit
10912f5
·
verified ·
1 Parent(s): 648aa09

Training in progress, step 1650, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be8a45329c3e8e3a2f45c77d7ac9080fd0aad7fa0e5966556ccab7ad8fa2f098
3
  size 527048968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e34a2741903c7131a591ab1ea100bd60a9f729205b2327f5d897a43dd1a350df
3
  size 527048968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1178946cf0771c850e2a43f81c79255d308890814174c3df8070c8810c0eba4c
3
  size 1054135994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b868ab44d14f751a290f2fb9a43b0004c429bf63a62d6da5cdde1046626611
3
  size 1054135994
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86a4d2549b1634b5162bba6559970b4387f95d4c5153179e15ae2066cc09b884
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:001785b24c383f1dc5e05ca97682558022e868af635239d8c60b6646c2c21747
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d00fd7759971df004f86f26240bbcec291b25e581e60ea022fb56951da465e4f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0ba921a18d46c557f13f21f43adf78f0e35b0b4cbde5268f7f2125015b3077
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.7166430950164795,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
- "epoch": 65.21739130434783,
5
  "eval_steps": 150,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1457,6 +1457,151 @@
1457
  "EMA_steps_per_second": 24.774,
1458
  "epoch": 65.21739130434783,
1459
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1460
  }
1461
  ],
1462
  "logging_steps": 10,
@@ -1476,7 +1621,7 @@
1476
  "attributes": {}
1477
  }
1478
  },
1479
- "total_flos": 3.854430872108237e+16,
1480
  "train_batch_size": 4,
1481
  "trial_name": null,
1482
  "trial_params": null
 
1
  {
2
  "best_metric": 0.7166430950164795,
3
  "best_model_checkpoint": "./output/checkpoint-450",
4
+ "epoch": 71.73913043478261,
5
  "eval_steps": 150,
6
+ "global_step": 1650,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1457
  "EMA_steps_per_second": 24.774,
1458
  "epoch": 65.21739130434783,
1459
  "step": 1500
1460
+ },
1461
+ {
1462
+ "epoch": 65.65217391304348,
1463
+ "grad_norm": 2.2542121410369873,
1464
+ "learning_rate": 1.4981367287650419e-05,
1465
+ "loss": 0.3164,
1466
+ "step": 1510
1467
+ },
1468
+ {
1469
+ "epoch": 66.08695652173913,
1470
+ "grad_norm": 1.7643301486968994,
1471
+ "learning_rate": 1.4981303451854267e-05,
1472
+ "loss": 0.2947,
1473
+ "step": 1520
1474
+ },
1475
+ {
1476
+ "epoch": 66.52173913043478,
1477
+ "grad_norm": 1.7471901178359985,
1478
+ "learning_rate": 1.4981236647145501e-05,
1479
+ "loss": 0.3103,
1480
+ "step": 1530
1481
+ },
1482
+ {
1483
+ "epoch": 66.95652173913044,
1484
+ "grad_norm": 2.057833194732666,
1485
+ "learning_rate": 1.4981166873550601e-05,
1486
+ "loss": 0.3051,
1487
+ "step": 1540
1488
+ },
1489
+ {
1490
+ "epoch": 67.3913043478261,
1491
+ "grad_norm": 1.7425355911254883,
1492
+ "learning_rate": 1.4981094131097224e-05,
1493
+ "loss": 0.2713,
1494
+ "step": 1550
1495
+ },
1496
+ {
1497
+ "epoch": 67.82608695652173,
1498
+ "grad_norm": 2.050690174102783,
1499
+ "learning_rate": 1.49810184198142e-05,
1500
+ "loss": 0.3439,
1501
+ "step": 1560
1502
+ },
1503
+ {
1504
+ "epoch": 68.26086956521739,
1505
+ "grad_norm": 2.0778491497039795,
1506
+ "learning_rate": 1.498093973973154e-05,
1507
+ "loss": 0.2503,
1508
+ "step": 1570
1509
+ },
1510
+ {
1511
+ "epoch": 68.69565217391305,
1512
+ "grad_norm": 1.8078017234802246,
1513
+ "learning_rate": 1.4980858090880429e-05,
1514
+ "loss": 0.2862,
1515
+ "step": 1580
1516
+ },
1517
+ {
1518
+ "epoch": 69.1304347826087,
1519
+ "grad_norm": 1.9451018571853638,
1520
+ "learning_rate": 1.4980773473293232e-05,
1521
+ "loss": 0.368,
1522
+ "step": 1590
1523
+ },
1524
+ {
1525
+ "epoch": 69.56521739130434,
1526
+ "grad_norm": 1.9795953035354614,
1527
+ "learning_rate": 1.4980685887003486e-05,
1528
+ "loss": 0.3073,
1529
+ "step": 1600
1530
+ },
1531
+ {
1532
+ "epoch": 70.0,
1533
+ "grad_norm": 1.6645371913909912,
1534
+ "learning_rate": 1.498059533204591e-05,
1535
+ "loss": 0.2691,
1536
+ "step": 1610
1537
+ },
1538
+ {
1539
+ "epoch": 70.43478260869566,
1540
+ "grad_norm": 2.21379017829895,
1541
+ "learning_rate": 1.4980501808456398e-05,
1542
+ "loss": 0.3142,
1543
+ "step": 1620
1544
+ },
1545
+ {
1546
+ "epoch": 70.8695652173913,
1547
+ "grad_norm": 1.9500844478607178,
1548
+ "learning_rate": 1.4980405316272018e-05,
1549
+ "loss": 0.2996,
1550
+ "step": 1630
1551
+ },
1552
+ {
1553
+ "epoch": 71.30434782608695,
1554
+ "grad_norm": 2.359870195388794,
1555
+ "learning_rate": 1.4980305855531015e-05,
1556
+ "loss": 0.2888,
1557
+ "step": 1640
1558
+ },
1559
+ {
1560
+ "epoch": 71.73913043478261,
1561
+ "grad_norm": 1.8895881175994873,
1562
+ "learning_rate": 1.4980203426272815e-05,
1563
+ "loss": 0.2624,
1564
+ "step": 1650
1565
+ },
1566
+ {
1567
+ "epoch": 71.73913043478261,
1568
+ "eval_loss": 0.847686767578125,
1569
+ "eval_runtime": 0.5359,
1570
+ "eval_samples_per_second": 18.659,
1571
+ "eval_steps_per_second": 18.659,
1572
+ "step": 1650
1573
+ },
1574
+ {
1575
+ "Start_State_loss": 0.8601926565170288,
1576
+ "Start_State_runtime": 0.3989,
1577
+ "Start_State_samples_per_second": 25.067,
1578
+ "Start_State_steps_per_second": 25.067,
1579
+ "epoch": 71.73913043478261,
1580
+ "step": 1650
1581
+ },
1582
+ {
1583
+ "Raw_Model_loss": 0.847686767578125,
1584
+ "Raw_Model_runtime": 0.4133,
1585
+ "Raw_Model_samples_per_second": 24.198,
1586
+ "Raw_Model_steps_per_second": 24.198,
1587
+ "epoch": 71.73913043478261,
1588
+ "step": 1650
1589
+ },
1590
+ {
1591
+ "SWA_loss": 0.7314801216125488,
1592
+ "SWA_runtime": 0.3914,
1593
+ "SWA_samples_per_second": 25.548,
1594
+ "SWA_steps_per_second": 25.548,
1595
+ "epoch": 71.73913043478261,
1596
+ "step": 1650
1597
+ },
1598
+ {
1599
+ "EMA_loss": 0.8605908155441284,
1600
+ "EMA_runtime": 0.3897,
1601
+ "EMA_samples_per_second": 25.662,
1602
+ "EMA_steps_per_second": 25.662,
1603
+ "epoch": 71.73913043478261,
1604
+ "step": 1650
1605
  }
1606
  ],
1607
  "logging_steps": 10,
 
1621
  "attributes": {}
1622
  }
1623
  },
1624
+ "total_flos": 4.242701339976499e+16,
1625
  "train_batch_size": 4,
1626
  "trial_name": null,
1627
  "trial_params": null