alicegoesdown commited on
Commit
13009e1
·
verified ·
1 Parent(s): 6c71e76

Training in progress, step 2100, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0c6ba386b6c9ac14076b04c23768d4a01b00f7ed5d433cf7482534886d5a076
3
  size 1130395064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6e8b8c1e9c0bce3f00131df71d272a082ce5dcf2d99759019d2b0659c78315e
3
  size 1130395064
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13c817bad7985c8c2675399e52d5193079cac5653e43895457ed1a8c377c6c4e
3
  size 2260919034
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6188e4d76c0b4e37ea00b0a3b23f5b7084262ecb5f16dd3e7bcbf521356872ef
3
  size 2260919034
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecd50ddad6b79517b1a64ad33a7b62461119e1f341649c526af1be9253918f09
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:347168a1ca5a3d3959109211d8471dcf62bcaa6f64d8a927b670d1eeeb82ee9c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:289c7e0141bb9f3ce06cf36b41817dc5543309cbcea37abb63a612bfb66d7221
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb8d370aad2d5fbcf199f24dc52e08b4648a77d17fa77f60c7ea0dbb61e6a008
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.7938833236694336,
3
  "best_model_checkpoint": "./output/checkpoint-750",
4
- "epoch": 2.4904214559386975,
5
  "eval_steps": 150,
6
- "global_step": 1950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1476,6 +1476,119 @@
1476
  "eval_samples_per_second": 10.82,
1477
  "eval_steps_per_second": 10.82,
1478
  "step": 1950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1479
  }
1480
  ],
1481
  "logging_steps": 10,
@@ -1495,7 +1608,7 @@
1495
  "attributes": {}
1496
  }
1497
  },
1498
- "total_flos": 2.110663884391588e+17,
1499
  "train_batch_size": 16,
1500
  "trial_name": null,
1501
  "trial_params": null
 
1
  {
2
  "best_metric": 1.7938833236694336,
3
  "best_model_checkpoint": "./output/checkpoint-750",
4
+ "epoch": 2.681992337164751,
5
  "eval_steps": 150,
6
+ "global_step": 2100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1476
  "eval_samples_per_second": 10.82,
1477
  "eval_steps_per_second": 10.82,
1478
  "step": 1950
1479
+ },
1480
+ {
1481
+ "epoch": 2.503192848020434,
1482
+ "grad_norm": 4.016136646270752,
1483
+ "learning_rate": 7.553069430743865e-06,
1484
+ "loss": 1.4328,
1485
+ "step": 1960
1486
+ },
1487
+ {
1488
+ "epoch": 2.5159642401021713,
1489
+ "grad_norm": 3.8296611309051514,
1490
+ "learning_rate": 7.552060297199744e-06,
1491
+ "loss": 1.4755,
1492
+ "step": 1970
1493
+ },
1494
+ {
1495
+ "epoch": 2.528735632183908,
1496
+ "grad_norm": 4.180707931518555,
1497
+ "learning_rate": 7.5508960240322795e-06,
1498
+ "loss": 1.448,
1499
+ "step": 1980
1500
+ },
1501
+ {
1502
+ "epoch": 2.541507024265645,
1503
+ "grad_norm": 4.346092700958252,
1504
+ "learning_rate": 7.549576659100182e-06,
1505
+ "loss": 1.4674,
1506
+ "step": 1990
1507
+ },
1508
+ {
1509
+ "epoch": 2.554278416347382,
1510
+ "grad_norm": 4.5013298988342285,
1511
+ "learning_rate": 7.548102256637375e-06,
1512
+ "loss": 1.4986,
1513
+ "step": 2000
1514
+ },
1515
+ {
1516
+ "epoch": 2.5670498084291187,
1517
+ "grad_norm": 4.069826602935791,
1518
+ "learning_rate": 7.546472877250771e-06,
1519
+ "loss": 1.4708,
1520
+ "step": 2010
1521
+ },
1522
+ {
1523
+ "epoch": 2.579821200510856,
1524
+ "grad_norm": 4.403062343597412,
1525
+ "learning_rate": 7.544688587917772e-06,
1526
+ "loss": 1.4648,
1527
+ "step": 2020
1528
+ },
1529
+ {
1530
+ "epoch": 2.5925925925925926,
1531
+ "grad_norm": 4.398291110992432,
1532
+ "learning_rate": 7.542749461983527e-06,
1533
+ "loss": 1.4526,
1534
+ "step": 2030
1535
+ },
1536
+ {
1537
+ "epoch": 2.6053639846743293,
1538
+ "grad_norm": 4.199296951293945,
1539
+ "learning_rate": 7.540655579157906e-06,
1540
+ "loss": 1.5017,
1541
+ "step": 2040
1542
+ },
1543
+ {
1544
+ "epoch": 2.6181353767560664,
1545
+ "grad_norm": 4.818760871887207,
1546
+ "learning_rate": 7.53840702551223e-06,
1547
+ "loss": 1.4936,
1548
+ "step": 2050
1549
+ },
1550
+ {
1551
+ "epoch": 2.630906768837803,
1552
+ "grad_norm": 4.557876110076904,
1553
+ "learning_rate": 7.536003893475734e-06,
1554
+ "loss": 1.4164,
1555
+ "step": 2060
1556
+ },
1557
+ {
1558
+ "epoch": 2.6436781609195403,
1559
+ "grad_norm": 3.823756456375122,
1560
+ "learning_rate": 7.533446281831764e-06,
1561
+ "loss": 1.3971,
1562
+ "step": 2070
1563
+ },
1564
+ {
1565
+ "epoch": 2.656449553001277,
1566
+ "grad_norm": 4.311744213104248,
1567
+ "learning_rate": 7.530734295713717e-06,
1568
+ "loss": 1.4762,
1569
+ "step": 2080
1570
+ },
1571
+ {
1572
+ "epoch": 2.6692209450830138,
1573
+ "grad_norm": 4.65121603012085,
1574
+ "learning_rate": 7.527868046600719e-06,
1575
+ "loss": 1.5513,
1576
+ "step": 2090
1577
+ },
1578
+ {
1579
+ "epoch": 2.681992337164751,
1580
+ "grad_norm": 4.508437156677246,
1581
+ "learning_rate": 7.524847652313045e-06,
1582
+ "loss": 1.41,
1583
+ "step": 2100
1584
+ },
1585
+ {
1586
+ "epoch": 2.681992337164751,
1587
+ "eval_loss": 1.8178269863128662,
1588
+ "eval_runtime": 45.2353,
1589
+ "eval_samples_per_second": 11.053,
1590
+ "eval_steps_per_second": 11.053,
1591
+ "step": 2100
1592
  }
1593
  ],
1594
  "logging_steps": 10,
 
1608
  "attributes": {}
1609
  }
1610
  },
1611
+ "total_flos": 2.2711771292829696e+17,
1612
  "train_batch_size": 16,
1613
  "trial_name": null,
1614
  "trial_params": null