NairaRahim commited on
Commit
e67d014
·
verified ·
1 Parent(s): 4568f83

Training in progress, epoch 16, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:811ccfaa8e3c84f8eb2939ce96631f3ec49f0f3099b1b0e60ca7c519a45c98e0
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e1ea03da8d9a978320d45f1bc6677407a85624af3d9baa7bae32e5c03676367
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:727366cbef486a57fc5b65e236bc10bb55ca8dff9e286c2204c706e7a3b6d855
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b98a2483ec61025369cf6eb8fec5397cf636bfb0ffa7a3eedf987ef5b4b9d5c8
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6106dbeee05b0c02d6a33db47221a3364be09d497bc2812c14de2fc90515255
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffc97010f20f826b75fdc09ec365ad76a45dfcdc64194b72334d5902c2cf28eb
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04e18aa381d7c2401ed38f210463d64263e7586c0355129a12549e48c017c1be
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5384c34df266d1db083f57452aa67b48a3012f0aeee7f4ad7194984e89d75fe
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 34.700294494628906,
3
- "best_model_checkpoint": "/kaggle/working/output/checkpoint-13050",
4
- "epoch": 15.0,
5
  "eval_steps": 500,
6
- "global_step": 19575,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1492,6 +1492,105 @@
1492
  "eval_samples_per_second": 26.507,
1493
  "eval_steps_per_second": 3.331,
1494
  "step": 19575
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1495
  }
1496
  ],
1497
  "logging_steps": 100,
@@ -1506,7 +1605,7 @@
1506
  "early_stopping_threshold": 0.0
1507
  },
1508
  "attributes": {
1509
- "early_stopping_patience_counter": 5
1510
  }
1511
  },
1512
  "TrainerControl": {
@@ -1520,7 +1619,7 @@
1520
  "attributes": {}
1521
  }
1522
  },
1523
- "total_flos": 2.110966354570752e+16,
1524
  "train_batch_size": 8,
1525
  "trial_name": null,
1526
  "trial_params": null
 
1
  {
2
+ "best_metric": 34.54485321044922,
3
+ "best_model_checkpoint": "/kaggle/working/output/checkpoint-20880",
4
+ "epoch": 16.0,
5
  "eval_steps": 500,
6
+ "global_step": 20880,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1492
  "eval_samples_per_second": 26.507,
1493
  "eval_steps_per_second": 3.331,
1494
  "step": 19575
1495
+ },
1496
+ {
1497
+ "epoch": 15.019157088122606,
1498
+ "grad_norm": 3.291614294052124,
1499
+ "learning_rate": 4.061733716475096e-05,
1500
+ "loss": 32.9437,
1501
+ "step": 19600
1502
+ },
1503
+ {
1504
+ "epoch": 15.095785440613026,
1505
+ "grad_norm": 4.4670867919921875,
1506
+ "learning_rate": 4.056944444444445e-05,
1507
+ "loss": 33.6879,
1508
+ "step": 19700
1509
+ },
1510
+ {
1511
+ "epoch": 15.172413793103448,
1512
+ "grad_norm": 3.4122018814086914,
1513
+ "learning_rate": 4.0521551724137934e-05,
1514
+ "loss": 33.0167,
1515
+ "step": 19800
1516
+ },
1517
+ {
1518
+ "epoch": 15.24904214559387,
1519
+ "grad_norm": 3.854083299636841,
1520
+ "learning_rate": 4.047365900383142e-05,
1521
+ "loss": 33.8342,
1522
+ "step": 19900
1523
+ },
1524
+ {
1525
+ "epoch": 15.32567049808429,
1526
+ "grad_norm": 2.945396900177002,
1527
+ "learning_rate": 4.042576628352491e-05,
1528
+ "loss": 32.3812,
1529
+ "step": 20000
1530
+ },
1531
+ {
1532
+ "epoch": 15.402298850574713,
1533
+ "grad_norm": 2.5246341228485107,
1534
+ "learning_rate": 4.0377873563218395e-05,
1535
+ "loss": 33.3573,
1536
+ "step": 20100
1537
+ },
1538
+ {
1539
+ "epoch": 15.478927203065133,
1540
+ "grad_norm": 2.837134599685669,
1541
+ "learning_rate": 4.032998084291188e-05,
1542
+ "loss": 33.5981,
1543
+ "step": 20200
1544
+ },
1545
+ {
1546
+ "epoch": 15.555555555555555,
1547
+ "grad_norm": 4.350450038909912,
1548
+ "learning_rate": 4.028208812260537e-05,
1549
+ "loss": 34.0699,
1550
+ "step": 20300
1551
+ },
1552
+ {
1553
+ "epoch": 15.632183908045977,
1554
+ "grad_norm": 2.4908435344696045,
1555
+ "learning_rate": 4.0234195402298855e-05,
1556
+ "loss": 33.8105,
1557
+ "step": 20400
1558
+ },
1559
+ {
1560
+ "epoch": 15.708812260536398,
1561
+ "grad_norm": 2.9461615085601807,
1562
+ "learning_rate": 4.0186302681992336e-05,
1563
+ "loss": 33.3251,
1564
+ "step": 20500
1565
+ },
1566
+ {
1567
+ "epoch": 15.78544061302682,
1568
+ "grad_norm": 2.8716940879821777,
1569
+ "learning_rate": 4.013840996168582e-05,
1570
+ "loss": 33.7594,
1571
+ "step": 20600
1572
+ },
1573
+ {
1574
+ "epoch": 15.862068965517242,
1575
+ "grad_norm": 2.7166991233825684,
1576
+ "learning_rate": 4.009051724137931e-05,
1577
+ "loss": 33.58,
1578
+ "step": 20700
1579
+ },
1580
+ {
1581
+ "epoch": 15.938697318007662,
1582
+ "grad_norm": 2.2878618240356445,
1583
+ "learning_rate": 4.0042624521072796e-05,
1584
+ "loss": 33.4573,
1585
+ "step": 20800
1586
+ },
1587
+ {
1588
+ "epoch": 16.0,
1589
+ "eval_loss": 34.54485321044922,
1590
+ "eval_runtime": 49.3188,
1591
+ "eval_samples_per_second": 26.46,
1592
+ "eval_steps_per_second": 3.325,
1593
+ "step": 20880
1594
  }
1595
  ],
1596
  "logging_steps": 100,
 
1605
  "early_stopping_threshold": 0.0
1606
  },
1607
  "attributes": {
1608
+ "early_stopping_patience_counter": 0
1609
  }
1610
  },
1611
  "TrainerControl": {
 
1619
  "attributes": {}
1620
  }
1621
  },
1622
+ "total_flos": 2.251697444875469e+16,
1623
  "train_batch_size": 8,
1624
  "trial_name": null,
1625
  "trial_params": null