Upload folder using huggingface_hub

- adapter_model.safetensors +1 -1
- optimizer.pt +1 -1
- rng_state.pth +1 -1
- scheduler.pt +1 -1
- trainer_state.json +3 -2784
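
The commit message indicates these files were pushed with the `huggingface_hub` library. A minimal sketch of such an upload, assuming the repo id and local checkpoint path are placeholders rather than the values used for this repository:

```python
from huggingface_hub import HfApi

api = HfApi()  # uses the cached login token or the HF_TOKEN environment variable

# Push every file in the checkpoint directory as a single commit.
# repo_id and folder_path below are placeholders, not the repo shown in this diff.
api.upload_folder(
    repo_id="user/my-lora-adapter",
    folder_path="./checkpoint-716",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)
```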
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:08be5413fd76f7c5ddd4d3fb2f098ea528697a75d5c0f4d1fd8114d552ca9968
 size 60189176
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4d3149c227ee5659bb5e22737e3468b04ad7b68ab2efb91807f392851f5aeb8a
 size 120498699
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:a8e2011629d8bed3ef560fa11175cac55684c4e12a72634bb24abf767b6c7399
 size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d322dfc4f66861b0919370db5617fb4fea8855e34f6a67b848c7dcda9eaf750b
 size 1465
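
Each pointer file above records only the blob's SHA-256 digest and byte size, so a downloaded copy can be checked against its pointer. A minimal sketch, with the file path taken as a placeholder and the expected values copied from the scheduler.pt diff above:

```python
import hashlib
from pathlib import Path

def matches_pointer(path: str, expected_sha256: str, expected_size: int) -> bool:
    """Compare a local file against the oid/size recorded in a Git LFS pointer."""
    data = Path(path).read_bytes()
    return (
        len(data) == expected_size
        and hashlib.sha256(data).hexdigest() == expected_sha256
    )

# Values from the scheduler.pt pointer shown in this commit.
print(matches_pointer(
    "scheduler.pt",
    "d322dfc4f66861b0919370db5617fb4fea8855e34f6a67b848c7dcda9eaf750b",
    1465,
))
```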
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
   "eval_steps": 500,
-  "global_step":
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -5570,2787 +5570,6 @@
       "eval_samples_per_second": 3.177,
       "eval_steps_per_second": 1.059,
       "step": 554
-
"epoch": 2.586425339366516,
|
| 7187 |
-
"grad_norm": 0.4359396994113922,
|
| 7188 |
-
"learning_rate": 0.000343446165190277,
|
| 7189 |
-
"loss": 0.0752,
|
| 7190 |
-
"mean_token_accuracy": 0.9807359129190445,
|
| 7191 |
-
"num_tokens": 6318232.0,
|
| 7192 |
-
"step": 716
|
| 7193 |
-
},
|
| 7194 |
-
{
|
| 7195 |
-
"entropy": 1.7531521618366241,
|
| 7196 |
-
"epoch": 2.5900452488687784,
|
| 7197 |
-
"grad_norm": 0.7446436882019043,
|
| 7198 |
-
"learning_rate": 0.0003430147159196887,
|
| 7199 |
-
"loss": 0.1467,
|
| 7200 |
-
"mean_token_accuracy": 0.9661064445972443,
|
| 7201 |
-
"num_tokens": 6327607.0,
|
| 7202 |
-
"step": 717
|
| 7203 |
-
},
|
| 7204 |
-
{
|
| 7205 |
-
"entropy": 1.83816197514534,
|
| 7206 |
-
"epoch": 2.593665158371041,
|
| 7207 |
-
"grad_norm": 0.3669150173664093,
|
| 7208 |
-
"learning_rate": 0.0003425830312992125,
|
| 7209 |
-
"loss": 0.076,
|
| 7210 |
-
"mean_token_accuracy": 0.9777591675519943,
|
| 7211 |
-
"num_tokens": 6336991.0,
|
| 7212 |
-
"step": 718
|
| 7213 |
-
},
|
| 7214 |
-
{
|
| 7215 |
-
"entropy": 1.9396244585514069,
|
| 7216 |
-
"epoch": 2.5972850678733033,
|
| 7217 |
-
"grad_norm": 0.6049129962921143,
|
| 7218 |
-
"learning_rate": 0.00034215111296844147,
|
| 7219 |
-
"loss": 0.1001,
|
| 7220 |
-
"mean_token_accuracy": 0.968943640589714,
|
| 7221 |
-
"num_tokens": 6345381.0,
|
| 7222 |
-
"step": 719
|
| 7223 |
-
},
|
| 7224 |
-
{
|
| 7225 |
-
"entropy": 1.8745197057724,
|
| 7226 |
-
"epoch": 2.6009049773755657,
|
| 7227 |
-
"grad_norm": 0.8561233878135681,
|
| 7228 |
-
"learning_rate": 0.00034171896256785645,
|
| 7229 |
-
"loss": 0.2378,
|
| 7230 |
-
"mean_token_accuracy": 0.9442594349384308,
|
| 7231 |
-
"num_tokens": 6354290.0,
|
| 7232 |
-
"step": 720
|
| 7233 |
-
},
|
| 7234 |
-
{
|
| 7235 |
-
"entropy": 1.8199078440666199,
|
| 7236 |
-
"epoch": 2.604524886877828,
|
| 7237 |
-
"grad_norm": 0.4546636939048767,
|
| 7238 |
-
"learning_rate": 0.00034128658173881993,
|
| 7239 |
-
"loss": 0.0407,
|
| 7240 |
-
"mean_token_accuracy": 0.9873656630516052,
|
| 7241 |
-
"num_tokens": 6362826.0,
|
| 7242 |
-
"step": 721
|
| 7243 |
-
},
|
| 7244 |
-
{
|
| 7245 |
-
"entropy": 1.8066097497940063,
|
| 7246 |
-
"epoch": 2.6081447963800906,
|
| 7247 |
-
"grad_norm": 0.6496687531471252,
|
| 7248 |
-
"learning_rate": 0.0003408539721235691,
|
| 7249 |
-
"loss": 0.1279,
|
| 7250 |
-
"mean_token_accuracy": 0.9674505293369293,
|
| 7251 |
-
"num_tokens": 6371666.0,
|
| 7252 |
-
"step": 722
|
| 7253 |
-
},
|
| 7254 |
-
{
|
| 7255 |
-
"entropy": 1.8027856945991516,
|
| 7256 |
-
"epoch": 2.611764705882353,
|
| 7257 |
-
"grad_norm": 0.6001412272453308,
|
| 7258 |
-
"learning_rate": 0.0003404211353652106,
|
| 7259 |
-
"loss": 0.1144,
|
| 7260 |
-
"mean_token_accuracy": 0.9672902077436447,
|
| 7261 |
-
"num_tokens": 6380469.0,
|
| 7262 |
-
"step": 723
|
| 7263 |
-
},
|
| 7264 |
-
{
|
| 7265 |
-
"entropy": 1.7859437465667725,
|
| 7266 |
-
"epoch": 2.6153846153846154,
|
| 7267 |
-
"grad_norm": 0.4654795229434967,
|
| 7268 |
-
"learning_rate": 0.0003399880731077136,
|
| 7269 |
-
"loss": 0.0655,
|
| 7270 |
-
"mean_token_accuracy": 0.9804074019193649,
|
| 7271 |
-
"num_tokens": 6389485.0,
|
| 7272 |
-
"step": 724
|
| 7273 |
-
},
|
| 7274 |
-
{
|
| 7275 |
-
"entropy": 1.722127079963684,
|
| 7276 |
-
"epoch": 2.619004524886878,
|
| 7277 |
-
"grad_norm": 0.5452624559402466,
|
| 7278 |
-
"learning_rate": 0.0003395547869959037,
|
| 7279 |
-
"loss": 0.0827,
|
| 7280 |
-
"mean_token_accuracy": 0.972189649939537,
|
| 7281 |
-
"num_tokens": 6398523.0,
|
| 7282 |
-
"step": 725
|
| 7283 |
-
},
|
| 7284 |
-
{
|
| 7285 |
-
"entropy": 1.7406074404716492,
|
| 7286 |
-
"epoch": 2.6226244343891403,
|
| 7287 |
-
"grad_norm": 0.5524203181266785,
|
| 7288 |
-
"learning_rate": 0.00033912127867545685,
|
| 7289 |
-
"loss": 0.1279,
|
| 7290 |
-
"mean_token_accuracy": 0.9688322842121124,
|
| 7291 |
-
"num_tokens": 6407560.0,
|
| 7292 |
-
"step": 726
|
| 7293 |
-
},
|
| 7294 |
-
{
|
| 7295 |
-
"entropy": 1.7783840000629425,
|
| 7296 |
-
"epoch": 2.6262443438914027,
|
| 7297 |
-
"grad_norm": 0.6428073644638062,
|
| 7298 |
-
"learning_rate": 0.00033868754979289275,
|
| 7299 |
-
"loss": 0.1392,
|
| 7300 |
-
"mean_token_accuracy": 0.9665655642747879,
|
| 7301 |
-
"num_tokens": 6416230.0,
|
| 7302 |
-
"step": 727
|
| 7303 |
-
},
|
| 7304 |
-
{
|
| 7305 |
-
"entropy": 1.7406431436538696,
|
| 7306 |
-
"epoch": 2.629864253393665,
|
| 7307 |
-
"grad_norm": 0.6197221875190735,
|
| 7308 |
-
"learning_rate": 0.0003382536019955691,
|
| 7309 |
-
"loss": 0.2688,
|
| 7310 |
-
"mean_token_accuracy": 0.9567561745643616,
|
| 7311 |
-
"num_tokens": 6425158.0,
|
| 7312 |
-
"step": 728
|
| 7313 |
-
},
|
| 7314 |
-
{
|
| 7315 |
-
"entropy": 1.7054848670959473,
|
| 7316 |
-
"epoch": 2.6334841628959276,
|
| 7317 |
-
"grad_norm": 0.499615877866745,
|
| 7318 |
-
"learning_rate": 0.0003378194369316749,
|
| 7319 |
-
"loss": 0.0765,
|
| 7320 |
-
"mean_token_accuracy": 0.9788558930158615,
|
| 7321 |
-
"num_tokens": 6434219.0,
|
| 7322 |
-
"step": 729
|
| 7323 |
-
},
|
| 7324 |
-
{
|
| 7325 |
-
"entropy": 1.8623437583446503,
|
| 7326 |
-
"epoch": 2.63710407239819,
|
| 7327 |
-
"grad_norm": 0.428608775138855,
|
| 7328 |
-
"learning_rate": 0.0003373850562502243,
|
| 7329 |
-
"loss": 0.044,
|
| 7330 |
-
"mean_token_accuracy": 0.9862259030342102,
|
| 7331 |
-
"num_tokens": 6442657.0,
|
| 7332 |
-
"step": 730
|
| 7333 |
-
},
|
| 7334 |
-
{
|
| 7335 |
-
"entropy": 1.6827208995819092,
|
| 7336 |
-
"epoch": 2.6407239819004524,
|
| 7337 |
-
"grad_norm": 0.46222713589668274,
|
| 7338 |
-
"learning_rate": 0.00033695046160105076,
|
| 7339 |
-
"loss": 0.0687,
|
| 7340 |
-
"mean_token_accuracy": 0.9762164503335953,
|
| 7341 |
-
"num_tokens": 6451550.0,
|
| 7342 |
-
"step": 731
|
| 7343 |
-
},
|
| 7344 |
-
{
|
| 7345 |
-
"entropy": 1.707773894071579,
|
| 7346 |
-
"epoch": 2.644343891402715,
|
| 7347 |
-
"grad_norm": 0.4701695442199707,
|
| 7348 |
-
"learning_rate": 0.0003365156546347998,
|
| 7349 |
-
"loss": 0.0622,
|
| 7350 |
-
"mean_token_accuracy": 0.9804075062274933,
|
| 7351 |
-
"num_tokens": 6460494.0,
|
| 7352 |
-
"step": 732
|
| 7353 |
-
},
|
| 7354 |
-
{
|
| 7355 |
-
"entropy": 1.7011042833328247,
|
| 7356 |
-
"epoch": 2.6479638009049773,
|
| 7357 |
-
"grad_norm": 0.5986224412918091,
|
| 7358 |
-
"learning_rate": 0.0003360806370029239,
|
| 7359 |
-
"loss": 0.0954,
|
| 7360 |
-
"mean_token_accuracy": 0.9730664491653442,
|
| 7361 |
-
"num_tokens": 6469728.0,
|
| 7362 |
-
"step": 733
|
| 7363 |
-
},
|
| 7364 |
-
{
|
| 7365 |
-
"entropy": 1.810427963733673,
|
| 7366 |
-
"epoch": 2.6515837104072397,
|
| 7367 |
-
"grad_norm": 0.8224559426307678,
|
| 7368 |
-
"learning_rate": 0.0003356454103576754,
|
| 7369 |
-
"loss": 0.1218,
|
| 7370 |
-
"mean_token_accuracy": 0.9742488712072372,
|
| 7371 |
-
"num_tokens": 6478643.0,
|
| 7372 |
-
"step": 734
|
| 7373 |
-
},
|
| 7374 |
-
{
|
| 7375 |
-
"entropy": 1.773183435201645,
|
| 7376 |
-
"epoch": 2.655203619909502,
|
| 7377 |
-
"grad_norm": 0.609344482421875,
|
| 7378 |
-
"learning_rate": 0.0003352099763521006,
|
| 7379 |
-
"loss": 0.0955,
|
| 7380 |
-
"mean_token_accuracy": 0.9747250378131866,
|
| 7381 |
-
"num_tokens": 6487314.0,
|
| 7382 |
-
"step": 735
|
| 7383 |
-
},
|
| 7384 |
-
{
|
| 7385 |
-
"entropy": 1.7761066555976868,
|
| 7386 |
-
"epoch": 2.6588235294117646,
|
| 7387 |
-
"grad_norm": 0.6947258114814758,
|
| 7388 |
-
"learning_rate": 0.0003347743366400333,
|
| 7389 |
-
"loss": 0.1188,
|
| 7390 |
-
"mean_token_accuracy": 0.9693178832530975,
|
| 7391 |
-
"num_tokens": 6496074.0,
|
| 7392 |
-
"step": 736
|
| 7393 |
-
},
|
| 7394 |
-
{
|
| 7395 |
-
"entropy": 1.7725336253643036,
|
| 7396 |
-
"epoch": 2.662443438914027,
|
| 7397 |
-
"grad_norm": 0.6928444504737854,
|
| 7398 |
-
"learning_rate": 0.0003343384928760887,
|
| 7399 |
-
"loss": 0.1589,
|
| 7400 |
-
"mean_token_accuracy": 0.9603369683027267,
|
| 7401 |
-
"num_tokens": 6504997.0,
|
| 7402 |
-
"step": 737
|
| 7403 |
-
},
|
| 7404 |
-
{
|
| 7405 |
-
"entropy": 1.8763961493968964,
|
| 7406 |
-
"epoch": 2.6660633484162894,
|
| 7407 |
-
"grad_norm": 0.6204855442047119,
|
| 7408 |
-
"learning_rate": 0.00033390244671565694,
|
| 7409 |
-
"loss": 0.1115,
|
| 7410 |
-
"mean_token_accuracy": 0.9727036952972412,
|
| 7411 |
-
"num_tokens": 6513639.0,
|
| 7412 |
-
"step": 738
|
| 7413 |
-
},
|
| 7414 |
-
{
|
| 7415 |
-
"entropy": 1.8347080647945404,
|
| 7416 |
-
"epoch": 2.669683257918552,
|
| 7417 |
-
"grad_norm": 0.4470975697040558,
|
| 7418 |
-
"learning_rate": 0.00033346619981489687,
|
| 7419 |
-
"loss": 0.0707,
|
| 7420 |
-
"mean_token_accuracy": 0.9816004037857056,
|
| 7421 |
-
"num_tokens": 6522524.0,
|
| 7422 |
-
"step": 739
|
| 7423 |
-
},
|
| 7424 |
-
{
|
| 7425 |
-
"entropy": 1.8440867066383362,
|
| 7426 |
-
"epoch": 2.6733031674208148,
|
| 7427 |
-
"grad_norm": 0.6848122477531433,
|
| 7428 |
-
"learning_rate": 0.0003330297538307298,
|
| 7429 |
-
"loss": 0.1133,
|
| 7430 |
-
"mean_token_accuracy": 0.966602012515068,
|
| 7431 |
-
"num_tokens": 6531421.0,
|
| 7432 |
-
"step": 740
|
| 7433 |
-
},
|
| 7434 |
-
{
|
| 7435 |
-
"entropy": 1.829009771347046,
|
| 7436 |
-
"epoch": 2.676923076923077,
|
| 7437 |
-
"grad_norm": 0.37875643372535706,
|
| 7438 |
-
"learning_rate": 0.0003325931104208333,
|
| 7439 |
-
"loss": 0.0539,
|
| 7440 |
-
"mean_token_accuracy": 0.9850967526435852,
|
| 7441 |
-
"num_tokens": 6540304.0,
|
| 7442 |
-
"step": 741
|
| 7443 |
-
},
|
| 7444 |
-
{
|
| 7445 |
-
"entropy": 1.8256315886974335,
|
| 7446 |
-
"epoch": 2.6805429864253396,
|
| 7447 |
-
"grad_norm": 0.4970630407333374,
|
| 7448 |
-
"learning_rate": 0.00033215627124363466,
|
| 7449 |
-
"loss": 0.1195,
|
| 7450 |
-
"mean_token_accuracy": 0.9662436544895172,
|
| 7451 |
-
"num_tokens": 6549267.0,
|
| 7452 |
-
"step": 742
|
| 7453 |
-
},
|
| 7454 |
-
{
|
| 7455 |
-
"entropy": 1.823629915714264,
|
| 7456 |
-
"epoch": 2.684162895927602,
|
| 7457 |
-
"grad_norm": 0.659981906414032,
|
| 7458 |
-
"learning_rate": 0.0003317192379583047,
|
| 7459 |
-
"loss": 0.1368,
|
| 7460 |
-
"mean_token_accuracy": 0.9655566364526749,
|
| 7461 |
-
"num_tokens": 6558447.0,
|
| 7462 |
-
"step": 743
|
| 7463 |
-
},
|
| 7464 |
-
{
|
| 7465 |
-
"entropy": 1.8459455370903015,
|
| 7466 |
-
"epoch": 2.6877828054298645,
|
| 7467 |
-
"grad_norm": 0.620197057723999,
|
| 7468 |
-
"learning_rate": 0.0003312820122247515,
|
| 7469 |
-
"loss": 0.1766,
|
| 7470 |
-
"mean_token_accuracy": 0.9569400995969772,
|
| 7471 |
-
"num_tokens": 6567424.0,
|
| 7472 |
-
"step": 744
|
| 7473 |
-
},
|
| 7474 |
-
{
|
| 7475 |
-
"entropy": 1.7685991525650024,
|
| 7476 |
-
"epoch": 2.691402714932127,
|
| 7477 |
-
"grad_norm": 0.34498465061187744,
|
| 7478 |
-
"learning_rate": 0.0003308445957036142,
|
| 7479 |
-
"loss": 0.0615,
|
| 7480 |
-
"mean_token_accuracy": 0.982216015458107,
|
| 7481 |
-
"num_tokens": 6577071.0,
|
| 7482 |
-
"step": 745
|
| 7483 |
-
},
|
| 7484 |
-
{
|
| 7485 |
-
"entropy": 1.8037284910678864,
|
| 7486 |
-
"epoch": 2.6950226244343893,
|
| 7487 |
-
"grad_norm": 0.5550521016120911,
|
| 7488 |
-
"learning_rate": 0.00033040699005625654,
|
| 7489 |
-
"loss": 0.0701,
|
| 7490 |
-
"mean_token_accuracy": 0.9795115292072296,
|
| 7491 |
-
"num_tokens": 6586396.0,
|
| 7492 |
-
"step": 746
|
| 7493 |
-
},
|
| 7494 |
-
{
|
| 7495 |
-
"entropy": 1.813001424074173,
|
| 7496 |
-
"epoch": 2.6986425339366518,
|
| 7497 |
-
"grad_norm": 0.4117080271244049,
|
| 7498 |
-
"learning_rate": 0.0003299691969447603,
|
| 7499 |
-
"loss": 0.0657,
|
| 7500 |
-
"mean_token_accuracy": 0.978747770190239,
|
| 7501 |
-
"num_tokens": 6595189.0,
|
| 7502 |
-
"step": 747
|
| 7503 |
-
},
|
| 7504 |
-
{
|
| 7505 |
-
"entropy": 1.844575196504593,
|
| 7506 |
-
"epoch": 2.702262443438914,
|
| 7507 |
-
"grad_norm": 0.32197874784469604,
|
| 7508 |
-
"learning_rate": 0.00032953121803191976,
|
| 7509 |
-
"loss": 0.0342,
|
| 7510 |
-
"mean_token_accuracy": 0.9904316365718842,
|
| 7511 |
-
"num_tokens": 6604169.0,
|
| 7512 |
-
"step": 748
|
| 7513 |
-
},
|
| 7514 |
-
{
|
| 7515 |
-
"entropy": 1.9490505158901215,
|
| 7516 |
-
"epoch": 2.7058823529411766,
|
| 7517 |
-
"grad_norm": 0.5810762047767639,
|
| 7518 |
-
"learning_rate": 0.00032909305498123465,
|
| 7519 |
-
"loss": 0.1419,
|
| 7520 |
-
"mean_token_accuracy": 0.9646100401878357,
|
| 7521 |
-
"num_tokens": 6612744.0,
|
| 7522 |
-
"step": 749
|
| 7523 |
-
},
|
| 7524 |
-
{
|
| 7525 |
-
"entropy": 1.9927488267421722,
|
| 7526 |
-
"epoch": 2.709502262443439,
|
| 7527 |
-
"grad_norm": 0.7435065507888794,
|
| 7528 |
-
"learning_rate": 0.0003286547094569039,
|
| 7529 |
-
"loss": 0.1368,
|
| 7530 |
-
"mean_token_accuracy": 0.9609140008687973,
|
| 7531 |
-
"num_tokens": 6621000.0,
|
| 7532 |
-
"step": 750
|
| 7533 |
-
},
|
| 7534 |
-
{
|
| 7535 |
-
"entropy": 1.8266884088516235,
|
| 7536 |
-
"epoch": 2.7131221719457015,
|
| 7537 |
-
"grad_norm": 0.6717537045478821,
|
| 7538 |
-
"learning_rate": 0.00032821618312381975,
|
| 7539 |
-
"loss": 0.1449,
|
| 7540 |
-
"mean_token_accuracy": 0.9694183021783829,
|
| 7541 |
-
"num_tokens": 6629893.0,
|
| 7542 |
-
"step": 751
|
| 7543 |
-
},
|
| 7544 |
-
{
|
| 7545 |
-
"entropy": 1.850794643163681,
|
| 7546 |
-
"epoch": 2.716742081447964,
|
| 7547 |
-
"grad_norm": 0.44241195917129517,
|
| 7548 |
-
"learning_rate": 0.00032777747764756117,
|
| 7549 |
-
"loss": 0.0602,
|
| 7550 |
-
"mean_token_accuracy": 0.9823136776685715,
|
| 7551 |
-
"num_tokens": 6638696.0,
|
| 7552 |
-
"step": 752
|
| 7553 |
-
},
|
| 7554 |
-
{
|
| 7555 |
-
"entropy": 1.8408480882644653,
|
| 7556 |
-
"epoch": 2.7203619909502263,
|
| 7557 |
-
"grad_norm": 0.6299809217453003,
|
| 7558 |
-
"learning_rate": 0.00032733859469438736,
|
| 7559 |
-
"loss": 0.1408,
|
| 7560 |
-
"mean_token_accuracy": 0.9629880636930466,
|
| 7561 |
-
"num_tokens": 6647431.0,
|
| 7562 |
-
"step": 753
|
| 7563 |
-
},
|
| 7564 |
-
{
|
| 7565 |
-
"entropy": 1.7875444293022156,
|
| 7566 |
-
"epoch": 2.723981900452489,
|
| 7567 |
-
"grad_norm": 0.48492106795310974,
|
| 7568 |
-
"learning_rate": 0.00032689953593123175,
|
| 7569 |
-
"loss": 0.0806,
|
| 7570 |
-
"mean_token_accuracy": 0.9798424690961838,
|
| 7571 |
-
"num_tokens": 6656443.0,
|
| 7572 |
-
"step": 754
|
| 7573 |
-
},
|
| 7574 |
-
{
|
| 7575 |
-
"entropy": 1.778283566236496,
|
| 7576 |
-
"epoch": 2.727601809954751,
|
| 7577 |
-
"grad_norm": 0.46145930886268616,
|
| 7578 |
-
"learning_rate": 0.0003264603030256955,
|
| 7579 |
-
"loss": 0.0707,
|
| 7580 |
-
"mean_token_accuracy": 0.9741399586200714,
|
| 7581 |
-
"num_tokens": 6665465.0,
|
| 7582 |
-
"step": 755
|
| 7583 |
-
},
|
| 7584 |
-
{
|
| 7585 |
-
"entropy": 1.7340950965881348,
|
| 7586 |
-
"epoch": 2.7312217194570136,
|
| 7587 |
-
"grad_norm": 0.5734900236129761,
|
| 7588 |
-
"learning_rate": 0.00032602089764604126,
|
| 7589 |
-
"loss": 0.1443,
|
| 7590 |
-
"mean_token_accuracy": 0.96195288002491,
|
| 7591 |
-
"num_tokens": 6674797.0,
|
| 7592 |
-
"step": 756
|
| 7593 |
-
},
|
| 7594 |
-
{
|
| 7595 |
-
"entropy": 1.7791962027549744,
|
| 7596 |
-
"epoch": 2.734841628959276,
|
| 7597 |
-
"grad_norm": 0.5199477076530457,
|
| 7598 |
-
"learning_rate": 0.00032558132146118636,
|
| 7599 |
-
"loss": 0.0794,
|
| 7600 |
-
"mean_token_accuracy": 0.975062221288681,
|
| 7601 |
-
"num_tokens": 6683578.0,
|
| 7602 |
-
"step": 757
|
| 7603 |
-
},
|
| 7604 |
-
{
|
| 7605 |
-
"entropy": 1.825905591249466,
|
| 7606 |
-
"epoch": 2.7384615384615385,
|
| 7607 |
-
"grad_norm": 0.5944926738739014,
|
| 7608 |
-
"learning_rate": 0.0003251415761406975,
|
| 7609 |
-
"loss": 0.0909,
|
| 7610 |
-
"mean_token_accuracy": 0.954865038394928,
|
| 7611 |
-
"num_tokens": 6691818.0,
|
| 7612 |
-
"step": 758
|
| 7613 |
-
},
|
| 7614 |
-
{
|
| 7615 |
-
"entropy": 1.804949015378952,
|
| 7616 |
-
"epoch": 2.742081447963801,
|
| 7617 |
-
"grad_norm": 0.7065241932868958,
|
| 7618 |
-
"learning_rate": 0.0003247016633547833,
|
| 7619 |
-
"loss": 0.1511,
|
| 7620 |
-
"mean_token_accuracy": 0.9687065333127975,
|
| 7621 |
-
"num_tokens": 6700619.0,
|
| 7622 |
-
"step": 759
|
| 7623 |
-
},
|
| 7624 |
-
{
|
| 7625 |
-
"entropy": 1.7419202327728271,
|
| 7626 |
-
"epoch": 2.7457013574660634,
|
| 7627 |
-
"grad_norm": 0.49316564202308655,
|
| 7628 |
-
"learning_rate": 0.00032426158477428857,
|
| 7629 |
-
"loss": 0.0867,
|
| 7630 |
-
"mean_token_accuracy": 0.9774050414562225,
|
| 7631 |
-
"num_tokens": 6709635.0,
|
| 7632 |
-
"step": 760
|
| 7633 |
-
},
|
| 7634 |
-
{
|
| 7635 |
-
"entropy": 1.8934829235076904,
|
| 7636 |
-
"epoch": 2.749321266968326,
|
| 7637 |
-
"grad_norm": 0.9417999386787415,
|
| 7638 |
-
"learning_rate": 0.00032382134207068787,
|
| 7639 |
-
"loss": 0.1464,
|
| 7640 |
-
"mean_token_accuracy": 0.9591032713651657,
|
| 7641 |
-
"num_tokens": 6717657.0,
|
| 7642 |
-
"step": 761
|
| 7643 |
-
},
|
| 7644 |
-
{
|
| 7645 |
-
"entropy": 1.7354997992515564,
|
| 7646 |
-
"epoch": 2.7529411764705882,
|
| 7647 |
-
"grad_norm": 0.7240809798240662,
|
| 7648 |
-
"learning_rate": 0.00032338093691607907,
|
| 7649 |
-
"loss": 0.13,
|
| 7650 |
-
"mean_token_accuracy": 0.9705345183610916,
|
| 7651 |
-
"num_tokens": 6726671.0,
|
| 7652 |
-
"step": 762
|
| 7653 |
-
},
|
| 7654 |
-
{
|
| 7655 |
-
"entropy": 1.7620687186717987,
|
| 7656 |
-
"epoch": 2.7565610859728507,
|
| 7657 |
-
"grad_norm": 0.4986638128757477,
|
| 7658 |
-
"learning_rate": 0.0003229403709831772,
|
| 7659 |
-
"loss": 0.0963,
|
| 7660 |
-
"mean_token_accuracy": 0.9756871312856674,
|
| 7661 |
-
"num_tokens": 6735157.0,
|
| 7662 |
-
"step": 763
|
| 7663 |
-
},
|
| 7664 |
-
{
|
| 7665 |
-
"entropy": 1.7719130218029022,
|
| 7666 |
-
"epoch": 2.760180995475113,
|
| 7667 |
-
"grad_norm": 0.6204966902732849,
|
| 7668 |
-
"learning_rate": 0.00032249964594530757,
|
| 7669 |
-
"loss": 0.0578,
|
| 7670 |
-
"mean_token_accuracy": 0.9815829247236252,
|
| 7671 |
-
"num_tokens": 6743855.0,
|
| 7672 |
-
"step": 764
|
| 7673 |
-
},
|
| 7674 |
-
{
|
| 7675 |
-
"entropy": 1.7228702902793884,
|
| 7676 |
-
"epoch": 2.7638009049773755,
|
| 7677 |
-
"grad_norm": 0.5283492207527161,
|
| 7678 |
-
"learning_rate": 0.0003220587634764003,
|
| 7679 |
-
"loss": 0.069,
|
| 7680 |
-
"mean_token_accuracy": 0.9851528853178024,
|
| 7681 |
-
"num_tokens": 6753040.0,
|
| 7682 |
-
"step": 765
|
| 7683 |
-
},
|
| 7684 |
-
{
|
| 7685 |
-
"entropy": 1.7129736840724945,
|
| 7686 |
-
"epoch": 2.767420814479638,
|
| 7687 |
-
"grad_norm": 0.49026060104370117,
|
| 7688 |
-
"learning_rate": 0.0003216177252509831,
|
| 7689 |
-
"loss": 0.0672,
|
| 7690 |
-
"mean_token_accuracy": 0.9857761710882187,
|
| 7691 |
-
"num_tokens": 6762014.0,
|
| 7692 |
-
"step": 766
|
| 7693 |
-
},
|
| 7694 |
-
{
|
| 7695 |
-
"entropy": 1.7600707411766052,
|
| 7696 |
-
"epoch": 2.7710407239819004,
|
| 7697 |
-
"grad_norm": 0.5250128507614136,
|
| 7698 |
-
"learning_rate": 0.00032117653294417523,
|
| 7699 |
-
"loss": 0.1134,
|
| 7700 |
-
"mean_token_accuracy": 0.9638848602771759,
|
| 7701 |
-
"num_tokens": 6771012.0,
|
| 7702 |
-
"step": 767
|
| 7703 |
-
},
|
| 7704 |
-
{
|
| 7705 |
-
"entropy": 1.768298476934433,
|
| 7706 |
-
"epoch": 2.774660633484163,
|
| 7707 |
-
"grad_norm": 0.5671310424804688,
|
| 7708 |
-
"learning_rate": 0.00032073518823168143,
|
| 7709 |
-
"loss": 0.057,
|
| 7710 |
-
"mean_token_accuracy": 0.9840837568044662,
|
| 7711 |
-
"num_tokens": 6779601.0,
|
| 7712 |
-
"step": 768
|
| 7713 |
-
},
|
| 7714 |
-
{
|
| 7715 |
-
"entropy": 1.7464122474193573,
|
| 7716 |
-
"epoch": 2.7782805429864252,
|
| 7717 |
-
"grad_norm": 0.6007266044616699,
|
| 7718 |
-
"learning_rate": 0.0003202936927897852,
|
| 7719 |
-
"loss": 0.081,
|
| 7720 |
-
"mean_token_accuracy": 0.9773043692111969,
|
| 7721 |
-
"num_tokens": 6788518.0,
|
| 7722 |
-
"step": 769
|
| 7723 |
-
},
|
| 7724 |
-
{
|
| 7725 |
-
"entropy": 1.6484523713588715,
|
| 7726 |
-
"epoch": 2.7819004524886877,
|
| 7727 |
-
"grad_norm": 0.5163906812667847,
|
| 7728 |
-
"learning_rate": 0.00031985204829534236,
|
| 7729 |
-
"loss": 0.1215,
|
| 7730 |
-
"mean_token_accuracy": 0.9645300209522247,
|
| 7731 |
-
"num_tokens": 6797924.0,
|
| 7732 |
-
"step": 770
|
| 7733 |
-
},
|
| 7734 |
-
{
|
| 7735 |
-
"entropy": 1.7306124567985535,
|
| 7736 |
-
"epoch": 2.78552036199095,
|
| 7737 |
-
"grad_norm": 0.5778948068618774,
|
| 7738 |
-
"learning_rate": 0.00031941025642577515,
|
| 7739 |
-
"loss": 0.127,
|
| 7740 |
-
"mean_token_accuracy": 0.9713134616613388,
|
| 7741 |
-
"num_tokens": 6806828.0,
|
| 7742 |
-
"step": 771
|
| 7743 |
-
},
|
| 7744 |
-
{
|
| 7745 |
-
"entropy": 1.6599189043045044,
|
| 7746 |
-
"epoch": 2.7891402714932125,
|
| 7747 |
-
"grad_norm": 0.5121646523475647,
|
| 7748 |
-
"learning_rate": 0.0003189683188590653,
|
| 7749 |
-
"loss": 0.1066,
|
| 7750 |
-
"mean_token_accuracy": 0.9707446396350861,
|
| 7751 |
-
"num_tokens": 6816144.0,
|
| 7752 |
-
"step": 772
|
| 7753 |
-
},
|
| 7754 |
-
{
|
| 7755 |
-
"entropy": 1.71377295255661,
|
| 7756 |
-
"epoch": 2.792760180995475,
|
| 7757 |
-
"grad_norm": 0.9535031318664551,
|
| 7758 |
-
"learning_rate": 0.00031852623727374787,
|
| 7759 |
-
"loss": 0.2316,
|
| 7760 |
-
"mean_token_accuracy": 0.9587533473968506,
|
| 7761 |
-
"num_tokens": 6824849.0,
|
| 7762 |
-
"step": 773
|
| 7763 |
-
},
|
| 7764 |
-
{
|
| 7765 |
-
"entropy": 1.7716725766658783,
|
| 7766 |
-
"epoch": 2.7963800904977374,
|
| 7767 |
-
"grad_norm": 0.5735589265823364,
|
| 7768 |
-
"learning_rate": 0.00031808401334890537,
|
| 7769 |
-
"loss": 0.1028,
|
| 7770 |
-
"mean_token_accuracy": 0.9716143608093262,
|
| 7771 |
-
"num_tokens": 6833331.0,
|
| 7772 |
-
"step": 774
|
| 7773 |
-
},
|
| 7774 |
-
{
|
| 7775 |
-
"entropy": 1.7134707272052765,
|
| 7776 |
-
"epoch": 2.8,
|
| 7777 |
-
"grad_norm": 0.7087857127189636,
|
| 7778 |
-
"learning_rate": 0.00031764164876416036,
|
| 7779 |
-
"loss": 0.1201,
|
| 7780 |
-
"mean_token_accuracy": 0.9686445444822311,
|
| 7781 |
-
"num_tokens": 6842254.0,
|
| 7782 |
-
"step": 775
|
| 7783 |
-
},
|
| 7784 |
-
{
|
| 7785 |
-
"entropy": 1.6055873930454254,
|
| 7786 |
-
"epoch": 2.8036199095022623,
|
| 7787 |
-
"grad_norm": 0.4578965902328491,
|
| 7788 |
-
"learning_rate": 0.00031719914519967,
|
| 7789 |
-
"loss": 0.0827,
|
| 7790 |
-
"mean_token_accuracy": 0.972065269947052,
|
| 7791 |
-
"num_tokens": 6851644.0,
|
| 7792 |
-
"step": 776
|
| 7793 |
-
},
|
| 7794 |
-
{
|
| 7795 |
-
"entropy": 1.6444376707077026,
|
| 7796 |
-
"epoch": 2.8072398190045247,
|
| 7797 |
-
"grad_norm": 0.5656917095184326,
|
| 7798 |
-
"learning_rate": 0.0003167565043361194,
|
| 7799 |
-
"loss": 0.1036,
|
| 7800 |
-
"mean_token_accuracy": 0.9723617881536484,
|
| 7801 |
-
"num_tokens": 6860787.0,
|
| 7802 |
-
"step": 777
|
| 7803 |
-
},
|
| 7804 |
-
{
|
| 7805 |
-
"entropy": 1.6980305314064026,
|
| 7806 |
-
"epoch": 2.810859728506787,
|
| 7807 |
-
"grad_norm": 0.7013098001480103,
|
| 7808 |
-
"learning_rate": 0.0003163137278547146,
|
| 7809 |
-
"loss": 0.0838,
|
| 7810 |
-
"mean_token_accuracy": 0.9793482422828674,
|
| 7811 |
-
"num_tokens": 6869378.0,
|
| 7812 |
-
"step": 778
|
| 7813 |
-
},
|
| 7814 |
-
{
|
| 7815 |
-
"entropy": 1.6744478940963745,
|
| 7816 |
-
"epoch": 2.8144796380090495,
|
| 7817 |
-
"grad_norm": 0.6889812350273132,
|
| 7818 |
-
"learning_rate": 0.00031587081743717735,
|
| 7819 |
-
"loss": 0.0964,
|
| 7820 |
-
"mean_token_accuracy": 0.9762091189622879,
|
| 7821 |
-
"num_tokens": 6878050.0,
|
| 7822 |
-
"step": 779
|
| 7823 |
-
},
|
| 7824 |
-
{
|
| 7825 |
-
"entropy": 1.6397214829921722,
|
| 7826 |
-
"epoch": 2.818099547511312,
|
| 7827 |
-
"grad_norm": 0.7166011333465576,
|
| 7828 |
-
"learning_rate": 0.00031542777476573785,
|
| 7829 |
-
"loss": 0.1792,
|
| 7830 |
-
"mean_token_accuracy": 0.9539972990751266,
|
| 7831 |
-
"num_tokens": 6887153.0,
|
| 7832 |
-
"step": 780
|
| 7833 |
-
},
|
| 7834 |
-
{
|
| 7835 |
-
"entropy": 1.6447750926017761,
|
| 7836 |
-
"epoch": 2.8217194570135744,
|
| 7837 |
-
"grad_norm": 0.7113035321235657,
|
| 7838 |
-
"learning_rate": 0.0003149846015231286,
|
| 7839 |
-
"loss": 0.1464,
|
| 7840 |
-
"mean_token_accuracy": 0.96909099817276,
|
| 7841 |
-
"num_tokens": 6895877.0,
|
| 7842 |
-
"step": 781
|
| 7843 |
-
},
|
| 7844 |
-
{
|
| 7845 |
-
"entropy": 1.6827795505523682,
|
| 7846 |
-
"epoch": 2.825339366515837,
|
| 7847 |
-
"grad_norm": 0.6915350556373596,
|
| 7848 |
-
"learning_rate": 0.0003145412993925781,
|
| 7849 |
-
"loss": 0.1335,
|
| 7850 |
-
"mean_token_accuracy": 0.9615183472633362,
|
| 7851 |
-
"num_tokens": 6904553.0,
|
| 7852 |
-
"step": 782
|
| 7853 |
-
},
|
| 7854 |
-
{
|
| 7855 |
-
"entropy": 1.6189779937267303,
|
| 7856 |
-
"epoch": 2.8289592760180997,
|
| 7857 |
-
"grad_norm": 0.467428982257843,
|
| 7858 |
-
"learning_rate": 0.00031409787005780423,
|
| 7859 |
-
"loss": 0.0829,
|
| 7860 |
-
"mean_token_accuracy": 0.9781016558408737,
|
| 7861 |
-
"num_tokens": 6913634.0,
|
| 7862 |
-
"step": 783
|
| 7863 |
-
},
|
| 7864 |
-
{
|
| 7865 |
-
"entropy": 1.6323690116405487,
|
| 7866 |
-
"epoch": 2.832579185520362,
|
| 7867 |
-
"grad_norm": 0.49170154333114624,
|
| 7868 |
-
"learning_rate": 0.00031365431520300813,
|
| 7869 |
-
"loss": 0.0828,
|
| 7870 |
-
"mean_token_accuracy": 0.9719655811786652,
|
| 7871 |
-
"num_tokens": 6922638.0,
|
| 7872 |
-
"step": 784
|
| 7873 |
-
},
|
| 7874 |
-
{
|
| 7875 |
-
"entropy": 1.6121336817741394,
|
| 7876 |
-
"epoch": 2.8361990950226246,
|
| 7877 |
-
"grad_norm": 0.5629302263259888,
|
| 7878 |
-
"learning_rate": 0.00031321063651286777,
|
| 7879 |
-
"loss": 0.0757,
|
| 7880 |
-
"mean_token_accuracy": 0.9791934490203857,
|
| 7881 |
-
"num_tokens": 6931590.0,
|
| 7882 |
-
"step": 785
|
| 7883 |
-
},
|
| 7884 |
-
{
|
| 7885 |
-
"entropy": 1.7345627546310425,
|
| 7886 |
-
"epoch": 2.839819004524887,
|
| 7887 |
-
"grad_norm": 0.5514137148857117,
|
| 7888 |
-
"learning_rate": 0.0003127668356725313,
|
| 7889 |
-
"loss": 0.0819,
|
| 7890 |
-
"mean_token_accuracy": 0.9800210148096085,
|
| 7891 |
-
"num_tokens": 6940137.0,
|
| 7892 |
-
"step": 786
|
| 7893 |
-
},
|
| 7894 |
-
{
|
| 7895 |
-
"entropy": 1.6671563386917114,
|
| 7896 |
-
"epoch": 2.8434389140271494,
|
| 7897 |
-
"grad_norm": 0.5090643167495728,
|
| 7898 |
-
"learning_rate": 0.0003123229143676109,
|
| 7899 |
-
"loss": 0.0794,
|
| 7900 |
-
"mean_token_accuracy": 0.9826332330703735,
|
| 7901 |
-
"num_tokens": 6948616.0,
|
| 7902 |
-
"step": 787
|
| 7903 |
-
},
|
| 7904 |
-
{
|
| 7905 |
-
"entropy": 1.551501840353012,
|
| 7906 |
-
"epoch": 2.847058823529412,
|
| 7907 |
-
"grad_norm": 0.3994922935962677,
|
| 7908 |
-
"learning_rate": 0.0003118788742841761,
|
| 7909 |
-
"loss": 0.0491,
|
| 7910 |
-
"mean_token_accuracy": 0.9865831136703491,
|
| 7911 |
-
"num_tokens": 6957369.0,
|
| 7912 |
-
"step": 788
|
| 7913 |
-
},
|
| 7914 |
-
{
|
| 7915 |
-
"entropy": 1.500845193862915,
|
| 7916 |
-
"epoch": 2.8506787330316743,
|
| 7917 |
-
"grad_norm": 0.6023295521736145,
|
| 7918 |
-
"learning_rate": 0.00031143471710874795,
|
| 7919 |
-
"loss": 0.114,
|
| 7920 |
-
"mean_token_accuracy": 0.9669302552938461,
|
| 7921 |
-
"num_tokens": 6966667.0,
|
| 7922 |
-
"step": 789
|
| 7923 |
-
},
|
| 7924 |
-
{
|
| 7925 |
-
"entropy": 1.5258118510246277,
|
| 7926 |
-
"epoch": 2.8542986425339367,
|
| 7927 |
-
"grad_norm": 0.5326524972915649,
|
| 7928 |
-
"learning_rate": 0.00031099044452829186,
|
| 7929 |
-
"loss": 0.0657,
|
| 7930 |
-
"mean_token_accuracy": 0.9833361059427261,
|
| 7931 |
-
"num_tokens": 6975880.0,
|
| 7932 |
-
"step": 790
|
| 7933 |
-
},
|
| 7934 |
-
{
|
| 7935 |
-
"entropy": 1.5674570798873901,
|
| 7936 |
-
"epoch": 2.857918552036199,
|
| 7937 |
-
"grad_norm": 0.4518730044364929,
|
| 7938 |
-
"learning_rate": 0.00031054605823021186,
|
| 7939 |
-
"loss": 0.0569,
|
| 7940 |
-
"mean_token_accuracy": 0.9832890778779984,
|
| 7941 |
-
"num_tokens": 6984824.0,
|
| 7942 |
-
"step": 791
|
| 7943 |
-
},
|
| 7944 |
-
{
|
| 7945 |
-
"entropy": 1.5301121771335602,
|
| 7946 |
-
"epoch": 2.8615384615384616,
|
| 7947 |
-
"grad_norm": 0.5933698415756226,
|
| 7948 |
-
"learning_rate": 0.00031010155990234364,
|
| 7949 |
-
"loss": 0.1129,
|
| 7950 |
-
"mean_token_accuracy": 0.9684284627437592,
|
| 7951 |
-
"num_tokens": 6994076.0,
|
| 7952 |
-
"step": 792
|
| 7953 |
-
},
|
| 7954 |
-
{
|
| 7955 |
-
"entropy": 1.5711756348609924,
|
| 7956 |
-
"epoch": 2.865158371040724,
|
| 7957 |
-
"grad_norm": 0.6634730696678162,
|
| 7958 |
-
"learning_rate": 0.00030965695123294837,
|
| 7959 |
-
"loss": 0.1204,
|
| 7960 |
-
"mean_token_accuracy": 0.972825437784195,
|
| 7961 |
-
"num_tokens": 7003048.0,
|
| 7962 |
-
"step": 793
|
| 7963 |
-
},
|
| 7964 |
-
{
|
| 7965 |
-
"entropy": 1.6537431180477142,
|
| 7966 |
-
"epoch": 2.8687782805429864,
|
| 7967 |
-
"grad_norm": 0.5688450336456299,
|
| 7968 |
-
"learning_rate": 0.0003092122339107067,
|
| 7969 |
-
"loss": 0.0659,
|
| 7970 |
-
"mean_token_accuracy": 0.9861912727355957,
|
| 7971 |
-
"num_tokens": 7011743.0,
|
| 7972 |
-
"step": 794
|
| 7973 |
-
},
|
| 7974 |
-
{
|
| 7975 |
-
"entropy": 1.731940358877182,
|
| 7976 |
-
"epoch": 2.872398190045249,
|
| 7977 |
-
"grad_norm": 0.9030163288116455,
|
| 7978 |
-
"learning_rate": 0.0003087674096247115,
|
| 7979 |
-
"loss": 0.0829,
|
| 7980 |
-
"mean_token_accuracy": 0.9802074134349823,
|
| 7981 |
-
"num_tokens": 7020003.0,
|
| 7982 |
-
"step": 795
|
| 7983 |
-
},
|
| 7984 |
-
{
|
| 7985 |
-
"entropy": 1.6672345995903015,
|
| 7986 |
-
"epoch": 2.8760180995475113,
|
| 7987 |
-
"grad_norm": 0.5129911303520203,
|
| 7988 |
-
"learning_rate": 0.00030832248006446223,
|
| 7989 |
-
"loss": 0.0823,
|
| 7990 |
-
"mean_token_accuracy": 0.9805259853601456,
|
| 7991 |
-
"num_tokens": 7029275.0,
|
| 7992 |
-
"step": 796
|
| 7993 |
-
},
|
| 7994 |
-
{
|
| 7995 |
-
"entropy": 1.7102139592170715,
|
| 7996 |
-
"epoch": 2.8796380090497737,
|
| 7997 |
-
"grad_norm": 0.6210790872573853,
|
| 7998 |
-
"learning_rate": 0.00030787744691985797,
|
| 7999 |
-
"loss": 0.1248,
|
| 8000 |
-
"mean_token_accuracy": 0.9665560126304626,
|
| 8001 |
-
"num_tokens": 7038068.0,
|
| 8002 |
-
"step": 797
|
| 8003 |
-
},
|
| 8004 |
-
{
|
| 8005 |
-
"entropy": 1.659182459115982,
|
| 8006 |
-
"epoch": 2.883257918552036,
|
| 8007 |
-
"grad_norm": 0.6379976868629456,
|
| 8008 |
-
"learning_rate": 0.0003074323118811913,
|
| 8009 |
-
"loss": 0.1065,
|
| 8010 |
-
"mean_token_accuracy": 0.9647062122821808,
|
| 8011 |
-
"num_tokens": 7047039.0,
|
| 8012 |
-
"step": 798
|
| 8013 |
-
},
|
| 8014 |
-
{
|
| 8015 |
-
"entropy": 1.6344517767429352,
|
| 8016 |
-
"epoch": 2.8868778280542986,
|
| 8017 |
-
"grad_norm": 0.5851842761039734,
|
| 8018 |
-
"learning_rate": 0.00030698707663914186,
|
| 8019 |
-
"loss": 0.1046,
|
| 8020 |
-
"mean_token_accuracy": 0.9666399955749512,
|
| 8021 |
-
"num_tokens": 7056105.0,
|
| 8022 |
-
"step": 799
|
| 8023 |
-
},
|
| 8024 |
-
{
|
| 8025 |
-
"entropy": 1.6803805828094482,
|
| 8026 |
-
"epoch": 2.890497737556561,
|
| 8027 |
-
"grad_norm": 0.5926725268363953,
|
| 8028 |
-
"learning_rate": 0.00030654174288477,
|
| 8029 |
-
"loss": 0.1019,
|
| 8030 |
-
"mean_token_accuracy": 0.9712099581956863,
|
| 8031 |
-
"num_tokens": 7064710.0,
|
| 8032 |
-
"step": 800
|
| 8033 |
-
},
|
| 8034 |
-
{
|
| 8035 |
-
"entropy": 1.7004003822803497,
|
| 8036 |
-
"epoch": 2.8941176470588235,
|
| 8037 |
-
"grad_norm": 0.6103729605674744,
|
| 8038 |
-
"learning_rate": 0.0003060963123095098,
|
| 8039 |
-
"loss": 0.091,
|
| 8040 |
-
"mean_token_accuracy": 0.9780148714780807,
|
| 8041 |
-
"num_tokens": 7073218.0,
|
| 8042 |
-
"step": 801
|
| 8043 |
-
},
|
| 8044 |
-
{
|
| 8045 |
-
"entropy": 1.8133964240550995,
|
| 8046 |
-
"epoch": 2.897737556561086,
|
| 8047 |
-
"grad_norm": 0.872008740901947,
|
| 8048 |
-
"learning_rate": 0.0003056507866051636,
|
| 8049 |
-
"loss": 0.3003,
|
| 8050 |
-
"mean_token_accuracy": 0.9385994374752045,
|
| 8051 |
-
"num_tokens": 7081791.0,
|
| 8052 |
-
"step": 802
|
| 8053 |
-
},
|
| 8054 |
-
{
|
| 8055 |
-
"entropy": 1.7527997195720673,
|
| 8056 |
-
"epoch": 2.9013574660633483,
|
| 8057 |
-
"grad_norm": 0.553669810295105,
|
| 8058 |
-
"learning_rate": 0.0003052051674638945,
|
| 8059 |
-
"loss": 0.0999,
|
| 8060 |
-
"mean_token_accuracy": 0.9695112109184265,
|
| 8061 |
-
"num_tokens": 7090196.0,
|
| 8062 |
-
"step": 803
|
| 8063 |
-
},
|
| 8064 |
-
{
|
| 8065 |
-
"entropy": 1.6374657154083252,
|
| 8066 |
-
"epoch": 2.9049773755656108,
|
| 8067 |
-
"grad_norm": 0.4158615469932556,
|
| 8068 |
-
"learning_rate": 0.00030475945657822107,
|
| 8069 |
-
"loss": 0.0682,
|
| 8070 |
-
"mean_token_accuracy": 0.9802833646535873,
|
| 8071 |
-
"num_tokens": 7099216.0,
|
| 8072 |
-
"step": 804
|
| 8073 |
-
},
|
| 8074 |
-
{
|
| 8075 |
-
"entropy": 1.6056133210659027,
|
| 8076 |
-
"epoch": 2.908597285067873,
|
| 8077 |
-
"grad_norm": 0.47468429803848267,
|
| 8078 |
-
"learning_rate": 0.00030431365564101003,
|
| 8079 |
-
"loss": 0.1188,
|
| 8080 |
-
"mean_token_accuracy": 0.9720293581485748,
|
| 8081 |
-
"num_tokens": 7108787.0,
|
| 8082 |
-
"step": 805
|
| 8083 |
-
},
|
| 8084 |
-
{
|
| 8085 |
-
"entropy": 1.7184821665287018,
|
| 8086 |
-
"epoch": 2.9122171945701356,
|
| 8087 |
-
"grad_norm": 0.6617569923400879,
|
| 8088 |
-
"learning_rate": 0.00030386776634547003,
|
| 8089 |
-
"loss": 0.1121,
|
| 8090 |
-
"mean_token_accuracy": 0.9623472690582275,
|
| 8091 |
-
"num_tokens": 7117158.0,
|
| 8092 |
-
"step": 806
|
| 8093 |
-
},
|
| 8094 |
-
{
|
| 8095 |
-
"entropy": 1.7546651065349579,
|
| 8096 |
-
"epoch": 2.915837104072398,
|
| 8097 |
-
"grad_norm": 0.5058173537254333,
|
| 8098 |
-
"learning_rate": 0.0003034217903851454,
|
| 8099 |
-
"loss": 0.0861,
|
| 8100 |
-
"mean_token_accuracy": 0.9664297550916672,
|
| 8101 |
-
"num_tokens": 7125800.0,
|
| 8102 |
-
"step": 807
|
| 8103 |
-
},
|
| 8104 |
-
{
|
| 8105 |
-
"entropy": 1.6985557675361633,
|
| 8106 |
-
"epoch": 2.9194570135746605,
|
| 8107 |
-
"grad_norm": 0.5197705626487732,
|
| 8108 |
-
"learning_rate": 0.00030297572945390996,
|
| 8109 |
-
"loss": 0.1009,
|
| 8110 |
-
"mean_token_accuracy": 0.9677706956863403,
|
| 8111 |
-
"num_tokens": 7134221.0,
|
| 8112 |
-
"step": 808
|
| 8113 |
-
},
|
| 8114 |
-
{
|
| 8115 |
-
"entropy": 1.6737182438373566,
|
| 8116 |
-
"epoch": 2.9230769230769234,
|
| 8117 |
-
"grad_norm": 0.4528989791870117,
|
| 8118 |
-
"learning_rate": 0.00030252958524595966,
|
| 8119 |
-
"loss": 0.0656,
|
| 8120 |
-
"mean_token_accuracy": 0.9853187948465347,
|
| 8121 |
-
"num_tokens": 7142716.0,
|
| 8122 |
-
"step": 809
|
| 8123 |
-
},
|
| 8124 |
-
{
|
| 8125 |
-
"entropy": 1.687746375799179,
|
| 8126 |
-
"epoch": 2.926696832579186,
|
| 8127 |
-
"grad_norm": 0.8552060723304749,
|
| 8128 |
-
"learning_rate": 0.00030208335945580716,
|
| 8129 |
-
"loss": 0.1584,
|
| 8130 |
-
"mean_token_accuracy": 0.958037719130516,
|
| 8131 |
-
"num_tokens": 7151288.0,
|
| 8132 |
-
"step": 810
|
| 8133 |
-
},
|
| 8134 |
-
{
|
| 8135 |
-
"entropy": 1.6994356215000153,
|
| 8136 |
-
"epoch": 2.930316742081448,
|
| 8137 |
-
"grad_norm": 0.470833957195282,
|
| 8138 |
-
"learning_rate": 0.00030163705377827496,
|
| 8139 |
-
"loss": 0.0537,
|
| 8140 |
-
"mean_token_accuracy": 0.9804185479879379,
|
| 8141 |
-
"num_tokens": 7159738.0,
|
| 8142 |
-
"step": 811
|
| 8143 |
-
},
|
| 8144 |
-
{
|
| 8145 |
-
"entropy": 1.7072536945343018,
|
| 8146 |
-
"epoch": 2.9339366515837106,
|
| 8147 |
-
"grad_norm": 0.5749104022979736,
|
| 8148 |
-
"learning_rate": 0.0003011906699084888,
|
| 8149 |
-
"loss": 0.0502,
|
| 8150 |
-
"mean_token_accuracy": 0.9830235093832016,
|
| 8151 |
-
"num_tokens": 7168101.0,
|
| 8152 |
-
"step": 812
|
| 8153 |
-
},
|
| 8154 |
-
{
|
| 8155 |
-
"entropy": 1.70310440659523,
|
| 8156 |
-
"epoch": 2.937556561085973,
|
| 8157 |
-
"grad_norm": 0.7587386965751648,
|
| 8158 |
-
"learning_rate": 0.0003007442095418715,
|
| 8159 |
-
"loss": 0.1362,
|
| 8160 |
-
"mean_token_accuracy": 0.9594880938529968,
|
| 8161 |
-
"num_tokens": 7176663.0,
|
| 8162 |
-
"step": 813
|
| 8163 |
-
},
|
| 8164 |
-
{
|
| 8165 |
-
"entropy": 1.6307457983493805,
|
| 8166 |
-
"epoch": 2.9411764705882355,
|
| 8167 |
-
"grad_norm": 0.5054190754890442,
|
| 8168 |
-
"learning_rate": 0.00030029767437413665,
|
| 8169 |
-
"loss": 0.0744,
|
| 8170 |
-
"mean_token_accuracy": 0.9738886505365372,
|
| 8171 |
-
"num_tokens": 7185376.0,
|
| 8172 |
-
"step": 814
|
| 8173 |
-
},
|
| 8174 |
-
{
|
| 8175 |
-
"entropy": 1.5872860848903656,
|
| 8176 |
-
"epoch": 2.944796380090498,
|
| 8177 |
-
"grad_norm": 0.5463546514511108,
|
| 8178 |
-
"learning_rate": 0.00029985106610128147,
|
| 8179 |
-
"loss": 0.0916,
|
| 8180 |
-
"mean_token_accuracy": 0.9782509952783585,
|
| 8181 |
-
"num_tokens": 7194304.0,
|
| 8182 |
-
"step": 815
|
| 8183 |
-
},
|
| 8184 |
-
{
|
| 8185 |
-
"entropy": 1.6643644273281097,
|
| 8186 |
-
"epoch": 2.9484162895927604,
|
| 8187 |
-
"grad_norm": 0.5434613823890686,
|
| 8188 |
-
"learning_rate": 0.0002994043864195811,
|
| 8189 |
-
"loss": 0.1007,
|
| 8190 |
-
"mean_token_accuracy": 0.9665197134017944,
|
| 8191 |
-
"num_tokens": 7202895.0,
|
| 8192 |
-
"step": 816
|
| 8193 |
-
},
|
| 8194 |
-
{
|
| 8195 |
-
"entropy": 1.701482743024826,
|
| 8196 |
-
"epoch": 2.952036199095023,
|
| 8197 |
-
"grad_norm": 1.2643967866897583,
|
| 8198 |
-
"learning_rate": 0.00029895763702558206,
|
| 8199 |
-
"loss": 0.1377,
|
| 8200 |
-
"mean_token_accuracy": 0.9696027487516403,
|
| 8201 |
-
"num_tokens": 7211000.0,
|
| 8202 |
-
"step": 817
|
| 8203 |
-
},
|
| 8204 |
-
{
|
| 8205 |
-
"entropy": 1.688760131597519,
|
| 8206 |
-
"epoch": 2.9556561085972852,
|
| 8207 |
-
"grad_norm": 0.5438109636306763,
|
| 8208 |
-
"learning_rate": 0.00029851081961609536,
|
| 8209 |
-
"loss": 0.0637,
|
| 8210 |
-
"mean_token_accuracy": 0.9724639654159546,
|
| 8211 |
-
"num_tokens": 7219274.0,
|
| 8212 |
-
"step": 818
|
| 8213 |
-
},
|
| 8214 |
-
{
|
| 8215 |
-
"entropy": 1.6547857522964478,
|
| 8216 |
-
"epoch": 2.9592760180995477,
|
| 8217 |
-
"grad_norm": 0.4520387649536133,
|
| 8218 |
-
"learning_rate": 0.0002980639358881906,
|
| 8219 |
-
"loss": 0.0376,
|
| 8220 |
-
"mean_token_accuracy": 0.9887004494667053,
|
| 8221 |
-
"num_tokens": 7228000.0,
|
| 8222 |
-
"step": 819
|
| 8223 |
-
},
|
| 8224 |
-
{
|
| 8225 |
-
"entropy": 1.5814381837844849,
|
| 8226 |
-
"epoch": 2.96289592760181,
|
| 8227 |
-
"grad_norm": 0.49122339487075806,
|
| 8228 |
-
"learning_rate": 0.00029761698753918894,
|
| 8229 |
-
"loss": 0.0533,
|
| 8230 |
-
"mean_token_accuracy": 0.983299508690834,
|
| 8231 |
-
"num_tokens": 7236798.0,
|
| 8232 |
-
"step": 820
|
| 8233 |
-
},
|
| 8234 |
-
{
|
| 8235 |
-
"entropy": 1.5796774625778198,
|
| 8236 |
-
"epoch": 2.9665158371040725,
|
| 8237 |
-
"grad_norm": 0.43303897976875305,
|
| 8238 |
-
"learning_rate": 0.00029716997626665726,
|
| 8239 |
-
"loss": 0.0517,
|
| 8240 |
-
"mean_token_accuracy": 0.984140008687973,
|
| 8241 |
-
"num_tokens": 7245570.0,
|
| 8242 |
-
"step": 821
|
| 8243 |
-
},
|
| 8244 |
-
{
|
| 8245 |
-
"entropy": 1.5434466302394867,
|
| 8246 |
-
"epoch": 2.970135746606335,
|
| 8247 |
-
"grad_norm": 0.5712567567825317,
|
| 8248 |
-
"learning_rate": 0.0002967229037684014,
|
| 8249 |
-
"loss": 0.0634,
|
| 8250 |
-
"mean_token_accuracy": 0.9851510971784592,
|
| 8251 |
-
"num_tokens": 7254482.0,
|
| 8252 |
-
"step": 822
|
| 8253 |
-
},
|
| 8254 |
-
{
|
| 8255 |
-
"entropy": 1.5368549823760986,
|
| 8256 |
-
"epoch": 2.9737556561085974,
|
| 8257 |
-
"grad_norm": 0.5042312741279602,
|
| 8258 |
-
"learning_rate": 0.0002962757717424595,
|
| 8259 |
-
"loss": 0.1041,
|
| 8260 |
-
"mean_token_accuracy": 0.9698852747678757,
|
| 8261 |
-
"num_tokens": 7263428.0,
|
| 8262 |
-
"step": 823
|
| 8263 |
-
},
|
| 8264 |
-
{
|
| 8265 |
-
"entropy": 1.5740615129470825,
|
| 8266 |
-
"epoch": 2.97737556561086,
|
| 8267 |
-
"grad_norm": 0.8506835699081421,
|
| 8268 |
-
"learning_rate": 0.0002958285818870963,
|
| 8269 |
-
"loss": 0.0653,
|
| 8270 |
-
"mean_token_accuracy": 0.9827365875244141,
|
| 8271 |
-
"num_tokens": 7272425.0,
|
| 8272 |
-
"step": 824
|
| 8273 |
-
},
|
| 8274 |
-
{
|
| 8275 |
-
"entropy": 1.625010073184967,
|
| 8276 |
-
"epoch": 2.9809954751131222,
|
| 8277 |
-
"grad_norm": 0.6260822415351868,
|
| 8278 |
-
"learning_rate": 0.00029538133590079556,
|
| 8279 |
-
"loss": 0.1112,
|
| 8280 |
-
"mean_token_accuracy": 0.9715189933776855,
|
| 8281 |
-
"num_tokens": 7281312.0,
|
| 8282 |
-
"step": 825
|
| 8283 |
-
},
|
| 8284 |
-
{
|
| 8285 |
-
"entropy": 1.6078990697860718,
|
| 8286 |
-
"epoch": 2.9846153846153847,
|
| 8287 |
-
"grad_norm": 0.4316014349460602,
|
| 8288 |
-
"learning_rate": 0.00029493403548225467,
|
| 8289 |
-
"loss": 0.059,
|
| 8290 |
-
"mean_token_accuracy": 0.9821690768003464,
|
| 8291 |
-
"num_tokens": 7289748.0,
|
| 8292 |
-
"step": 826
|
| 8293 |
-
},
|
| 8294 |
-
{
|
| 8295 |
-
"entropy": 1.6132618486881256,
|
| 8296 |
-
"epoch": 2.988235294117647,
|
| 8297 |
-
"grad_norm": 0.6471059322357178,
|
| 8298 |
-
"learning_rate": 0.0002944866823303776,
|
| 8299 |
-
"loss": 0.0839,
|
| 8300 |
-
"mean_token_accuracy": 0.9747331887483597,
|
| 8301 |
-
"num_tokens": 7298453.0,
|
| 8302 |
-
"step": 827
|
| 8303 |
-
},
|
| 8304 |
-
{
|
| 8305 |
-
"entropy": 1.6038751900196075,
|
| 8306 |
-
"epoch": 2.9918552036199095,
|
| 8307 |
-
"grad_norm": 0.5383681654930115,
|
| 8308 |
-
"learning_rate": 0.0002940392781442686,
|
| 8309 |
-
"loss": 0.0728,
|
| 8310 |
-
"mean_token_accuracy": 0.9774085730314255,
|
| 8311 |
-
"num_tokens": 7307116.0,
|
| 8312 |
-
"step": 828
|
| 8313 |
-
},
|
| 8314 |
-
{
|
| 8315 |
-
"entropy": 1.6446776688098907,
|
| 8316 |
-
"epoch": 2.995475113122172,
|
| 8317 |
-
"grad_norm": 0.5420554280281067,
|
| 8318 |
-
"learning_rate": 0.0002935918246232259,
|
| 8319 |
-
"loss": 0.0799,
|
| 8320 |
-
"mean_token_accuracy": 0.977481946349144,
|
| 8321 |
-
"num_tokens": 7315668.0,
|
| 8322 |
-
"step": 829
|
| 8323 |
-
},
|
| 8324 |
-
{
|
| 8325 |
-
"entropy": 1.5571844279766083,
|
| 8326 |
-
"epoch": 2.9990950226244344,
|
| 8327 |
-
"grad_norm": 0.6471306681632996,
|
| 8328 |
-
"learning_rate": 0.00029314432346673485,
|
| 8329 |
-
"loss": 0.1657,
|
| 8330 |
-
"mean_token_accuracy": 0.9566951394081116,
|
| 8331 |
-
"num_tokens": 7324721.0,
|
| 8332 |
-
"step": 830
|
| 8333 |
-
},
|
| 8334 |
-
{
|
| 8335 |
-
"entropy": 2.0783205032348633,
|
| 8336 |
-
"epoch": 3.0,
|
| 8337 |
-
"grad_norm": 3.195817232131958,
|
| 8338 |
-
"learning_rate": 0.000292696776374462,
|
| 8339 |
-
"loss": 0.0742,
|
| 8340 |
-
"mean_token_accuracy": 0.96875,
|
| 8341 |
-
"num_tokens": 7325175.0,
|
| 8342 |
-
"step": 831
|
| 8343 |
-
},
|
| 8344 |
-
{
|
| 8345 |
-
"epoch": 3.0,
|
| 8346 |
-
"eval_entropy": 1.6213929740394033,
|
| 8347 |
-
"eval_loss": 0.14780744910240173,
|
| 8348 |
-
"eval_mean_token_accuracy": 0.9634173047251817,
|
| 8349 |
-
"eval_num_tokens": 7325175.0,
|
| 8350 |
-
"eval_runtime": 116.0041,
|
| 8351 |
-
"eval_samples_per_second": 3.181,
|
| 8352 |
-
"eval_steps_per_second": 1.06,
|
| 8353 |
-
"step": 831
|
| 8354 |
}
|
| 8355 |
],
|
| 8356 |
"logging_steps": 1,
|
|
@@ -8370,7 +5589,7 @@
|
|
| 8370 |
"attributes": {}
|
| 8371 |
}
|
| 8372 |
},
|
| 8373 |
-
"total_flos":
|
| 8374 |
"train_batch_size": 3,
|
| 8375 |
"trial_name": null,
|
| 8376 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 2.0,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 554,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 5570 |
"eval_samples_per_second": 3.177,
|
| 5571 |
"eval_steps_per_second": 1.059,
|
| 5572 |
"step": 554
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5573 |
}
|
| 5574 |
],
|
| 5575 |
"logging_steps": 1,
|
| 5589 |
"attributes": {}
|
| 5590 |
}
|
| 5591 |
},
|
| 5592 |
+"total_flos": 6.634384518674615e+17,
|
| 5593 |
"train_batch_size": 3,
|
| 5594 |
"trial_name": null,
|
| 5595 |
"trial_params": null
|
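Note for readers inspecting this checkpoint: the fields in the diff above (epoch, global_step, total_flos, train_batch_size, log_history entries) come from the trainer_state.json that the Hugging Face Trainer writes into each checkpoint directory. A minimal sketch of loading and reading those fields follows; the file path is illustrative, and the field names match what appears in this diff.

import json

# Load the Trainer state saved alongside the adapter weights.
# Path is illustrative: point it at any checkpoint directory
# produced by a Hugging Face Trainer run.
with open("trainer_state.json") as f:
    state = json.load(f)

# Training progress at save time.
print(state["epoch"], state["global_step"])

# Per-device batch size recorded for this run (3 in this diff).
print(state["train_batch_size"])

# Cumulative floating-point operations spent on training so far.
print(state["total_flos"])

# Most recent logged step: loss, learning rate, token accuracy, etc.
print(state["log_history"][-1])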