Training in progress, step 7000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c76cf6da384756da592c5c50d0169c8d71834422387d445a1df34766e643d9bb
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:68bf75b9ec958e92f211abfe579212c04190172b49bdfbcf136eea9aae980133
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:012c45d165b3369856a4591817420a71a07d3d2cd37f890b655313517015a2fd
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c87a18ccc821b756f8fecf0a1e33873b3617702f02d6f52c0042644b36bee0d
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4669,6 +4669,364 @@
|
|
| 4669 |
"eval_samples_per_second": 273.54,
|
| 4670 |
"eval_steps_per_second": 5.744,
|
| 4671 |
"step": 6500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4672 |
}
|
| 4673 |
],
|
| 4674 |
"logging_steps": 10,
|
|
@@ -4688,7 +5046,7 @@
|
|
| 4688 |
"attributes": {}
|
| 4689 |
}
|
| 4690 |
},
|
| 4691 |
-
"total_flos": 2.
|
| 4692 |
"train_batch_size": 48,
|
| 4693 |
"trial_name": null,
|
| 4694 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.1826322013853692,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 7000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4669 |
"eval_samples_per_second": 273.54,
|
| 4670 |
"eval_steps_per_second": 5.744,
|
| 4671 |
"step": 6500
|
| 4672 |
+
},
|
| 4673 |
+
{
|
| 4674 |
+
"epoch": 1.0998479472883933,
|
| 4675 |
+
"grad_norm": 0.48126745223999023,
|
| 4676 |
+
"learning_rate": 0.0001695828863325459,
|
| 4677 |
+
"loss": 4.468023300170898,
|
| 4678 |
+
"step": 6510
|
| 4679 |
+
},
|
| 4680 |
+
{
|
| 4681 |
+
"epoch": 1.101537421861801,
|
| 4682 |
+
"grad_norm": 0.480144202709198,
|
| 4683 |
+
"learning_rate": 0.00016910788818067434,
|
| 4684 |
+
"loss": 4.435828399658203,
|
| 4685 |
+
"step": 6520
|
| 4686 |
+
},
|
| 4687 |
+
{
|
| 4688 |
+
"epoch": 1.1032268964352085,
|
| 4689 |
+
"grad_norm": 0.4941800832748413,
|
| 4690 |
+
"learning_rate": 0.0001686326951796907,
|
| 4691 |
+
"loss": 4.450290679931641,
|
| 4692 |
+
"step": 6530
|
| 4693 |
+
},
|
| 4694 |
+
{
|
| 4695 |
+
"epoch": 1.1049163710086163,
|
| 4696 |
+
"grad_norm": 0.510553240776062,
|
| 4697 |
+
"learning_rate": 0.00016815731217528667,
|
| 4698 |
+
"loss": 4.447552108764649,
|
| 4699 |
+
"step": 6540
|
| 4700 |
+
},
|
| 4701 |
+
{
|
| 4702 |
+
"epoch": 1.106605845582024,
|
| 4703 |
+
"grad_norm": 0.5066401958465576,
|
| 4704 |
+
"learning_rate": 0.00016768174401509143,
|
| 4705 |
+
"loss": 4.465335083007813,
|
| 4706 |
+
"step": 6550
|
| 4707 |
+
},
|
| 4708 |
+
{
|
| 4709 |
+
"epoch": 1.1082953201554318,
|
| 4710 |
+
"grad_norm": 0.5177704095840454,
|
| 4711 |
+
"learning_rate": 0.0001672059955486223,
|
| 4712 |
+
"loss": 4.457915496826172,
|
| 4713 |
+
"step": 6560
|
| 4714 |
+
},
|
| 4715 |
+
{
|
| 4716 |
+
"epoch": 1.1099847947288393,
|
| 4717 |
+
"grad_norm": 0.4799138009548187,
|
| 4718 |
+
"learning_rate": 0.000166730071627235,
|
| 4719 |
+
"loss": 4.464352035522461,
|
| 4720 |
+
"step": 6570
|
| 4721 |
+
},
|
| 4722 |
+
{
|
| 4723 |
+
"epoch": 1.111674269302247,
|
| 4724 |
+
"grad_norm": 0.49141937494277954,
|
| 4725 |
+
"learning_rate": 0.00016625397710407487,
|
| 4726 |
+
"loss": 4.451135635375977,
|
| 4727 |
+
"step": 6580
|
| 4728 |
+
},
|
| 4729 |
+
{
|
| 4730 |
+
"epoch": 1.1133637438756547,
|
| 4731 |
+
"grad_norm": 0.4723527431488037,
|
| 4732 |
+
"learning_rate": 0.00016577771683402647,
|
| 4733 |
+
"loss": 4.462360382080078,
|
| 4734 |
+
"step": 6590
|
| 4735 |
+
},
|
| 4736 |
+
{
|
| 4737 |
+
"epoch": 1.1150532184490622,
|
| 4738 |
+
"grad_norm": 0.5147464275360107,
|
| 4739 |
+
"learning_rate": 0.00016530129567366483,
|
| 4740 |
+
"loss": 4.456496047973633,
|
| 4741 |
+
"step": 6600
|
| 4742 |
+
},
|
| 4743 |
+
{
|
| 4744 |
+
"epoch": 1.11674269302247,
|
| 4745 |
+
"grad_norm": 0.5018206834793091,
|
| 4746 |
+
"learning_rate": 0.0001648247184812054,
|
| 4747 |
+
"loss": 4.427286148071289,
|
| 4748 |
+
"step": 6610
|
| 4749 |
+
},
|
| 4750 |
+
{
|
| 4751 |
+
"epoch": 1.1184321675958777,
|
| 4752 |
+
"grad_norm": 0.5022728443145752,
|
| 4753 |
+
"learning_rate": 0.00016434799011645507,
|
| 4754 |
+
"loss": 4.437714004516602,
|
| 4755 |
+
"step": 6620
|
| 4756 |
+
},
|
| 4757 |
+
{
|
| 4758 |
+
"epoch": 1.1201216421692854,
|
| 4759 |
+
"grad_norm": 0.4947036802768707,
|
| 4760 |
+
"learning_rate": 0.00016387111544076193,
|
| 4761 |
+
"loss": 4.460609436035156,
|
| 4762 |
+
"step": 6630
|
| 4763 |
+
},
|
| 4764 |
+
{
|
| 4765 |
+
"epoch": 1.121811116742693,
|
| 4766 |
+
"grad_norm": 0.49200695753097534,
|
| 4767 |
+
"learning_rate": 0.00016339409931696625,
|
| 4768 |
+
"loss": 4.438530731201172,
|
| 4769 |
+
"step": 6640
|
| 4770 |
+
},
|
| 4771 |
+
{
|
| 4772 |
+
"epoch": 1.1235005913161007,
|
| 4773 |
+
"grad_norm": 0.5190764665603638,
|
| 4774 |
+
"learning_rate": 0.00016291694660935065,
|
| 4775 |
+
"loss": 4.455600357055664,
|
| 4776 |
+
"step": 6650
|
| 4777 |
+
},
|
| 4778 |
+
{
|
| 4779 |
+
"epoch": 1.1251900658895084,
|
| 4780 |
+
"grad_norm": 0.48938512802124023,
|
| 4781 |
+
"learning_rate": 0.00016243966218359047,
|
| 4782 |
+
"loss": 4.428539276123047,
|
| 4783 |
+
"step": 6660
|
| 4784 |
+
},
|
| 4785 |
+
{
|
| 4786 |
+
"epoch": 1.126879540462916,
|
| 4787 |
+
"grad_norm": 0.5315442681312561,
|
| 4788 |
+
"learning_rate": 0.00016196225090670435,
|
| 4789 |
+
"loss": 4.410963821411133,
|
| 4790 |
+
"step": 6670
|
| 4791 |
+
},
|
| 4792 |
+
{
|
| 4793 |
+
"epoch": 1.1285690150363237,
|
| 4794 |
+
"grad_norm": 0.520609438419342,
|
| 4795 |
+
"learning_rate": 0.0001614847176470043,
|
| 4796 |
+
"loss": 4.434111785888672,
|
| 4797 |
+
"step": 6680
|
| 4798 |
+
},
|
| 4799 |
+
{
|
| 4800 |
+
"epoch": 1.1302584896097314,
|
| 4801 |
+
"grad_norm": 0.4818267822265625,
|
| 4802 |
+
"learning_rate": 0.00016100706727404645,
|
| 4803 |
+
"loss": 4.427024459838867,
|
| 4804 |
+
"step": 6690
|
| 4805 |
+
},
|
| 4806 |
+
{
|
| 4807 |
+
"epoch": 1.131947964183139,
|
| 4808 |
+
"grad_norm": 0.5050205588340759,
|
| 4809 |
+
"learning_rate": 0.00016052930465858094,
|
| 4810 |
+
"loss": 4.43774642944336,
|
| 4811 |
+
"step": 6700
|
| 4812 |
+
},
|
| 4813 |
+
{
|
| 4814 |
+
"epoch": 1.1336374387565467,
|
| 4815 |
+
"grad_norm": 0.48701977729797363,
|
| 4816 |
+
"learning_rate": 0.00016005143467250267,
|
| 4817 |
+
"loss": 4.45825309753418,
|
| 4818 |
+
"step": 6710
|
| 4819 |
+
},
|
| 4820 |
+
{
|
| 4821 |
+
"epoch": 1.1353269133299544,
|
| 4822 |
+
"grad_norm": 0.49582117795944214,
|
| 4823 |
+
"learning_rate": 0.00015957346218880124,
|
| 4824 |
+
"loss": 4.454438781738281,
|
| 4825 |
+
"step": 6720
|
| 4826 |
+
},
|
| 4827 |
+
{
|
| 4828 |
+
"epoch": 1.1370163879033621,
|
| 4829 |
+
"grad_norm": 0.5180007219314575,
|
| 4830 |
+
"learning_rate": 0.0001590953920815117,
|
| 4831 |
+
"loss": 4.4425701141357425,
|
| 4832 |
+
"step": 6730
|
| 4833 |
+
},
|
| 4834 |
+
{
|
| 4835 |
+
"epoch": 1.1387058624767696,
|
| 4836 |
+
"grad_norm": 0.4941588342189789,
|
| 4837 |
+
"learning_rate": 0.00015861722922566436,
|
| 4838 |
+
"loss": 4.433919143676758,
|
| 4839 |
+
"step": 6740
|
| 4840 |
+
},
|
| 4841 |
+
{
|
| 4842 |
+
"epoch": 1.1403953370501774,
|
| 4843 |
+
"grad_norm": 0.5498872399330139,
|
| 4844 |
+
"learning_rate": 0.00015813897849723544,
|
| 4845 |
+
"loss": 4.43182373046875,
|
| 4846 |
+
"step": 6750
|
| 4847 |
+
},
|
| 4848 |
+
{
|
| 4849 |
+
"epoch": 1.142084811623585,
|
| 4850 |
+
"grad_norm": 0.4908352792263031,
|
| 4851 |
+
"learning_rate": 0.0001576606447730972,
|
| 4852 |
+
"loss": 4.4357856750488285,
|
| 4853 |
+
"step": 6760
|
| 4854 |
+
},
|
| 4855 |
+
{
|
| 4856 |
+
"epoch": 1.1437742861969928,
|
| 4857 |
+
"grad_norm": 0.5008165240287781,
|
| 4858 |
+
"learning_rate": 0.0001571822329309682,
|
| 4859 |
+
"loss": 4.422341156005859,
|
| 4860 |
+
"step": 6770
|
| 4861 |
+
},
|
| 4862 |
+
{
|
| 4863 |
+
"epoch": 1.1454637607704004,
|
| 4864 |
+
"grad_norm": 0.49346834421157837,
|
| 4865 |
+
"learning_rate": 0.00015670374784936371,
|
| 4866 |
+
"loss": 4.4391838073730465,
|
| 4867 |
+
"step": 6780
|
| 4868 |
+
},
|
| 4869 |
+
{
|
| 4870 |
+
"epoch": 1.147153235343808,
|
| 4871 |
+
"grad_norm": 0.5131083726882935,
|
| 4872 |
+
"learning_rate": 0.00015622519440754566,
|
| 4873 |
+
"loss": 4.423881149291992,
|
| 4874 |
+
"step": 6790
|
| 4875 |
+
},
|
| 4876 |
+
{
|
| 4877 |
+
"epoch": 1.1488427099172158,
|
| 4878 |
+
"grad_norm": 0.5656039714813232,
|
| 4879 |
+
"learning_rate": 0.0001557465774854732,
|
| 4880 |
+
"loss": 4.450177383422852,
|
| 4881 |
+
"step": 6800
|
| 4882 |
+
},
|
| 4883 |
+
{
|
| 4884 |
+
"epoch": 1.1505321844906233,
|
| 4885 |
+
"grad_norm": 0.511678159236908,
|
| 4886 |
+
"learning_rate": 0.0001552679019637528,
|
| 4887 |
+
"loss": 4.4376472473144535,
|
| 4888 |
+
"step": 6810
|
| 4889 |
+
},
|
| 4890 |
+
{
|
| 4891 |
+
"epoch": 1.152221659064031,
|
| 4892 |
+
"grad_norm": 0.4897667467594147,
|
| 4893 |
+
"learning_rate": 0.00015478917272358848,
|
| 4894 |
+
"loss": 4.427242660522461,
|
| 4895 |
+
"step": 6820
|
| 4896 |
+
},
|
| 4897 |
+
{
|
| 4898 |
+
"epoch": 1.1539111336374388,
|
| 4899 |
+
"grad_norm": 0.49397048354148865,
|
| 4900 |
+
"learning_rate": 0.000154310394646732,
|
| 4901 |
+
"loss": 4.464704895019532,
|
| 4902 |
+
"step": 6830
|
| 4903 |
+
},
|
| 4904 |
+
{
|
| 4905 |
+
"epoch": 1.1556006082108463,
|
| 4906 |
+
"grad_norm": 0.5576964616775513,
|
| 4907 |
+
"learning_rate": 0.00015383157261543318,
|
| 4908 |
+
"loss": 4.417141723632812,
|
| 4909 |
+
"step": 6840
|
| 4910 |
+
},
|
| 4911 |
+
{
|
| 4912 |
+
"epoch": 1.157290082784254,
|
| 4913 |
+
"grad_norm": 0.49850115180015564,
|
| 4914 |
+
"learning_rate": 0.00015335271151239,
|
| 4915 |
+
"loss": 4.421836090087891,
|
| 4916 |
+
"step": 6850
|
| 4917 |
+
},
|
| 4918 |
+
{
|
| 4919 |
+
"epoch": 1.1589795573576618,
|
| 4920 |
+
"grad_norm": 0.4906124770641327,
|
| 4921 |
+
"learning_rate": 0.00015287381622069892,
|
| 4922 |
+
"loss": 4.414659881591797,
|
| 4923 |
+
"step": 6860
|
| 4924 |
+
},
|
| 4925 |
+
{
|
| 4926 |
+
"epoch": 1.1606690319310695,
|
| 4927 |
+
"grad_norm": 0.49217015504837036,
|
| 4928 |
+
"learning_rate": 0.00015239489162380504,
|
| 4929 |
+
"loss": 4.422631072998047,
|
| 4930 |
+
"step": 6870
|
| 4931 |
+
},
|
| 4932 |
+
{
|
| 4933 |
+
"epoch": 1.162358506504477,
|
| 4934 |
+
"grad_norm": 0.5112168788909912,
|
| 4935 |
+
"learning_rate": 0.0001519159426054522,
|
| 4936 |
+
"loss": 4.436067581176758,
|
| 4937 |
+
"step": 6880
|
| 4938 |
+
},
|
| 4939 |
+
{
|
| 4940 |
+
"epoch": 1.1640479810778848,
|
| 4941 |
+
"grad_norm": 0.5121352076530457,
|
| 4942 |
+
"learning_rate": 0.0001514369740496334,
|
| 4943 |
+
"loss": 4.4104469299316404,
|
| 4944 |
+
"step": 6890
|
| 4945 |
+
},
|
| 4946 |
+
{
|
| 4947 |
+
"epoch": 1.1657374556512925,
|
| 4948 |
+
"grad_norm": 0.46113350987434387,
|
| 4949 |
+
"learning_rate": 0.00015095799084054073,
|
| 4950 |
+
"loss": 4.437421798706055,
|
| 4951 |
+
"step": 6900
|
| 4952 |
+
},
|
| 4953 |
+
{
|
| 4954 |
+
"epoch": 1.1674269302247002,
|
| 4955 |
+
"grad_norm": 0.48472002148628235,
|
| 4956 |
+
"learning_rate": 0.00015047899786251587,
|
| 4957 |
+
"loss": 4.441893005371094,
|
| 4958 |
+
"step": 6910
|
| 4959 |
+
},
|
| 4960 |
+
{
|
| 4961 |
+
"epoch": 1.1691164047981077,
|
| 4962 |
+
"grad_norm": 0.4898509383201599,
|
| 4963 |
+
"learning_rate": 0.00015,
|
| 4964 |
+
"loss": 4.440791320800781,
|
| 4965 |
+
"step": 6920
|
| 4966 |
+
},
|
| 4967 |
+
{
|
| 4968 |
+
"epoch": 1.1708058793715155,
|
| 4969 |
+
"grad_norm": 0.5069623589515686,
|
| 4970 |
+
"learning_rate": 0.0001495210021374841,
|
| 4971 |
+
"loss": 4.430186080932617,
|
| 4972 |
+
"step": 6930
|
| 4973 |
+
},
|
| 4974 |
+
{
|
| 4975 |
+
"epoch": 1.1724953539449232,
|
| 4976 |
+
"grad_norm": 0.48332998156547546,
|
| 4977 |
+
"learning_rate": 0.00014904200915945927,
|
| 4978 |
+
"loss": 4.4347789764404295,
|
| 4979 |
+
"step": 6940
|
| 4980 |
+
},
|
| 4981 |
+
{
|
| 4982 |
+
"epoch": 1.1741848285183307,
|
| 4983 |
+
"grad_norm": 0.4905153214931488,
|
| 4984 |
+
"learning_rate": 0.00014856302595036663,
|
| 4985 |
+
"loss": 4.427460861206055,
|
| 4986 |
+
"step": 6950
|
| 4987 |
+
},
|
| 4988 |
+
{
|
| 4989 |
+
"epoch": 1.1758743030917385,
|
| 4990 |
+
"grad_norm": 0.49028924107551575,
|
| 4991 |
+
"learning_rate": 0.00014808405739454776,
|
| 4992 |
+
"loss": 4.407707977294922,
|
| 4993 |
+
"step": 6960
|
| 4994 |
+
},
|
| 4995 |
+
{
|
| 4996 |
+
"epoch": 1.1775637776651462,
|
| 4997 |
+
"grad_norm": 0.4613584876060486,
|
| 4998 |
+
"learning_rate": 0.00014760510837619493,
|
| 4999 |
+
"loss": 4.417768096923828,
|
| 5000 |
+
"step": 6970
|
| 5001 |
+
},
|
| 5002 |
+
{
|
| 5003 |
+
"epoch": 1.1792532522385537,
|
| 5004 |
+
"grad_norm": 0.50248122215271,
|
| 5005 |
+
"learning_rate": 0.00014712618377930105,
|
| 5006 |
+
"loss": 4.419640350341797,
|
| 5007 |
+
"step": 6980
|
| 5008 |
+
},
|
| 5009 |
+
{
|
| 5010 |
+
"epoch": 1.1809427268119614,
|
| 5011 |
+
"grad_norm": 0.5108124613761902,
|
| 5012 |
+
"learning_rate": 0.00014664728848760996,
|
| 5013 |
+
"loss": 4.420491790771484,
|
| 5014 |
+
"step": 6990
|
| 5015 |
+
},
|
| 5016 |
+
{
|
| 5017 |
+
"epoch": 1.1826322013853692,
|
| 5018 |
+
"grad_norm": 0.4787265658378601,
|
| 5019 |
+
"learning_rate": 0.00014616842738456682,
|
| 5020 |
+
"loss": 4.394326400756836,
|
| 5021 |
+
"step": 7000
|
| 5022 |
+
},
|
| 5023 |
+
{
|
| 5024 |
+
"epoch": 1.1826322013853692,
|
| 5025 |
+
"eval_loss": 4.396040916442871,
|
| 5026 |
+
"eval_runtime": 3.6438,
|
| 5027 |
+
"eval_samples_per_second": 274.439,
|
| 5028 |
+
"eval_steps_per_second": 5.763,
|
| 5029 |
+
"step": 7000
|
| 5030 |
}
|
| 5031 |
],
|
| 5032 |
"logging_steps": 10,
|
|
|
|
| 5046 |
"attributes": {}
|
| 5047 |
}
|
| 5048 |
},
|
| 5049 |
+
"total_flos": 2.3411768424608563e+17,
|
| 5050 |
"train_batch_size": 48,
|
| 5051 |
"trial_name": null,
|
| 5052 |
"trial_params": null
|