Training in progress, step 7000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 328277848
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34c8f104effe1a88e833bb692c7b75c569bc83b156fc0482dcf0ed735fda2945
|
| 3 |
size 328277848
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 318646859
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:26c334859cc6eb4b1ef4006976a7f325a89208371148b26da8caf2a6573930ff
|
| 3 |
size 318646859
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a2f6f65c0c5e2316b09e8cb46abab96e8f2ae754bdffd662e804a33277263cd9
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3c87a18ccc821b756f8fecf0a1e33873b3617702f02d6f52c0042644b36bee0d
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -4669,6 +4669,364 @@
|
|
| 4669 |
"eval_samples_per_second": 248.518,
|
| 4670 |
"eval_steps_per_second": 5.219,
|
| 4671 |
"step": 6500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4672 |
}
|
| 4673 |
],
|
| 4674 |
"logging_steps": 10,
|
|
@@ -4688,7 +5046,7 @@
|
|
| 4688 |
"attributes": {}
|
| 4689 |
}
|
| 4690 |
},
|
| 4691 |
-
"total_flos": 2.
|
| 4692 |
"train_batch_size": 48,
|
| 4693 |
"trial_name": null,
|
| 4694 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 1.1826322013853692,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 7000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 4669 |
"eval_samples_per_second": 248.518,
|
| 4670 |
"eval_steps_per_second": 5.219,
|
| 4671 |
"step": 6500
|
| 4672 |
+
},
|
| 4673 |
+
{
|
| 4674 |
+
"epoch": 1.0998479472883933,
|
| 4675 |
+
"grad_norm": 0.4943171739578247,
|
| 4676 |
+
"learning_rate": 0.0001695828863325459,
|
| 4677 |
+
"loss": 4.467470932006836,
|
| 4678 |
+
"step": 6510
|
| 4679 |
+
},
|
| 4680 |
+
{
|
| 4681 |
+
"epoch": 1.101537421861801,
|
| 4682 |
+
"grad_norm": 0.474933385848999,
|
| 4683 |
+
"learning_rate": 0.00016910788818067434,
|
| 4684 |
+
"loss": 4.4371185302734375,
|
| 4685 |
+
"step": 6520
|
| 4686 |
+
},
|
| 4687 |
+
{
|
| 4688 |
+
"epoch": 1.1032268964352085,
|
| 4689 |
+
"grad_norm": 0.5118041634559631,
|
| 4690 |
+
"learning_rate": 0.0001686326951796907,
|
| 4691 |
+
"loss": 4.451096725463867,
|
| 4692 |
+
"step": 6530
|
| 4693 |
+
},
|
| 4694 |
+
{
|
| 4695 |
+
"epoch": 1.1049163710086163,
|
| 4696 |
+
"grad_norm": 0.5289651155471802,
|
| 4697 |
+
"learning_rate": 0.00016815731217528667,
|
| 4698 |
+
"loss": 4.448075485229492,
|
| 4699 |
+
"step": 6540
|
| 4700 |
+
},
|
| 4701 |
+
{
|
| 4702 |
+
"epoch": 1.106605845582024,
|
| 4703 |
+
"grad_norm": 0.5182890295982361,
|
| 4704 |
+
"learning_rate": 0.00016768174401509143,
|
| 4705 |
+
"loss": 4.467396926879883,
|
| 4706 |
+
"step": 6550
|
| 4707 |
+
},
|
| 4708 |
+
{
|
| 4709 |
+
"epoch": 1.1082953201554318,
|
| 4710 |
+
"grad_norm": 0.5209820866584778,
|
| 4711 |
+
"learning_rate": 0.0001672059955486223,
|
| 4712 |
+
"loss": 4.459186172485351,
|
| 4713 |
+
"step": 6560
|
| 4714 |
+
},
|
| 4715 |
+
{
|
| 4716 |
+
"epoch": 1.1099847947288393,
|
| 4717 |
+
"grad_norm": 0.48584309220314026,
|
| 4718 |
+
"learning_rate": 0.000166730071627235,
|
| 4719 |
+
"loss": 4.46546516418457,
|
| 4720 |
+
"step": 6570
|
| 4721 |
+
},
|
| 4722 |
+
{
|
| 4723 |
+
"epoch": 1.111674269302247,
|
| 4724 |
+
"grad_norm": 0.5017306804656982,
|
| 4725 |
+
"learning_rate": 0.00016625397710407487,
|
| 4726 |
+
"loss": 4.452592086791992,
|
| 4727 |
+
"step": 6580
|
| 4728 |
+
},
|
| 4729 |
+
{
|
| 4730 |
+
"epoch": 1.1133637438756547,
|
| 4731 |
+
"grad_norm": 0.46485376358032227,
|
| 4732 |
+
"learning_rate": 0.00016577771683402647,
|
| 4733 |
+
"loss": 4.46324348449707,
|
| 4734 |
+
"step": 6590
|
| 4735 |
+
},
|
| 4736 |
+
{
|
| 4737 |
+
"epoch": 1.1150532184490622,
|
| 4738 |
+
"grad_norm": 0.5154596567153931,
|
| 4739 |
+
"learning_rate": 0.00016530129567366483,
|
| 4740 |
+
"loss": 4.457768249511719,
|
| 4741 |
+
"step": 6600
|
| 4742 |
+
},
|
| 4743 |
+
{
|
| 4744 |
+
"epoch": 1.11674269302247,
|
| 4745 |
+
"grad_norm": 0.49490463733673096,
|
| 4746 |
+
"learning_rate": 0.0001648247184812054,
|
| 4747 |
+
"loss": 4.427638244628906,
|
| 4748 |
+
"step": 6610
|
| 4749 |
+
},
|
| 4750 |
+
{
|
| 4751 |
+
"epoch": 1.1184321675958777,
|
| 4752 |
+
"grad_norm": 0.4721022844314575,
|
| 4753 |
+
"learning_rate": 0.00016434799011645507,
|
| 4754 |
+
"loss": 4.4389793395996096,
|
| 4755 |
+
"step": 6620
|
| 4756 |
+
},
|
| 4757 |
+
{
|
| 4758 |
+
"epoch": 1.1201216421692854,
|
| 4759 |
+
"grad_norm": 0.4648183286190033,
|
| 4760 |
+
"learning_rate": 0.00016387111544076193,
|
| 4761 |
+
"loss": 4.460124969482422,
|
| 4762 |
+
"step": 6630
|
| 4763 |
+
},
|
| 4764 |
+
{
|
| 4765 |
+
"epoch": 1.121811116742693,
|
| 4766 |
+
"grad_norm": 0.5035665035247803,
|
| 4767 |
+
"learning_rate": 0.00016339409931696625,
|
| 4768 |
+
"loss": 4.439287185668945,
|
| 4769 |
+
"step": 6640
|
| 4770 |
+
},
|
| 4771 |
+
{
|
| 4772 |
+
"epoch": 1.1235005913161007,
|
| 4773 |
+
"grad_norm": 0.4910880923271179,
|
| 4774 |
+
"learning_rate": 0.00016291694660935065,
|
| 4775 |
+
"loss": 4.456634140014648,
|
| 4776 |
+
"step": 6650
|
| 4777 |
+
},
|
| 4778 |
+
{
|
| 4779 |
+
"epoch": 1.1251900658895084,
|
| 4780 |
+
"grad_norm": 0.48906245827674866,
|
| 4781 |
+
"learning_rate": 0.00016243966218359047,
|
| 4782 |
+
"loss": 4.428804016113281,
|
| 4783 |
+
"step": 6660
|
| 4784 |
+
},
|
| 4785 |
+
{
|
| 4786 |
+
"epoch": 1.126879540462916,
|
| 4787 |
+
"grad_norm": 0.5756556391716003,
|
| 4788 |
+
"learning_rate": 0.00016196225090670435,
|
| 4789 |
+
"loss": 4.411157608032227,
|
| 4790 |
+
"step": 6670
|
| 4791 |
+
},
|
| 4792 |
+
{
|
| 4793 |
+
"epoch": 1.1285690150363237,
|
| 4794 |
+
"grad_norm": 0.49011167883872986,
|
| 4795 |
+
"learning_rate": 0.0001614847176470043,
|
| 4796 |
+
"loss": 4.435109329223633,
|
| 4797 |
+
"step": 6680
|
| 4798 |
+
},
|
| 4799 |
+
{
|
| 4800 |
+
"epoch": 1.1302584896097314,
|
| 4801 |
+
"grad_norm": 0.4775542616844177,
|
| 4802 |
+
"learning_rate": 0.00016100706727404645,
|
| 4803 |
+
"loss": 4.428675842285156,
|
| 4804 |
+
"step": 6690
|
| 4805 |
+
},
|
| 4806 |
+
{
|
| 4807 |
+
"epoch": 1.131947964183139,
|
| 4808 |
+
"grad_norm": 0.5201391577720642,
|
| 4809 |
+
"learning_rate": 0.00016052930465858094,
|
| 4810 |
+
"loss": 4.4389808654785154,
|
| 4811 |
+
"step": 6700
|
| 4812 |
+
},
|
| 4813 |
+
{
|
| 4814 |
+
"epoch": 1.1336374387565467,
|
| 4815 |
+
"grad_norm": 0.49004724621772766,
|
| 4816 |
+
"learning_rate": 0.00016005143467250267,
|
| 4817 |
+
"loss": 4.459021377563476,
|
| 4818 |
+
"step": 6710
|
| 4819 |
+
},
|
| 4820 |
+
{
|
| 4821 |
+
"epoch": 1.1353269133299544,
|
| 4822 |
+
"grad_norm": 0.49011871218681335,
|
| 4823 |
+
"learning_rate": 0.00015957346218880124,
|
| 4824 |
+
"loss": 4.455972290039062,
|
| 4825 |
+
"step": 6720
|
| 4826 |
+
},
|
| 4827 |
+
{
|
| 4828 |
+
"epoch": 1.1370163879033621,
|
| 4829 |
+
"grad_norm": 0.5173168182373047,
|
| 4830 |
+
"learning_rate": 0.0001590953920815117,
|
| 4831 |
+
"loss": 4.443459701538086,
|
| 4832 |
+
"step": 6730
|
| 4833 |
+
},
|
| 4834 |
+
{
|
| 4835 |
+
"epoch": 1.1387058624767696,
|
| 4836 |
+
"grad_norm": 0.47700756788253784,
|
| 4837 |
+
"learning_rate": 0.00015861722922566436,
|
| 4838 |
+
"loss": 4.435110473632813,
|
| 4839 |
+
"step": 6740
|
| 4840 |
+
},
|
| 4841 |
+
{
|
| 4842 |
+
"epoch": 1.1403953370501774,
|
| 4843 |
+
"grad_norm": 0.5626063942909241,
|
| 4844 |
+
"learning_rate": 0.00015813897849723544,
|
| 4845 |
+
"loss": 4.432453536987305,
|
| 4846 |
+
"step": 6750
|
| 4847 |
+
},
|
| 4848 |
+
{
|
| 4849 |
+
"epoch": 1.142084811623585,
|
| 4850 |
+
"grad_norm": 0.49542316794395447,
|
| 4851 |
+
"learning_rate": 0.0001576606447730972,
|
| 4852 |
+
"loss": 4.4374950408935545,
|
| 4853 |
+
"step": 6760
|
| 4854 |
+
},
|
| 4855 |
+
{
|
| 4856 |
+
"epoch": 1.1437742861969928,
|
| 4857 |
+
"grad_norm": 0.5116281509399414,
|
| 4858 |
+
"learning_rate": 0.0001571822329309682,
|
| 4859 |
+
"loss": 4.423119354248047,
|
| 4860 |
+
"step": 6770
|
| 4861 |
+
},
|
| 4862 |
+
{
|
| 4863 |
+
"epoch": 1.1454637607704004,
|
| 4864 |
+
"grad_norm": 0.4868847131729126,
|
| 4865 |
+
"learning_rate": 0.00015670374784936371,
|
| 4866 |
+
"loss": 4.4402107238769535,
|
| 4867 |
+
"step": 6780
|
| 4868 |
+
},
|
| 4869 |
+
{
|
| 4870 |
+
"epoch": 1.147153235343808,
|
| 4871 |
+
"grad_norm": 0.4938635230064392,
|
| 4872 |
+
"learning_rate": 0.00015622519440754566,
|
| 4873 |
+
"loss": 4.424631881713867,
|
| 4874 |
+
"step": 6790
|
| 4875 |
+
},
|
| 4876 |
+
{
|
| 4877 |
+
"epoch": 1.1488427099172158,
|
| 4878 |
+
"grad_norm": 0.5740174651145935,
|
| 4879 |
+
"learning_rate": 0.0001557465774854732,
|
| 4880 |
+
"loss": 4.450838470458985,
|
| 4881 |
+
"step": 6800
|
| 4882 |
+
},
|
| 4883 |
+
{
|
| 4884 |
+
"epoch": 1.1505321844906233,
|
| 4885 |
+
"grad_norm": 0.4828670918941498,
|
| 4886 |
+
"learning_rate": 0.0001552679019637528,
|
| 4887 |
+
"loss": 4.438276290893555,
|
| 4888 |
+
"step": 6810
|
| 4889 |
+
},
|
| 4890 |
+
{
|
| 4891 |
+
"epoch": 1.152221659064031,
|
| 4892 |
+
"grad_norm": 0.4659689664840698,
|
| 4893 |
+
"learning_rate": 0.00015478917272358848,
|
| 4894 |
+
"loss": 4.426282501220703,
|
| 4895 |
+
"step": 6820
|
| 4896 |
+
},
|
| 4897 |
+
{
|
| 4898 |
+
"epoch": 1.1539111336374388,
|
| 4899 |
+
"grad_norm": 0.4927656352519989,
|
| 4900 |
+
"learning_rate": 0.000154310394646732,
|
| 4901 |
+
"loss": 4.464373016357422,
|
| 4902 |
+
"step": 6830
|
| 4903 |
+
},
|
| 4904 |
+
{
|
| 4905 |
+
"epoch": 1.1556006082108463,
|
| 4906 |
+
"grad_norm": 0.5161291360855103,
|
| 4907 |
+
"learning_rate": 0.00015383157261543318,
|
| 4908 |
+
"loss": 4.416297531127929,
|
| 4909 |
+
"step": 6840
|
| 4910 |
+
},
|
| 4911 |
+
{
|
| 4912 |
+
"epoch": 1.157290082784254,
|
| 4913 |
+
"grad_norm": 0.4933563768863678,
|
| 4914 |
+
"learning_rate": 0.00015335271151239,
|
| 4915 |
+
"loss": 4.420982742309571,
|
| 4916 |
+
"step": 6850
|
| 4917 |
+
},
|
| 4918 |
+
{
|
| 4919 |
+
"epoch": 1.1589795573576618,
|
| 4920 |
+
"grad_norm": 0.4847005307674408,
|
| 4921 |
+
"learning_rate": 0.00015287381622069892,
|
| 4922 |
+
"loss": 4.416022872924804,
|
| 4923 |
+
"step": 6860
|
| 4924 |
+
},
|
| 4925 |
+
{
|
| 4926 |
+
"epoch": 1.1606690319310695,
|
| 4927 |
+
"grad_norm": 0.4981960654258728,
|
| 4928 |
+
"learning_rate": 0.00015239489162380504,
|
| 4929 |
+
"loss": 4.422767639160156,
|
| 4930 |
+
"step": 6870
|
| 4931 |
+
},
|
| 4932 |
+
{
|
| 4933 |
+
"epoch": 1.162358506504477,
|
| 4934 |
+
"grad_norm": 0.5001937747001648,
|
| 4935 |
+
"learning_rate": 0.0001519159426054522,
|
| 4936 |
+
"loss": 4.4368339538574215,
|
| 4937 |
+
"step": 6880
|
| 4938 |
+
},
|
| 4939 |
+
{
|
| 4940 |
+
"epoch": 1.1640479810778848,
|
| 4941 |
+
"grad_norm": 0.5044972896575928,
|
| 4942 |
+
"learning_rate": 0.0001514369740496334,
|
| 4943 |
+
"loss": 4.411078643798828,
|
| 4944 |
+
"step": 6890
|
| 4945 |
+
},
|
| 4946 |
+
{
|
| 4947 |
+
"epoch": 1.1657374556512925,
|
| 4948 |
+
"grad_norm": 0.4734691083431244,
|
| 4949 |
+
"learning_rate": 0.00015095799084054073,
|
| 4950 |
+
"loss": 4.438079071044922,
|
| 4951 |
+
"step": 6900
|
| 4952 |
+
},
|
| 4953 |
+
{
|
| 4954 |
+
"epoch": 1.1674269302247002,
|
| 4955 |
+
"grad_norm": 0.49377161264419556,
|
| 4956 |
+
"learning_rate": 0.00015047899786251587,
|
| 4957 |
+
"loss": 4.442370986938476,
|
| 4958 |
+
"step": 6910
|
| 4959 |
+
},
|
| 4960 |
+
{
|
| 4961 |
+
"epoch": 1.1691164047981077,
|
| 4962 |
+
"grad_norm": 0.5010132193565369,
|
| 4963 |
+
"learning_rate": 0.00015,
|
| 4964 |
+
"loss": 4.442108917236328,
|
| 4965 |
+
"step": 6920
|
| 4966 |
+
},
|
| 4967 |
+
{
|
| 4968 |
+
"epoch": 1.1708058793715155,
|
| 4969 |
+
"grad_norm": 0.5035766959190369,
|
| 4970 |
+
"learning_rate": 0.0001495210021374841,
|
| 4971 |
+
"loss": 4.430604553222656,
|
| 4972 |
+
"step": 6930
|
| 4973 |
+
},
|
| 4974 |
+
{
|
| 4975 |
+
"epoch": 1.1724953539449232,
|
| 4976 |
+
"grad_norm": 0.4899141788482666,
|
| 4977 |
+
"learning_rate": 0.00014904200915945927,
|
| 4978 |
+
"loss": 4.435578918457031,
|
| 4979 |
+
"step": 6940
|
| 4980 |
+
},
|
| 4981 |
+
{
|
| 4982 |
+
"epoch": 1.1741848285183307,
|
| 4983 |
+
"grad_norm": 0.4718686044216156,
|
| 4984 |
+
"learning_rate": 0.00014856302595036663,
|
| 4985 |
+
"loss": 4.429093551635742,
|
| 4986 |
+
"step": 6950
|
| 4987 |
+
},
|
| 4988 |
+
{
|
| 4989 |
+
"epoch": 1.1758743030917385,
|
| 4990 |
+
"grad_norm": 0.4881162941455841,
|
| 4991 |
+
"learning_rate": 0.00014808405739454776,
|
| 4992 |
+
"loss": 4.408749008178711,
|
| 4993 |
+
"step": 6960
|
| 4994 |
+
},
|
| 4995 |
+
{
|
| 4996 |
+
"epoch": 1.1775637776651462,
|
| 4997 |
+
"grad_norm": 0.46740713715553284,
|
| 4998 |
+
"learning_rate": 0.00014760510837619493,
|
| 4999 |
+
"loss": 4.419464492797852,
|
| 5000 |
+
"step": 6970
|
| 5001 |
+
},
|
| 5002 |
+
{
|
| 5003 |
+
"epoch": 1.1792532522385537,
|
| 5004 |
+
"grad_norm": 0.4737609922885895,
|
| 5005 |
+
"learning_rate": 0.00014712618377930105,
|
| 5006 |
+
"loss": 4.421468353271484,
|
| 5007 |
+
"step": 6980
|
| 5008 |
+
},
|
| 5009 |
+
{
|
| 5010 |
+
"epoch": 1.1809427268119614,
|
| 5011 |
+
"grad_norm": 0.4975055754184723,
|
| 5012 |
+
"learning_rate": 0.00014664728848760996,
|
| 5013 |
+
"loss": 4.422280502319336,
|
| 5014 |
+
"step": 6990
|
| 5015 |
+
},
|
| 5016 |
+
{
|
| 5017 |
+
"epoch": 1.1826322013853692,
|
| 5018 |
+
"grad_norm": 0.4839191734790802,
|
| 5019 |
+
"learning_rate": 0.00014616842738456682,
|
| 5020 |
+
"loss": 4.395424652099609,
|
| 5021 |
+
"step": 7000
|
| 5022 |
+
},
|
| 5023 |
+
{
|
| 5024 |
+
"epoch": 1.1826322013853692,
|
| 5025 |
+
"eval_loss": 4.412718772888184,
|
| 5026 |
+
"eval_runtime": 4.0717,
|
| 5027 |
+
"eval_samples_per_second": 245.6,
|
| 5028 |
+
"eval_steps_per_second": 5.158,
|
| 5029 |
+
"step": 7000
|
| 5030 |
}
|
| 5031 |
],
|
| 5032 |
"logging_steps": 10,
|
|
|
|
| 5046 |
"attributes": {}
|
| 5047 |
}
|
| 5048 |
},
|
| 5049 |
+
"total_flos": 2.3411768424608563e+17,
|
| 5050 |
"train_batch_size": 48,
|
| 5051 |
"trial_name": null,
|
| 5052 |
"trial_params": null
|