Training in progress, step 6000, checkpoint
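The files below are the periodic checkpoint that `transformers.Trainer` pushes to the Hub while training. As a minimal, hypothetical sketch (the training script is not part of this commit; `model`, `train_ds`, and `eval_ds` are placeholders, and only the batch size, logging cadence, and eval cadence are taken from the committed `trainer_state.json`), a checkpoint like this is typically produced and later resumed roughly as follows:

```python
# Hedged sketch of the kind of Trainer setup that writes checkpoints like
# last-checkpoint/. Only per_device_train_batch_size, logging_steps and
# eval_steps mirror values visible in trainer_state.json; everything else,
# including model, train_ds and train/eval data, is an assumption.
from transformers import Trainer, TrainingArguments

args = TrainingArguments(
    output_dir="out",                # checkpoints are written to out/checkpoint-<step>
    per_device_train_batch_size=48,  # "train_batch_size": 48 in trainer_state.json
    logging_steps=10,                # "logging_steps": 10
    eval_strategy="steps",           # "evaluation_strategy" on older transformers releases
    eval_steps=500,                  # "eval_steps": 500
    save_steps=500,                  # assumed save cadence
    push_to_hub=True,                # assumed: mirrors checkpoints to this repo
)

# model, train_ds and eval_ds are placeholders defined elsewhere.
trainer = Trainer(model=model, args=args,
                  train_dataset=train_ds, eval_dataset=eval_ds)

# Resuming restores model.safetensors, optimizer.pt, scheduler.pt and
# rng_state.pth, so the run continues from global step 6000.
trainer.train(resume_from_checkpoint=True)  # or a path such as "out/checkpoint-6000"
```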
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8c74bfe809433060df3635ef406235f0717bc42781fff9acd5df0f855eb57b3f
 size 328277848
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:70990f23441c3c0fadf8ff7b5b48864178e6a3f9dbc5c1184cb7c19ddf968c0f
 size 318646859
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:11940f1313899a11d3e47a2d43f508134dd8e03ac7613f4eca32c754da2d1839
 size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5732bb4fae95fda377427872ad7c4fed0c45a84922701b3143ffa39cf761f9db
 size 1465
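Each of the binary files above is stored as a Git LFS pointer: the tracked text records only the spec version, the `oid sha256:` digest of the payload, and its `size` in bytes. A small stdlib-only sketch (file names are illustrative, not part of this commit) for checking a downloaded object against its pointer:

```python
# Verify a downloaded LFS object against its pointer file: recompute the
# SHA-256 digest and byte count and compare them with the "oid sha256:" and
# "size" fields of the pointer. Paths below are illustrative.
import hashlib
import os

def read_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields."""
    fields = {}
    with open(pointer_path, encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify(payload_path: str, pointer_path: str) -> bool:
    fields = read_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    with open(payload_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):  # 1 MiB chunks
            digest.update(chunk)

    return (digest.hexdigest() == expected_oid
            and os.path.getsize(payload_path) == expected_size)

# Example (illustrative paths):
# verify("model.safetensors", "model.safetensors.pointer")
```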
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 1.0136847440446022,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 6000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -3953,6 +3953,364 @@
       "eval_samples_per_second": 275.136,
       "eval_steps_per_second": 5.778,
       "step": 5500
+    },
+    {
+      "epoch": 0.9309004899476263,
+      "grad_norm": 0.4983241558074951,
+      "learning_rate": 0.00021527977734609537,
+      "loss": 4.547625732421875,
+      "step": 5510
+    },
+    {
+      "epoch": 0.9325899645210339,
+      "grad_norm": 0.5012770295143127,
+      "learning_rate": 0.00021484818619522722,
+      "loss": 4.557040023803711,
+      "step": 5520
+    },
+    {
+      "epoch": 0.9342794390944417,
+      "grad_norm": 0.5078200101852417,
+      "learning_rate": 0.00021441593376712224,
+      "loss": 4.553184890747071,
+      "step": 5530
+    },
+    {
+      "epoch": 0.9359689136678493,
+      "grad_norm": 0.48705384135246277,
+      "learning_rate": 0.0002139830244695935,
+      "loss": 4.5813232421875,
+      "step": 5540
+    },
+    {
+      "epoch": 0.937658388241257,
+      "grad_norm": 0.5023474097251892,
+      "learning_rate": 0.00021354946271715265,
+      "loss": 4.552815628051758,
+      "step": 5550
+    },
+    {
+      "epoch": 0.9393478628146646,
+      "grad_norm": 0.5058281421661377,
+      "learning_rate": 0.00021311525293096444,
+      "loss": 4.541165924072265,
+      "step": 5560
+    },
+    {
+      "epoch": 0.9410373373880723,
+      "grad_norm": 0.5129496455192566,
+      "learning_rate": 0.00021268039953880184,
+      "loss": 4.529154968261719,
+      "step": 5570
+    },
+    {
+      "epoch": 0.94272681196148,
+      "grad_norm": 0.5097109079360962,
+      "learning_rate": 0.00021224490697500088,
+      "loss": 4.535088348388672,
+      "step": 5580
+    },
+    {
+      "epoch": 0.9444162865348876,
+      "grad_norm": 0.5103420615196228,
+      "learning_rate": 0.00021180877968041552,
+      "loss": 4.553527069091797,
+      "step": 5590
+    },
+    {
+      "epoch": 0.9461057611082954,
+      "grad_norm": 0.4936409294605255,
+      "learning_rate": 0.00021137202210237213,
+      "loss": 4.54007568359375,
+      "step": 5600
+    },
+    {
+      "epoch": 0.947795235681703,
+      "grad_norm": 0.5701144933700562,
+      "learning_rate": 0.0002109346386946243,
+      "loss": 4.558887100219726,
+      "step": 5610
+    },
+    {
+      "epoch": 0.9494847102551106,
+      "grad_norm": 0.4890182912349701,
+      "learning_rate": 0.00021049663391730752,
+      "loss": 4.543179702758789,
+      "step": 5620
+    },
+    {
+      "epoch": 0.9511741848285183,
+      "grad_norm": 0.5074143409729004,
+      "learning_rate": 0.00021005801223689344,
+      "loss": 4.5704292297363285,
+      "step": 5630
+    },
+    {
+      "epoch": 0.952863659401926,
+      "grad_norm": 0.4767675995826721,
+      "learning_rate": 0.00020961877812614458,
+      "loss": 4.569948196411133,
+      "step": 5640
+    },
+    {
+      "epoch": 0.9545531339753337,
+      "grad_norm": 0.5034293532371521,
+      "learning_rate": 0.00020917893606406843,
+      "loss": 4.524322128295898,
+      "step": 5650
+    },
+    {
+      "epoch": 0.9562426085487413,
+      "grad_norm": 0.5619840621948242,
+      "learning_rate": 0.0002087384905358722,
+      "loss": 4.528865051269531,
+      "step": 5660
+    },
+    {
+      "epoch": 0.9579320831221491,
+      "grad_norm": 0.5692474842071533,
+      "learning_rate": 0.00020829744603291663,
+      "loss": 4.5155292510986325,
+      "step": 5670
+    },
+    {
+      "epoch": 0.9596215576955567,
+      "grad_norm": 0.504224419593811,
+      "learning_rate": 0.00020785580705267047,
+      "loss": 4.559905624389648,
+      "step": 5680
+    },
+    {
+      "epoch": 0.9613110322689643,
+      "grad_norm": 0.563014805316925,
+      "learning_rate": 0.00020741357809866447,
+      "loss": 4.556017303466797,
+      "step": 5690
+    },
+    {
+      "epoch": 0.963000506842372,
+      "grad_norm": 0.4872301518917084,
+      "learning_rate": 0.0002069707636804457,
+      "loss": 4.550839233398437,
+      "step": 5700
+    },
+    {
+      "epoch": 0.9646899814157797,
+      "grad_norm": 0.5135483145713806,
+      "learning_rate": 0.0002065273683135312,
+      "loss": 4.550697708129883,
+      "step": 5710
+    },
+    {
+      "epoch": 0.9663794559891874,
+      "grad_norm": 0.4852290451526642,
+      "learning_rate": 0.00020608339651936224,
+      "loss": 4.531842422485352,
+      "step": 5720
+    },
+    {
+      "epoch": 0.968068930562595,
+      "grad_norm": 0.5045028924942017,
+      "learning_rate": 0.00020563885282525802,
+      "loss": 4.532521057128906,
+      "step": 5730
+    },
+    {
+      "epoch": 0.9697584051360028,
+      "grad_norm": 0.530616044998169,
+      "learning_rate": 0.00020519374176436968,
+      "loss": 4.546891403198242,
+      "step": 5740
+    },
+    {
+      "epoch": 0.9714478797094104,
+      "grad_norm": 0.49565091729164124,
+      "learning_rate": 0.00020474806787563392,
+      "loss": 4.533766555786133,
+      "step": 5750
+    },
+    {
+      "epoch": 0.973137354282818,
+      "grad_norm": 0.5225724577903748,
+      "learning_rate": 0.0002043018357037267,
+      "loss": 4.542942810058594,
+      "step": 5760
+    },
+    {
+      "epoch": 0.9748268288562257,
+      "grad_norm": 0.49189162254333496,
+      "learning_rate": 0.00020385504979901712,
+      "loss": 4.545899200439453,
+      "step": 5770
+    },
+    {
+      "epoch": 0.9765163034296334,
+      "grad_norm": 0.5116291642189026,
+      "learning_rate": 0.00020340771471752078,
+      "loss": 4.532541656494141,
+      "step": 5780
+    },
+    {
+      "epoch": 0.9782057780030411,
+      "grad_norm": 0.5132644772529602,
+      "learning_rate": 0.0002029598350208534,
+      "loss": 4.524928283691406,
+      "step": 5790
+    },
+    {
+      "epoch": 0.9798952525764487,
+      "grad_norm": 0.4904372990131378,
+      "learning_rate": 0.00020251141527618434,
+      "loss": 4.532776641845703,
+      "step": 5800
+    },
+    {
+      "epoch": 0.9815847271498563,
+      "grad_norm": 0.48598089814186096,
+      "learning_rate": 0.00020206246005618998,
+      "loss": 4.519465637207031,
+      "step": 5810
+    },
+    {
+      "epoch": 0.9832742017232641,
+      "grad_norm": 0.5415476560592651,
+      "learning_rate": 0.00020161297393900713,
+      "loss": 4.512179565429688,
+      "step": 5820
+    },
+    {
+      "epoch": 0.9849636762966717,
+      "grad_norm": 0.5061231255531311,
+      "learning_rate": 0.00020116296150818623,
+      "loss": 4.534863662719727,
+      "step": 5830
+    },
+    {
+      "epoch": 0.9866531508700794,
+      "grad_norm": 0.5157834887504578,
+      "learning_rate": 0.0002007124273526449,
+      "loss": 4.50738639831543,
+      "step": 5840
+    },
+    {
+      "epoch": 0.988342625443487,
+      "grad_norm": 0.509292483329773,
+      "learning_rate": 0.00020026137606662077,
+      "loss": 4.5266845703125,
+      "step": 5850
+    },
+    {
+      "epoch": 0.9900321000168948,
+      "grad_norm": 0.5107020139694214,
+      "learning_rate": 0.0001998098122496249,
+      "loss": 4.533035659790039,
+      "step": 5860
+    },
+    {
+      "epoch": 0.9917215745903024,
+      "grad_norm": 0.5432437062263489,
+      "learning_rate": 0.00019935774050639472,
+      "loss": 4.518278884887695,
+      "step": 5870
+    },
+    {
+      "epoch": 0.99341104916371,
+      "grad_norm": 0.5360410213470459,
+      "learning_rate": 0.0001989051654468473,
+      "loss": 4.502675628662109,
+      "step": 5880
+    },
+    {
+      "epoch": 0.9951005237371178,
+      "grad_norm": 0.5418276786804199,
+      "learning_rate": 0.00019845209168603195,
+      "loss": 4.5235343933105465,
+      "step": 5890
+    },
+    {
+      "epoch": 0.9967899983105254,
+      "grad_norm": 0.5157185792922974,
+      "learning_rate": 0.00019799852384408355,
+      "loss": 4.524081420898438,
+      "step": 5900
+    },
+    {
+      "epoch": 0.9984794728839331,
+      "grad_norm": 0.5043293237686157,
+      "learning_rate": 0.00019754446654617527,
+      "loss": 4.508223342895508,
+      "step": 5910
+    },
+    {
+      "epoch": 1.0001689474573408,
+      "grad_norm": 0.5386601090431213,
+      "learning_rate": 0.00019708992442247136,
+      "loss": 4.5236083984375,
+      "step": 5920
+    },
+    {
+      "epoch": 1.0018584220307485,
+      "grad_norm": 0.5341511368751526,
+      "learning_rate": 0.0001966349021080801,
+      "loss": 4.459320068359375,
+      "step": 5930
+    },
+    {
+      "epoch": 1.003547896604156,
+      "grad_norm": 0.5038416981697083,
+      "learning_rate": 0.0001961794042430062,
+      "loss": 4.505880355834961,
+      "step": 5940
+    },
+    {
+      "epoch": 1.0052373711775637,
+      "grad_norm": 0.47585076093673706,
+      "learning_rate": 0.000195723435472104,
+      "loss": 4.477125930786133,
+      "step": 5950
+    },
+    {
+      "epoch": 1.0069268457509715,
+      "grad_norm": 0.49405696988105774,
+      "learning_rate": 0.00019526700044502956,
+      "loss": 4.483388137817383,
+      "step": 5960
+    },
+    {
+      "epoch": 1.0086163203243792,
+      "grad_norm": 0.47832658886909485,
+      "learning_rate": 0.0001948101038161937,
+      "loss": 4.474266052246094,
+      "step": 5970
+    },
+    {
+      "epoch": 1.0103057948977867,
+      "grad_norm": 0.470113068819046,
+      "learning_rate": 0.0001943527502447141,
+      "loss": 4.483303833007812,
+      "step": 5980
+    },
+    {
+      "epoch": 1.0119952694711944,
+      "grad_norm": 0.4839136004447937,
+      "learning_rate": 0.00019389494439436836,
+      "loss": 4.453615188598633,
+      "step": 5990
+    },
+    {
+      "epoch": 1.0136847440446022,
+      "grad_norm": 0.482327401638031,
+      "learning_rate": 0.0001934366909335458,
+      "loss": 4.491983413696289,
+      "step": 6000
+    },
+    {
+      "epoch": 1.0136847440446022,
+      "eval_loss": 4.487085819244385,
+      "eval_runtime": 4.7973,
+      "eval_samples_per_second": 208.452,
+      "eval_steps_per_second": 4.377,
+      "step": 6000
     }
   ],
   "logging_steps": 10,
@@ -3972,7 +4330,7 @@
       "attributes": {}
     }
   },
-  "total_flos":
+  "total_flos": 2.0067200216019763e+17,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null
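The new `log_history` entries above are enough to sanity-check the state: the run logs every 10 steps, and an epoch of 1.0136847440446022 at global step 6000 implies roughly 6000 / 1.0136847 ≈ 5919 optimizer steps per epoch. A small stdlib sketch (the path is illustrative; the key names match those shown in the diff) for pulling the loss curve out of this checkpoint:

```python
# Read the training/eval history out of a Trainer checkpoint's
# trainer_state.json. The path is illustrative; key names ("log_history",
# "loss", "eval_loss", "step") match the entries shown in the diff above.
import json

with open("last-checkpoint/trainer_state.json", encoding="utf-8") as fh:
    state = json.load(fh)

train_log = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_log = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]

print("last train loss:", train_log[-1])  # e.g. (6000, 4.491983413696289)
print("last eval loss:", eval_log[-1])    # e.g. (6000, 4.487085819244385)

# Steps per epoch implied by the checkpoint header:
steps_per_epoch = state["global_step"] / state["epoch"]
print(f"~{steps_per_epoch:.0f} optimizer steps per epoch")  # ≈ 5919
```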