Training in progress, step 600, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13587864
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4899466220e60a7be496bec2b0702ec77d7b42f7c2abb9c89d5990de66858d42
|
| 3 |
size 13587864
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 27273018
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:04f03abccf9919cb7e6cb74b0991c90780af63ba8366b3ce90419ba802cfed1e
|
| 3 |
size 27273018
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3464f03cc2695b7e00d9a022811a7d39834c49f04402f78117f949150b3e4b68
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c10b97f44aaa8e8a1f8b6b752fbcc49ead2ee866b2143ac6a7831438a80daac
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.004960117861628532,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-450",
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 25,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4041,6 +4041,372 @@
|
|
| 4041 |
"eval_samples_per_second": 47.487,
|
| 4042 |
"eval_steps_per_second": 47.487,
|
| 4043 |
"step": 550
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4044 |
}
|
| 4045 |
],
|
| 4046 |
"logging_steps": 1,
|
|
@@ -4055,7 +4421,7 @@
|
|
| 4055 |
"early_stopping_threshold": 0.0
|
| 4056 |
},
|
| 4057 |
"attributes": {
|
| 4058 |
-
"early_stopping_patience_counter":
|
| 4059 |
}
|
| 4060 |
},
|
| 4061 |
"TrainerControl": {
|
|
@@ -4064,12 +4430,12 @@
|
|
| 4064 |
"should_evaluate": false,
|
| 4065 |
"should_log": false,
|
| 4066 |
"should_save": true,
|
| 4067 |
-
"should_training_stop":
|
| 4068 |
},
|
| 4069 |
"attributes": {}
|
| 4070 |
}
|
| 4071 |
},
|
| 4072 |
-
"total_flos": 3.
|
| 4073 |
"train_batch_size": 1,
|
| 4074 |
"trial_name": null,
|
| 4075 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.004960117861628532,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-450",
|
| 4 |
+
"epoch": 1.9692307692307693,
|
| 5 |
"eval_steps": 25,
|
| 6 |
+
"global_step": 600,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4041 |
"eval_samples_per_second": 47.487,
|
| 4042 |
"eval_steps_per_second": 47.487,
|
| 4043 |
"step": 550
|
| 4044 |
+
},
|
| 4045 |
+
{
|
| 4046 |
+
"epoch": 1.8084102564102564,
|
| 4047 |
+
"grad_norm": 0.0034276428632438183,
|
| 4048 |
+
"learning_rate": 2.3195015539906243e-05,
|
| 4049 |
+
"loss": 0.0049,
|
| 4050 |
+
"step": 551
|
| 4051 |
+
},
|
| 4052 |
+
{
|
| 4053 |
+
"epoch": 1.8116923076923077,
|
| 4054 |
+
"grad_norm": 0.0030685942620038986,
|
| 4055 |
+
"learning_rate": 2.3066675633983865e-05,
|
| 4056 |
+
"loss": 0.0048,
|
| 4057 |
+
"step": 552
|
| 4058 |
+
},
|
| 4059 |
+
{
|
| 4060 |
+
"epoch": 1.814974358974359,
|
| 4061 |
+
"grad_norm": 0.0046894908882677555,
|
| 4062 |
+
"learning_rate": 2.2940921796353956e-05,
|
| 4063 |
+
"loss": 0.0047,
|
| 4064 |
+
"step": 553
|
| 4065 |
+
},
|
| 4066 |
+
{
|
| 4067 |
+
"epoch": 1.8182564102564103,
|
| 4068 |
+
"grad_norm": 0.0035674276296049356,
|
| 4069 |
+
"learning_rate": 2.2817757767906625e-05,
|
| 4070 |
+
"loss": 0.0048,
|
| 4071 |
+
"step": 554
|
| 4072 |
+
},
|
| 4073 |
+
{
|
| 4074 |
+
"epoch": 1.8215384615384616,
|
| 4075 |
+
"grad_norm": 0.005269620567560196,
|
| 4076 |
+
"learning_rate": 2.2697187212491044e-05,
|
| 4077 |
+
"loss": 0.0051,
|
| 4078 |
+
"step": 555
|
| 4079 |
+
},
|
| 4080 |
+
{
|
| 4081 |
+
"epoch": 1.8248205128205128,
|
| 4082 |
+
"grad_norm": 0.008738451637327671,
|
| 4083 |
+
"learning_rate": 2.2579213716806474e-05,
|
| 4084 |
+
"loss": 0.0052,
|
| 4085 |
+
"step": 556
|
| 4086 |
+
},
|
| 4087 |
+
{
|
| 4088 |
+
"epoch": 1.828102564102564,
|
| 4089 |
+
"grad_norm": 0.008472139947116375,
|
| 4090 |
+
"learning_rate": 2.2463840790295566e-05,
|
| 4091 |
+
"loss": 0.0051,
|
| 4092 |
+
"step": 557
|
| 4093 |
+
},
|
| 4094 |
+
{
|
| 4095 |
+
"epoch": 1.8313846153846154,
|
| 4096 |
+
"grad_norm": 0.008605373091995716,
|
| 4097 |
+
"learning_rate": 2.2351071865039974e-05,
|
| 4098 |
+
"loss": 0.0051,
|
| 4099 |
+
"step": 558
|
| 4100 |
+
},
|
| 4101 |
+
{
|
| 4102 |
+
"epoch": 1.8346666666666667,
|
| 4103 |
+
"grad_norm": 0.02175315096974373,
|
| 4104 |
+
"learning_rate": 2.224091029565824e-05,
|
| 4105 |
+
"loss": 0.0053,
|
| 4106 |
+
"step": 559
|
| 4107 |
+
},
|
| 4108 |
+
{
|
| 4109 |
+
"epoch": 1.837948717948718,
|
| 4110 |
+
"grad_norm": 0.008465359918773174,
|
| 4111 |
+
"learning_rate": 2.2133359359206e-05,
|
| 4112 |
+
"loss": 0.0052,
|
| 4113 |
+
"step": 560
|
| 4114 |
+
},
|
| 4115 |
+
{
|
| 4116 |
+
"epoch": 1.8412307692307692,
|
| 4117 |
+
"grad_norm": 0.007232977543026209,
|
| 4118 |
+
"learning_rate": 2.2028422255078542e-05,
|
| 4119 |
+
"loss": 0.0052,
|
| 4120 |
+
"step": 561
|
| 4121 |
+
},
|
| 4122 |
+
{
|
| 4123 |
+
"epoch": 1.8445128205128205,
|
| 4124 |
+
"grad_norm": 0.007051311433315277,
|
| 4125 |
+
"learning_rate": 2.1926102104915553e-05,
|
| 4126 |
+
"loss": 0.0051,
|
| 4127 |
+
"step": 562
|
| 4128 |
+
},
|
| 4129 |
+
{
|
| 4130 |
+
"epoch": 1.8477948717948718,
|
| 4131 |
+
"grad_norm": 0.006151077803224325,
|
| 4132 |
+
"learning_rate": 2.182640195250835e-05,
|
| 4133 |
+
"loss": 0.005,
|
| 4134 |
+
"step": 563
|
| 4135 |
+
},
|
| 4136 |
+
{
|
| 4137 |
+
"epoch": 1.851076923076923,
|
| 4138 |
+
"grad_norm": 0.006573867984116077,
|
| 4139 |
+
"learning_rate": 2.1729324763709264e-05,
|
| 4140 |
+
"loss": 0.0051,
|
| 4141 |
+
"step": 564
|
| 4142 |
+
},
|
| 4143 |
+
{
|
| 4144 |
+
"epoch": 1.8543589743589743,
|
| 4145 |
+
"grad_norm": 0.00678396737203002,
|
| 4146 |
+
"learning_rate": 2.1634873426343427e-05,
|
| 4147 |
+
"loss": 0.0049,
|
| 4148 |
+
"step": 565
|
| 4149 |
+
},
|
| 4150 |
+
{
|
| 4151 |
+
"epoch": 1.8576410256410256,
|
| 4152 |
+
"grad_norm": 0.005578219890594482,
|
| 4153 |
+
"learning_rate": 2.1543050750122902e-05,
|
| 4154 |
+
"loss": 0.0048,
|
| 4155 |
+
"step": 566
|
| 4156 |
+
},
|
| 4157 |
+
{
|
| 4158 |
+
"epoch": 1.860923076923077,
|
| 4159 |
+
"grad_norm": 0.0040833973325788975,
|
| 4160 |
+
"learning_rate": 2.145385946656303e-05,
|
| 4161 |
+
"loss": 0.0047,
|
| 4162 |
+
"step": 567
|
| 4163 |
+
},
|
| 4164 |
+
{
|
| 4165 |
+
"epoch": 1.8642051282051282,
|
| 4166 |
+
"grad_norm": 0.004177347291260958,
|
| 4167 |
+
"learning_rate": 2.1367302228901282e-05,
|
| 4168 |
+
"loss": 0.0046,
|
| 4169 |
+
"step": 568
|
| 4170 |
+
},
|
| 4171 |
+
{
|
| 4172 |
+
"epoch": 1.8674871794871795,
|
| 4173 |
+
"grad_norm": 0.0036663906648755074,
|
| 4174 |
+
"learning_rate": 2.128338161201819e-05,
|
| 4175 |
+
"loss": 0.0047,
|
| 4176 |
+
"step": 569
|
| 4177 |
+
},
|
| 4178 |
+
{
|
| 4179 |
+
"epoch": 1.8707692307692307,
|
| 4180 |
+
"grad_norm": 0.003597427159547806,
|
| 4181 |
+
"learning_rate": 2.1202100112360894e-05,
|
| 4182 |
+
"loss": 0.0048,
|
| 4183 |
+
"step": 570
|
| 4184 |
+
},
|
| 4185 |
+
{
|
| 4186 |
+
"epoch": 1.874051282051282,
|
| 4187 |
+
"grad_norm": 0.0029398370534181595,
|
| 4188 |
+
"learning_rate": 2.1123460147868763e-05,
|
| 4189 |
+
"loss": 0.0048,
|
| 4190 |
+
"step": 571
|
| 4191 |
+
},
|
| 4192 |
+
{
|
| 4193 |
+
"epoch": 1.8773333333333333,
|
| 4194 |
+
"grad_norm": 0.003072077641263604,
|
| 4195 |
+
"learning_rate": 2.1047464057901542e-05,
|
| 4196 |
+
"loss": 0.0048,
|
| 4197 |
+
"step": 572
|
| 4198 |
+
},
|
| 4199 |
+
{
|
| 4200 |
+
"epoch": 1.8806153846153846,
|
| 4201 |
+
"grad_norm": 0.002605011221021414,
|
| 4202 |
+
"learning_rate": 2.0974114103169712e-05,
|
| 4203 |
+
"loss": 0.0048,
|
| 4204 |
+
"step": 573
|
| 4205 |
+
},
|
| 4206 |
+
{
|
| 4207 |
+
"epoch": 1.8838974358974359,
|
| 4208 |
+
"grad_norm": 0.002371675567701459,
|
| 4209 |
+
"learning_rate": 2.0903412465667293e-05,
|
| 4210 |
+
"loss": 0.0047,
|
| 4211 |
+
"step": 574
|
| 4212 |
+
},
|
| 4213 |
+
{
|
| 4214 |
+
"epoch": 1.8871794871794871,
|
| 4215 |
+
"grad_norm": 0.002911495743319392,
|
| 4216 |
+
"learning_rate": 2.0835361248606867e-05,
|
| 4217 |
+
"loss": 0.0047,
|
| 4218 |
+
"step": 575
|
| 4219 |
+
},
|
| 4220 |
+
{
|
| 4221 |
+
"epoch": 1.8871794871794871,
|
| 4222 |
+
"eval_loss": 0.0050178528763353825,
|
| 4223 |
+
"eval_runtime": 1.0828,
|
| 4224 |
+
"eval_samples_per_second": 46.176,
|
| 4225 |
+
"eval_steps_per_second": 46.176,
|
| 4226 |
+
"step": 575
|
| 4227 |
+
},
|
| 4228 |
+
{
|
| 4229 |
+
"epoch": 1.8904615384615384,
|
| 4230 |
+
"grad_norm": 0.0025259945541620255,
|
| 4231 |
+
"learning_rate": 2.0769962476357068e-05,
|
| 4232 |
+
"loss": 0.0047,
|
| 4233 |
+
"step": 576
|
| 4234 |
+
},
|
| 4235 |
+
{
|
| 4236 |
+
"epoch": 1.8937435897435897,
|
| 4237 |
+
"grad_norm": 0.0023200158029794693,
|
| 4238 |
+
"learning_rate": 2.070721809438233e-05,
|
| 4239 |
+
"loss": 0.0047,
|
| 4240 |
+
"step": 577
|
| 4241 |
+
},
|
| 4242 |
+
{
|
| 4243 |
+
"epoch": 1.897025641025641,
|
| 4244 |
+
"grad_norm": 0.0023292931728065014,
|
| 4245 |
+
"learning_rate": 2.0647129969185046e-05,
|
| 4246 |
+
"loss": 0.0048,
|
| 4247 |
+
"step": 578
|
| 4248 |
+
},
|
| 4249 |
+
{
|
| 4250 |
+
"epoch": 1.9003076923076923,
|
| 4251 |
+
"grad_norm": 0.0025951117277145386,
|
| 4252 |
+
"learning_rate": 2.058969988825001e-05,
|
| 4253 |
+
"loss": 0.0047,
|
| 4254 |
+
"step": 579
|
| 4255 |
+
},
|
| 4256 |
+
{
|
| 4257 |
+
"epoch": 1.9035897435897438,
|
| 4258 |
+
"grad_norm": 0.0026415924075990915,
|
| 4259 |
+
"learning_rate": 2.0534929559991233e-05,
|
| 4260 |
+
"loss": 0.0047,
|
| 4261 |
+
"step": 580
|
| 4262 |
+
},
|
| 4263 |
+
{
|
| 4264 |
+
"epoch": 1.9068717948717948,
|
| 4265 |
+
"grad_norm": 0.0020874382462352514,
|
| 4266 |
+
"learning_rate": 2.0482820613701192e-05,
|
| 4267 |
+
"loss": 0.0046,
|
| 4268 |
+
"step": 581
|
| 4269 |
+
},
|
| 4270 |
+
{
|
| 4271 |
+
"epoch": 1.9101538461538463,
|
| 4272 |
+
"grad_norm": 0.002052360912784934,
|
| 4273 |
+
"learning_rate": 2.043337459950229e-05,
|
| 4274 |
+
"loss": 0.0046,
|
| 4275 |
+
"step": 582
|
| 4276 |
+
},
|
| 4277 |
+
{
|
| 4278 |
+
"epoch": 1.9134358974358974,
|
| 4279 |
+
"grad_norm": 0.0021120973397046328,
|
| 4280 |
+
"learning_rate": 2.0386592988300747e-05,
|
| 4281 |
+
"loss": 0.0046,
|
| 4282 |
+
"step": 583
|
| 4283 |
+
},
|
| 4284 |
+
{
|
| 4285 |
+
"epoch": 1.9167179487179489,
|
| 4286 |
+
"grad_norm": 0.0021454044617712498,
|
| 4287 |
+
"learning_rate": 2.03424771717429e-05,
|
| 4288 |
+
"loss": 0.0047,
|
| 4289 |
+
"step": 584
|
| 4290 |
+
},
|
| 4291 |
+
{
|
| 4292 |
+
"epoch": 1.92,
|
| 4293 |
+
"grad_norm": 0.0023362315259873867,
|
| 4294 |
+
"learning_rate": 2.0301028462173774e-05,
|
| 4295 |
+
"loss": 0.0048,
|
| 4296 |
+
"step": 585
|
| 4297 |
+
},
|
| 4298 |
+
{
|
| 4299 |
+
"epoch": 1.9232820512820514,
|
| 4300 |
+
"grad_norm": 0.002209689933806658,
|
| 4301 |
+
"learning_rate": 2.0262248092598006e-05,
|
| 4302 |
+
"loss": 0.0048,
|
| 4303 |
+
"step": 586
|
| 4304 |
+
},
|
| 4305 |
+
{
|
| 4306 |
+
"epoch": 1.9265641025641025,
|
| 4307 |
+
"grad_norm": 0.0022381660528481007,
|
| 4308 |
+
"learning_rate": 2.0226137216643222e-05,
|
| 4309 |
+
"loss": 0.0048,
|
| 4310 |
+
"step": 587
|
| 4311 |
+
},
|
| 4312 |
+
{
|
| 4313 |
+
"epoch": 1.929846153846154,
|
| 4314 |
+
"grad_norm": 0.002202109433710575,
|
| 4315 |
+
"learning_rate": 2.019269690852569e-05,
|
| 4316 |
+
"loss": 0.0047,
|
| 4317 |
+
"step": 588
|
| 4318 |
+
},
|
| 4319 |
+
{
|
| 4320 |
+
"epoch": 1.933128205128205,
|
| 4321 |
+
"grad_norm": 0.0021981867030262947,
|
| 4322 |
+
"learning_rate": 2.016192816301837e-05,
|
| 4323 |
+
"loss": 0.0046,
|
| 4324 |
+
"step": 589
|
| 4325 |
+
},
|
| 4326 |
+
{
|
| 4327 |
+
"epoch": 1.9364102564102565,
|
| 4328 |
+
"grad_norm": 0.002059696475043893,
|
| 4329 |
+
"learning_rate": 2.0133831895421322e-05,
|
| 4330 |
+
"loss": 0.0047,
|
| 4331 |
+
"step": 590
|
| 4332 |
+
},
|
| 4333 |
+
{
|
| 4334 |
+
"epoch": 1.9396923076923076,
|
| 4335 |
+
"grad_norm": 0.0020739359315484762,
|
| 4336 |
+
"learning_rate": 2.0108408941534486e-05,
|
| 4337 |
+
"loss": 0.0046,
|
| 4338 |
+
"step": 591
|
| 4339 |
+
},
|
| 4340 |
+
{
|
| 4341 |
+
"epoch": 1.942974358974359,
|
| 4342 |
+
"grad_norm": 0.0024034185335040092,
|
| 4343 |
+
"learning_rate": 2.00856600576328e-05,
|
| 4344 |
+
"loss": 0.0047,
|
| 4345 |
+
"step": 592
|
| 4346 |
+
},
|
| 4347 |
+
{
|
| 4348 |
+
"epoch": 1.9462564102564102,
|
| 4349 |
+
"grad_norm": 0.0022281610872596502,
|
| 4350 |
+
"learning_rate": 2.006558592044373e-05,
|
| 4351 |
+
"loss": 0.0048,
|
| 4352 |
+
"step": 593
|
| 4353 |
+
},
|
| 4354 |
+
{
|
| 4355 |
+
"epoch": 1.9495384615384617,
|
| 4356 |
+
"grad_norm": 0.0029593328945338726,
|
| 4357 |
+
"learning_rate": 2.0048187127127092e-05,
|
| 4358 |
+
"loss": 0.0049,
|
| 4359 |
+
"step": 594
|
| 4360 |
+
},
|
| 4361 |
+
{
|
| 4362 |
+
"epoch": 1.9528205128205127,
|
| 4363 |
+
"grad_norm": 0.002573527628555894,
|
| 4364 |
+
"learning_rate": 2.003346419525735e-05,
|
| 4365 |
+
"loss": 0.0048,
|
| 4366 |
+
"step": 595
|
| 4367 |
+
},
|
| 4368 |
+
{
|
| 4369 |
+
"epoch": 1.9561025641025642,
|
| 4370 |
+
"grad_norm": 0.002822197275236249,
|
| 4371 |
+
"learning_rate": 2.002141756280818e-05,
|
| 4372 |
+
"loss": 0.0047,
|
| 4373 |
+
"step": 596
|
| 4374 |
+
},
|
| 4375 |
+
{
|
| 4376 |
+
"epoch": 1.9593846153846153,
|
| 4377 |
+
"grad_norm": 0.002600959734991193,
|
| 4378 |
+
"learning_rate": 2.001204758813944e-05,
|
| 4379 |
+
"loss": 0.0047,
|
| 4380 |
+
"step": 597
|
| 4381 |
+
},
|
| 4382 |
+
{
|
| 4383 |
+
"epoch": 1.9626666666666668,
|
| 4384 |
+
"grad_norm": 0.003187810303643346,
|
| 4385 |
+
"learning_rate": 2.0005354549986523e-05,
|
| 4386 |
+
"loss": 0.0047,
|
| 4387 |
+
"step": 598
|
| 4388 |
+
},
|
| 4389 |
+
{
|
| 4390 |
+
"epoch": 1.9659487179487178,
|
| 4391 |
+
"grad_norm": 0.0029263379983603954,
|
| 4392 |
+
"learning_rate": 2.0001338647452058e-05,
|
| 4393 |
+
"loss": 0.0048,
|
| 4394 |
+
"step": 599
|
| 4395 |
+
},
|
| 4396 |
+
{
|
| 4397 |
+
"epoch": 1.9692307692307693,
|
| 4398 |
+
"grad_norm": 0.003195718163624406,
|
| 4399 |
+
"learning_rate": 2e-05,
|
| 4400 |
+
"loss": 0.0048,
|
| 4401 |
+
"step": 600
|
| 4402 |
+
},
|
| 4403 |
+
{
|
| 4404 |
+
"epoch": 1.9692307692307693,
|
| 4405 |
+
"eval_loss": 0.004978457931429148,
|
| 4406 |
+
"eval_runtime": 1.0723,
|
| 4407 |
+
"eval_samples_per_second": 46.628,
|
| 4408 |
+
"eval_steps_per_second": 46.628,
|
| 4409 |
+
"step": 600
|
| 4410 |
}
|
| 4411 |
],
|
| 4412 |
"logging_steps": 1,
|
|
|
|
| 4421 |
"early_stopping_threshold": 0.0
|
| 4422 |
},
|
| 4423 |
"attributes": {
|
| 4424 |
+
"early_stopping_patience_counter": 3
|
| 4425 |
}
|
| 4426 |
},
|
| 4427 |
"TrainerControl": {
|
|
|
|
| 4430 |
"should_evaluate": false,
|
| 4431 |
"should_log": false,
|
| 4432 |
"should_save": true,
|
| 4433 |
+
"should_training_stop": true
|
| 4434 |
},
|
| 4435 |
"attributes": {}
|
| 4436 |
}
|
| 4437 |
},
|
| 4438 |
+
"total_flos": 3.654552359691878e+16,
|
| 4439 |
"train_batch_size": 1,
|
| 4440 |
"trial_name": null,
|
| 4441 |
"trial_params": null
|