Training in progress, step 592, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 60010048
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:727ef6499c3b2029862b640bd0783bcd2d1d77402e26d95bca288a369396ffdf
|
| 3 |
size 60010048
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 120213058
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36f40407089333f17b49c25519b1a1948c002a6cf29f0dd3e129ac1c5424f9b5
|
| 3 |
size 120213058
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da699b9d89f916da840476c4663b34c3317d4235c857b22c694b924a7e8d83f6
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e426754c87d665fc6a368f5f0109f66999a2f562e7ade96fcf16cbb316f70e2
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.493194580078125,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-550",
|
| 4 |
-
"epoch": 1.
|
| 5 |
"eval_steps": 25,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -4041,6 +4041,308 @@
|
|
| 4041 |
"eval_samples_per_second": 26.671,
|
| 4042 |
"eval_steps_per_second": 26.671,
|
| 4043 |
"step": 550
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4044 |
}
|
| 4045 |
],
|
| 4046 |
"logging_steps": 1,
|
|
@@ -4055,7 +4357,7 @@
|
|
| 4055 |
"early_stopping_threshold": 0.0
|
| 4056 |
},
|
| 4057 |
"attributes": {
|
| 4058 |
-
"early_stopping_patience_counter":
|
| 4059 |
}
|
| 4060 |
},
|
| 4061 |
"TrainerControl": {
|
|
@@ -4064,12 +4366,12 @@
|
|
| 4064 |
"should_evaluate": false,
|
| 4065 |
"should_log": false,
|
| 4066 |
"should_save": true,
|
| 4067 |
-
"should_training_stop":
|
| 4068 |
},
|
| 4069 |
"attributes": {}
|
| 4070 |
}
|
| 4071 |
},
|
| 4072 |
-
"total_flos": 1.
|
| 4073 |
"train_batch_size": 1,
|
| 4074 |
"trial_name": null,
|
| 4075 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.493194580078125,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-550",
|
| 4 |
+
"epoch": 1.2830342025059263,
|
| 5 |
"eval_steps": 25,
|
| 6 |
+
"global_step": 592,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 4041 |
"eval_samples_per_second": 26.671,
|
| 4042 |
"eval_steps_per_second": 26.671,
|
| 4043 |
"step": 550
|
| 4044 |
+
},
|
| 4045 |
+
{
|
| 4046 |
+
"epoch": 1.1941754148323738,
|
| 4047 |
+
"grad_norm": 0.745163083076477,
|
| 4048 |
+
"learning_rate": 2.2296148993937625e-05,
|
| 4049 |
+
"loss": 1.5242,
|
| 4050 |
+
"step": 551
|
| 4051 |
+
},
|
| 4052 |
+
{
|
| 4053 |
+
"epoch": 1.196342702336607,
|
| 4054 |
+
"grad_norm": 0.7508656978607178,
|
| 4055 |
+
"learning_rate": 2.2185957798587907e-05,
|
| 4056 |
+
"loss": 1.4407,
|
| 4057 |
+
"step": 552
|
| 4058 |
+
},
|
| 4059 |
+
{
|
| 4060 |
+
"epoch": 1.1985099898408398,
|
| 4061 |
+
"grad_norm": 0.7356210350990295,
|
| 4062 |
+
"learning_rate": 2.20784435397923e-05,
|
| 4063 |
+
"loss": 1.1741,
|
| 4064 |
+
"step": 553
|
| 4065 |
+
},
|
| 4066 |
+
{
|
| 4067 |
+
"epoch": 1.200677277345073,
|
| 4068 |
+
"grad_norm": 0.817733883857727,
|
| 4069 |
+
"learning_rate": 2.1973609495031733e-05,
|
| 4070 |
+
"loss": 1.2159,
|
| 4071 |
+
"step": 554
|
| 4072 |
+
},
|
| 4073 |
+
{
|
| 4074 |
+
"epoch": 1.2028445648493058,
|
| 4075 |
+
"grad_norm": 0.7189146876335144,
|
| 4076 |
+
"learning_rate": 2.18714588600831e-05,
|
| 4077 |
+
"loss": 0.9351,
|
| 4078 |
+
"step": 555
|
| 4079 |
+
},
|
| 4080 |
+
{
|
| 4081 |
+
"epoch": 1.2050118523535387,
|
| 4082 |
+
"grad_norm": 0.7796960473060608,
|
| 4083 |
+
"learning_rate": 2.177199474892181e-05,
|
| 4084 |
+
"loss": 1.0885,
|
| 4085 |
+
"step": 556
|
| 4086 |
+
},
|
| 4087 |
+
{
|
| 4088 |
+
"epoch": 1.2071791398577718,
|
| 4089 |
+
"grad_norm": 0.8709949851036072,
|
| 4090 |
+
"learning_rate": 2.1675220193626897e-05,
|
| 4091 |
+
"loss": 1.0783,
|
| 4092 |
+
"step": 557
|
| 4093 |
+
},
|
| 4094 |
+
{
|
| 4095 |
+
"epoch": 1.2093464273620047,
|
| 4096 |
+
"grad_norm": 0.8464658856391907,
|
| 4097 |
+
"learning_rate": 2.1581138144288525e-05,
|
| 4098 |
+
"loss": 0.9859,
|
| 4099 |
+
"step": 558
|
| 4100 |
+
},
|
| 4101 |
+
{
|
| 4102 |
+
"epoch": 1.2115137148662378,
|
| 4103 |
+
"grad_norm": 0.9255772829055786,
|
| 4104 |
+
"learning_rate": 2.148975146891817e-05,
|
| 4105 |
+
"loss": 0.94,
|
| 4106 |
+
"step": 559
|
| 4107 |
+
},
|
| 4108 |
+
{
|
| 4109 |
+
"epoch": 1.2136810023704707,
|
| 4110 |
+
"grad_norm": 0.9168251156806946,
|
| 4111 |
+
"learning_rate": 2.140106295336103e-05,
|
| 4112 |
+
"loss": 0.6344,
|
| 4113 |
+
"step": 560
|
| 4114 |
+
},
|
| 4115 |
+
{
|
| 4116 |
+
"epoch": 1.2158482898747036,
|
| 4117 |
+
"grad_norm": 0.9316290020942688,
|
| 4118 |
+
"learning_rate": 2.1315075301211272e-05,
|
| 4119 |
+
"loss": 0.6167,
|
| 4120 |
+
"step": 561
|
| 4121 |
+
},
|
| 4122 |
+
{
|
| 4123 |
+
"epoch": 1.2180155773789367,
|
| 4124 |
+
"grad_norm": 0.5710924863815308,
|
| 4125 |
+
"learning_rate": 2.123179113372946e-05,
|
| 4126 |
+
"loss": 1.06,
|
| 4127 |
+
"step": 562
|
| 4128 |
+
},
|
| 4129 |
+
{
|
| 4130 |
+
"epoch": 1.2201828648831696,
|
| 4131 |
+
"grad_norm": 0.4053063690662384,
|
| 4132 |
+
"learning_rate": 2.1151212989762815e-05,
|
| 4133 |
+
"loss": 2.0825,
|
| 4134 |
+
"step": 563
|
| 4135 |
+
},
|
| 4136 |
+
{
|
| 4137 |
+
"epoch": 1.2223501523874027,
|
| 4138 |
+
"grad_norm": 0.3882395923137665,
|
| 4139 |
+
"learning_rate": 2.1073343325667632e-05,
|
| 4140 |
+
"loss": 1.9561,
|
| 4141 |
+
"step": 564
|
| 4142 |
+
},
|
| 4143 |
+
{
|
| 4144 |
+
"epoch": 1.2245174398916356,
|
| 4145 |
+
"grad_norm": 0.40940290689468384,
|
| 4146 |
+
"learning_rate": 2.0998184515234558e-05,
|
| 4147 |
+
"loss": 1.8549,
|
| 4148 |
+
"step": 565
|
| 4149 |
+
},
|
| 4150 |
+
{
|
| 4151 |
+
"epoch": 1.2266847273958685,
|
| 4152 |
+
"grad_norm": 0.4803662896156311,
|
| 4153 |
+
"learning_rate": 2.0925738849616136e-05,
|
| 4154 |
+
"loss": 1.9017,
|
| 4155 |
+
"step": 566
|
| 4156 |
+
},
|
| 4157 |
+
{
|
| 4158 |
+
"epoch": 1.2288520149001017,
|
| 4159 |
+
"grad_norm": 0.4790099561214447,
|
| 4160 |
+
"learning_rate": 2.0856008537257003e-05,
|
| 4161 |
+
"loss": 1.8168,
|
| 4162 |
+
"step": 567
|
| 4163 |
+
},
|
| 4164 |
+
{
|
| 4165 |
+
"epoch": 1.2310193024043345,
|
| 4166 |
+
"grad_norm": 0.4733673334121704,
|
| 4167 |
+
"learning_rate": 2.078899570382657e-05,
|
| 4168 |
+
"loss": 1.9583,
|
| 4169 |
+
"step": 568
|
| 4170 |
+
},
|
| 4171 |
+
{
|
| 4172 |
+
"epoch": 1.2331865899085677,
|
| 4173 |
+
"grad_norm": 0.48890408873558044,
|
| 4174 |
+
"learning_rate": 2.0724702392154168e-05,
|
| 4175 |
+
"loss": 1.9115,
|
| 4176 |
+
"step": 569
|
| 4177 |
+
},
|
| 4178 |
+
{
|
| 4179 |
+
"epoch": 1.2353538774128006,
|
| 4180 |
+
"grad_norm": 0.4990405738353729,
|
| 4181 |
+
"learning_rate": 2.0663130562166867e-05,
|
| 4182 |
+
"loss": 1.6618,
|
| 4183 |
+
"step": 570
|
| 4184 |
+
},
|
| 4185 |
+
{
|
| 4186 |
+
"epoch": 1.2375211649170335,
|
| 4187 |
+
"grad_norm": 0.5173394083976746,
|
| 4188 |
+
"learning_rate": 2.0604282090829626e-05,
|
| 4189 |
+
"loss": 1.62,
|
| 4190 |
+
"step": 571
|
| 4191 |
+
},
|
| 4192 |
+
{
|
| 4193 |
+
"epoch": 1.2396884524212666,
|
| 4194 |
+
"grad_norm": 0.47291800379753113,
|
| 4195 |
+
"learning_rate": 2.0548158772088183e-05,
|
| 4196 |
+
"loss": 1.6012,
|
| 4197 |
+
"step": 572
|
| 4198 |
+
},
|
| 4199 |
+
{
|
| 4200 |
+
"epoch": 1.2418557399254995,
|
| 4201 |
+
"grad_norm": 0.5543546676635742,
|
| 4202 |
+
"learning_rate": 2.0494762316814265e-05,
|
| 4203 |
+
"loss": 2.2266,
|
| 4204 |
+
"step": 573
|
| 4205 |
+
},
|
| 4206 |
+
{
|
| 4207 |
+
"epoch": 1.2440230274297326,
|
| 4208 |
+
"grad_norm": 0.5711101293563843,
|
| 4209 |
+
"learning_rate": 2.044409435275349e-05,
|
| 4210 |
+
"loss": 2.1061,
|
| 4211 |
+
"step": 574
|
| 4212 |
+
},
|
| 4213 |
+
{
|
| 4214 |
+
"epoch": 1.2461903149339655,
|
| 4215 |
+
"grad_norm": 0.5323336720466614,
|
| 4216 |
+
"learning_rate": 2.0396156424475758e-05,
|
| 4217 |
+
"loss": 1.7618,
|
| 4218 |
+
"step": 575
|
| 4219 |
+
},
|
| 4220 |
+
{
|
| 4221 |
+
"epoch": 1.2461903149339655,
|
| 4222 |
+
"eval_loss": 1.4941222667694092,
|
| 4223 |
+
"eval_runtime": 1.9256,
|
| 4224 |
+
"eval_samples_per_second": 25.965,
|
| 4225 |
+
"eval_steps_per_second": 25.965,
|
| 4226 |
+
"step": 575
|
| 4227 |
+
},
|
| 4228 |
+
{
|
| 4229 |
+
"epoch": 1.2483576024381984,
|
| 4230 |
+
"grad_norm": 0.5417742729187012,
|
| 4231 |
+
"learning_rate": 2.035094999332813e-05,
|
| 4232 |
+
"loss": 1.9826,
|
| 4233 |
+
"step": 576
|
| 4234 |
+
},
|
| 4235 |
+
{
|
| 4236 |
+
"epoch": 1.2505248899424315,
|
| 4237 |
+
"grad_norm": 0.5711143016815186,
|
| 4238 |
+
"learning_rate": 2.0308476437390292e-05,
|
| 4239 |
+
"loss": 2.1701,
|
| 4240 |
+
"step": 577
|
| 4241 |
+
},
|
| 4242 |
+
{
|
| 4243 |
+
"epoch": 1.2526921774466644,
|
| 4244 |
+
"grad_norm": 0.5723181962966919,
|
| 4245 |
+
"learning_rate": 2.0268737051432534e-05,
|
| 4246 |
+
"loss": 1.8376,
|
| 4247 |
+
"step": 578
|
| 4248 |
+
},
|
| 4249 |
+
{
|
| 4250 |
+
"epoch": 1.2548594649508975,
|
| 4251 |
+
"grad_norm": 0.6304724216461182,
|
| 4252 |
+
"learning_rate": 2.0231733046876336e-05,
|
| 4253 |
+
"loss": 2.2109,
|
| 4254 |
+
"step": 579
|
| 4255 |
+
},
|
| 4256 |
+
{
|
| 4257 |
+
"epoch": 1.2570267524551304,
|
| 4258 |
+
"grad_norm": 0.49579569697380066,
|
| 4259 |
+
"learning_rate": 2.019746555175737e-05,
|
| 4260 |
+
"loss": 1.8671,
|
| 4261 |
+
"step": 580
|
| 4262 |
+
},
|
| 4263 |
+
{
|
| 4264 |
+
"epoch": 1.2591940399593633,
|
| 4265 |
+
"grad_norm": 0.5718152523040771,
|
| 4266 |
+
"learning_rate": 2.0165935610691136e-05,
|
| 4267 |
+
"loss": 2.0108,
|
| 4268 |
+
"step": 581
|
| 4269 |
+
},
|
| 4270 |
+
{
|
| 4271 |
+
"epoch": 1.2613613274635964,
|
| 4272 |
+
"grad_norm": 0.5495701432228088,
|
| 4273 |
+
"learning_rate": 2.0137144184841137e-05,
|
| 4274 |
+
"loss": 1.7419,
|
| 4275 |
+
"step": 582
|
| 4276 |
+
},
|
| 4277 |
+
{
|
| 4278 |
+
"epoch": 1.2635286149678293,
|
| 4279 |
+
"grad_norm": 0.5827396512031555,
|
| 4280 |
+
"learning_rate": 2.0111092151889548e-05,
|
| 4281 |
+
"loss": 2.0878,
|
| 4282 |
+
"step": 583
|
| 4283 |
+
},
|
| 4284 |
+
{
|
| 4285 |
+
"epoch": 1.2656959024720624,
|
| 4286 |
+
"grad_norm": 0.5941032767295837,
|
| 4287 |
+
"learning_rate": 2.00877803060105e-05,
|
| 4288 |
+
"loss": 2.0261,
|
| 4289 |
+
"step": 584
|
| 4290 |
+
},
|
| 4291 |
+
{
|
| 4292 |
+
"epoch": 1.2678631899762953,
|
| 4293 |
+
"grad_norm": 0.6075966954231262,
|
| 4294 |
+
"learning_rate": 2.006720935784581e-05,
|
| 4295 |
+
"loss": 1.9424,
|
| 4296 |
+
"step": 585
|
| 4297 |
+
},
|
| 4298 |
+
{
|
| 4299 |
+
"epoch": 1.2700304774805282,
|
| 4300 |
+
"grad_norm": 0.6470325589179993,
|
| 4301 |
+
"learning_rate": 2.0049379934483398e-05,
|
| 4302 |
+
"loss": 2.0216,
|
| 4303 |
+
"step": 586
|
| 4304 |
+
},
|
| 4305 |
+
{
|
| 4306 |
+
"epoch": 1.2721977649847613,
|
| 4307 |
+
"grad_norm": 0.5846126675605774,
|
| 4308 |
+
"learning_rate": 2.0034292579438092e-05,
|
| 4309 |
+
"loss": 1.9047,
|
| 4310 |
+
"step": 587
|
| 4311 |
+
},
|
| 4312 |
+
{
|
| 4313 |
+
"epoch": 1.2743650524889942,
|
| 4314 |
+
"grad_norm": 0.6144323945045471,
|
| 4315 |
+
"learning_rate": 2.00219477526351e-05,
|
| 4316 |
+
"loss": 1.897,
|
| 4317 |
+
"step": 588
|
| 4318 |
+
},
|
| 4319 |
+
{
|
| 4320 |
+
"epoch": 1.2765323399932273,
|
| 4321 |
+
"grad_norm": 0.5959208607673645,
|
| 4322 |
+
"learning_rate": 2.0012345830396012e-05,
|
| 4323 |
+
"loss": 1.7803,
|
| 4324 |
+
"step": 589
|
| 4325 |
+
},
|
| 4326 |
+
{
|
| 4327 |
+
"epoch": 1.2786996274974602,
|
| 4328 |
+
"grad_norm": 0.6328864097595215,
|
| 4329 |
+
"learning_rate": 2.0005487105427258e-05,
|
| 4330 |
+
"loss": 1.8311,
|
| 4331 |
+
"step": 590
|
| 4332 |
+
},
|
| 4333 |
+
{
|
| 4334 |
+
"epoch": 1.2808669150016931,
|
| 4335 |
+
"grad_norm": 0.6484797596931458,
|
| 4336 |
+
"learning_rate": 2.0001371786811258e-05,
|
| 4337 |
+
"loss": 1.9282,
|
| 4338 |
+
"step": 591
|
| 4339 |
+
},
|
| 4340 |
+
{
|
| 4341 |
+
"epoch": 1.2830342025059263,
|
| 4342 |
+
"grad_norm": 0.6196519136428833,
|
| 4343 |
+
"learning_rate": 2e-05,
|
| 4344 |
+
"loss": 1.7637,
|
| 4345 |
+
"step": 592
|
| 4346 |
}
|
| 4347 |
],
|
| 4348 |
"logging_steps": 1,
|
|
|
|
| 4357 |
"early_stopping_threshold": 0.0
|
| 4358 |
},
|
| 4359 |
"attributes": {
|
| 4360 |
+
"early_stopping_patience_counter": 1
|
| 4361 |
}
|
| 4362 |
},
|
| 4363 |
"TrainerControl": {
|
|
|
|
| 4366 |
"should_evaluate": false,
|
| 4367 |
"should_log": false,
|
| 4368 |
"should_save": true,
|
| 4369 |
+
"should_training_stop": true
|
| 4370 |
},
|
| 4371 |
"attributes": {}
|
| 4372 |
}
|
| 4373 |
},
|
| 4374 |
+
"total_flos": 1.943692451244933e+17,
|
| 4375 |
"train_batch_size": 1,
|
| 4376 |
"trial_name": null,
|
| 4377 |
"trial_params": null
|