Training in progress, step 41612, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:aa5577fd1b503ec752baeafdf2de06e57a83deb0e1e57a41cda45bf9427c95e9
 size 83945296
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e723c05be219624f96a2ddaa266b6270ac90d0bba2a1e5f85e21a833ea30c99b
 size 168150738
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:45deb07b720f6c59d34e29c1d4b4f3b955196381388fc9d375a97ed58b1dc34e
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:733fb082b772b237d84bcc94add0fd700f3286fcadb7278845beeba39bafa7fe
 size 1000
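Each of the checkpoint files above is tracked through Git LFS, so the repository only stores a three-line pointer (spec version, sha256 oid, byte size) while the blob itself lives in LFS storage. As a rough sanity check, a downloaded file can be compared against its pointer. The sketch below is a minimal illustration only; the file and pointer paths in the usage comment are hypothetical.

import hashlib
import os

def parse_lfs_pointer(pointer_path):
    """Read a Git LFS pointer file into a dict of its key/value lines."""
    fields = {}
    with open(pointer_path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_against_pointer(blob_path, pointer_path):
    """Check that a local blob matches the oid and size recorded in its pointer."""
    fields = parse_lfs_pointer(pointer_path)
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    digest = hashlib.sha256()
    with open(blob_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)

    return (digest.hexdigest() == expected_oid
            and os.path.getsize(blob_path) == expected_size)

# Hypothetical paths, for illustration only:
# verify_against_pointer("adapter_model.safetensors", "adapter_model.safetensors.pointer")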
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch":
+  "epoch": 4.0,
   "eval_steps": 4619,
-  "global_step":
+  "global_step": 41612,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -291184,6 +291184,293 @@
       "eval_test_samples_per_second": 12.589,
       "eval_test_steps_per_second": 0.787,
       "step": 41571
+    },
+    {
+      "epoch": 3.996154955301355,
+      "grad_norm": 1.3619403839111328,
+      "learning_rate": 3.00083601127463e-06,
+      "loss": 1.9584,
+      "step": 41572
+    },
+    {
+      "epoch": 3.9962510814188215,
+      "grad_norm": 1.1561639308929443,
+      "learning_rate": 3.0007947332547655e-06,
+      "loss": 1.902,
+      "step": 41573
+    },
+    {
+      "epoch": 3.996347207536288,
+      "grad_norm": 1.2157913446426392,
+      "learning_rate": 3.000754500244385e-06,
+      "loss": 1.8448,
+      "step": 41574
+    },
+    {
+      "epoch": 3.9964433336537537,
+      "grad_norm": 1.290919542312622,
+      "learning_rate": 3.000715312243752e-06,
+      "loss": 2.1262,
+      "step": 41575
+    },
+    {
+      "epoch": 3.9965394597712196,
+      "grad_norm": 1.218934416770935,
+      "learning_rate": 3.00067716925318e-06,
+      "loss": 1.875,
+      "step": 41576
+    },
+    {
+      "epoch": 3.996635585888686,
+      "grad_norm": 1.2158639430999756,
+      "learning_rate": 3.0006400712729e-06,
+      "loss": 1.8464,
+      "step": 41577
+    },
+    {
+      "epoch": 3.9967317120061523,
+      "grad_norm": 1.2294507026672363,
+      "learning_rate": 3.0006040183031917e-06,
+      "loss": 1.8543,
+      "step": 41578
+    },
+    {
+      "epoch": 3.996827838123618,
+      "grad_norm": 1.2768683433532715,
+      "learning_rate": 3.0005690103443025e-06,
+      "loss": 1.9559,
+      "step": 41579
+    },
+    {
+      "epoch": 3.996923964241084,
+      "grad_norm": 1.1993067264556885,
+      "learning_rate": 3.000535047396497e-06,
+      "loss": 1.8008,
+      "step": 41580
+    },
+    {
+      "epoch": 3.9970200903585504,
+      "grad_norm": 1.1862679719924927,
+      "learning_rate": 3.000502129459989e-06,
+      "loss": 1.7558,
+      "step": 41581
+    },
+    {
+      "epoch": 3.997116216476017,
+      "grad_norm": 1.3213326930999756,
+      "learning_rate": 3.000470256535026e-06,
+      "loss": 1.879,
+      "step": 41582
+    },
+    {
+      "epoch": 3.9972123425934827,
+      "grad_norm": 1.3456659317016602,
+      "learning_rate": 3.0004394286218213e-06,
+      "loss": 2.033,
+      "step": 41583
+    },
+    {
+      "epoch": 3.9973084687109486,
+      "grad_norm": 1.140081524848938,
+      "learning_rate": 3.0004096457205907e-06,
+      "loss": 1.8206,
+      "step": 41584
+    },
+    {
+      "epoch": 3.997404594828415,
+      "grad_norm": 1.2168374061584473,
+      "learning_rate": 3.0003809078315644e-06,
+      "loss": 1.7936,
+      "step": 41585
+    },
+    {
+      "epoch": 3.9975007209458813,
+      "grad_norm": 1.3345106840133667,
+      "learning_rate": 3.0003532149549234e-06,
+      "loss": 1.8446,
+      "step": 41586
+    },
+    {
+      "epoch": 3.997596847063347,
+      "grad_norm": 1.368496298789978,
+      "learning_rate": 3.0003265670908657e-06,
+      "loss": 1.8578,
+      "step": 41587
+    },
+    {
+      "epoch": 3.997692973180813,
+      "grad_norm": 1.1256635189056396,
+      "learning_rate": 3.00030096423959e-06,
+      "loss": 1.6587,
+      "step": 41588
+    },
+    {
+      "epoch": 3.9977890992982794,
+      "grad_norm": 1.1468359231948853,
+      "learning_rate": 3.0002764064012764e-06,
+      "loss": 1.8116,
+      "step": 41589
+    },
+    {
+      "epoch": 3.9978852254157453,
+      "grad_norm": 1.3567719459533691,
+      "learning_rate": 3.0002528935760904e-06,
+      "loss": 2.0766,
+      "step": 41590
+    },
+    {
+      "epoch": 3.9979813515332117,
+      "grad_norm": 1.2336657047271729,
+      "learning_rate": 3.0002304257641807e-06,
+      "loss": 1.8646,
+      "step": 41591
+    },
+    {
+      "epoch": 3.9980774776506776,
+      "grad_norm": 1.2067503929138184,
+      "learning_rate": 3.0002090029657445e-06,
+      "loss": 1.7902,
+      "step": 41592
+    },
+    {
+      "epoch": 3.998173603768144,
+      "grad_norm": 1.170020580291748,
+      "learning_rate": 3.0001886251808972e-06,
+      "loss": 1.8235,
+      "step": 41593
+    },
+    {
+      "epoch": 3.99826972988561,
+      "grad_norm": 1.202685832977295,
+      "learning_rate": 3.000169292409788e-06,
+      "loss": 1.8446,
+      "step": 41594
+    },
+    {
+      "epoch": 3.998365856003076,
+      "grad_norm": 1.1868846416473389,
+      "learning_rate": 3.0001510046525807e-06,
+      "loss": 1.9727,
+      "step": 41595
+    },
+    {
+      "epoch": 3.998461982120542,
+      "grad_norm": 1.2645318508148193,
+      "learning_rate": 3.0001337619093587e-06,
+      "loss": 1.8983,
+      "step": 41596
+    },
+    {
+      "epoch": 3.9985581082380084,
+      "grad_norm": 1.2477843761444092,
+      "learning_rate": 3.000117564180286e-06,
+      "loss": 1.9145,
+      "step": 41597
+    },
+    {
+      "epoch": 3.9986542343554743,
+      "grad_norm": 1.0654802322387695,
+      "learning_rate": 3.0001024114654626e-06,
+      "loss": 1.8887,
+      "step": 41598
+    },
+    {
+      "epoch": 3.9987503604729406,
+      "grad_norm": 1.2192732095718384,
+      "learning_rate": 3.000088303764986e-06,
+      "loss": 2.0114,
+      "step": 41599
+    },
+    {
+      "epoch": 3.9988464865904065,
+      "grad_norm": 1.243283987045288,
+      "learning_rate": 3.0000752410789563e-06,
+      "loss": 1.9751,
+      "step": 41600
+    },
+    {
+      "epoch": 3.998942612707873,
+      "grad_norm": 1.2228025197982788,
+      "learning_rate": 3.0000632234074715e-06,
+      "loss": 2.0104,
+      "step": 41601
+    },
+    {
+      "epoch": 3.999038738825339,
+      "grad_norm": 1.0860533714294434,
+      "learning_rate": 3.000052250750632e-06,
+      "loss": 1.5139,
+      "step": 41602
+    },
+    {
+      "epoch": 3.999134864942805,
+      "grad_norm": 1.1760326623916626,
+      "learning_rate": 3.0000423231084854e-06,
+      "loss": 1.7311,
+      "step": 41603
+    },
+    {
+      "epoch": 3.999230991060271,
+      "grad_norm": 1.3011332750320435,
+      "learning_rate": 3.0000334404811154e-06,
+      "loss": 1.8016,
+      "step": 41604
+    },
+    {
+      "epoch": 3.999327117177737,
+      "grad_norm": 1.3964388370513916,
+      "learning_rate": 3.0000256028685704e-06,
+      "loss": 1.9478,
+      "step": 41605
+    },
+    {
+      "epoch": 3.9994232432952033,
+      "grad_norm": 1.4257408380508423,
+      "learning_rate": 3.0000188102709344e-06,
+      "loss": 1.9333,
+      "step": 41606
+    },
+    {
+      "epoch": 3.9995193694126696,
+      "grad_norm": 1.2944358587265015,
+      "learning_rate": 3.0000130626882386e-06,
+      "loss": 1.9648,
+      "step": 41607
+    },
+    {
+      "epoch": 3.9996154955301355,
+      "grad_norm": 1.1968276500701904,
+      "learning_rate": 3.0000083601205175e-06,
+      "loss": 1.7864,
+      "step": 41608
+    },
+    {
+      "epoch": 3.9997116216476014,
+      "grad_norm": 1.2650930881500244,
+      "learning_rate": 3.000004702567804e-06,
+      "loss": 2.0297,
+      "step": 41609
+    },
+    {
+      "epoch": 3.9998077477650678,
+      "grad_norm": 1.3876334428787231,
+      "learning_rate": 3.0000020900301457e-06,
+      "loss": 1.9798,
+      "step": 41610
+    },
+    {
+      "epoch": 3.999903873882534,
+      "grad_norm": 1.294616460800171,
+      "learning_rate": 3.0000005225075443e-06,
+      "loss": 1.8283,
+      "step": 41611
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.340853214263916,
+      "learning_rate": 2.9999999999999997e-06,
+      "loss": 1.9542,
+      "step": 41612
     }
   ],
   "logging_steps": 1,
@@ -291198,12 +291485,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.0537773454858715e+19,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
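The trainer_state.json entries added in this commit follow the usual Hugging Face Trainer log schema: one dict per logging step carrying epoch, grad_norm, learning_rate, loss, and step, alongside top-level fields such as global_step and total_flos. A minimal sketch for inspecting the tail of that log is below; the local checkpoint path is assumed rather than taken from this commit.

import json

# Assumed local path to the checkpoint directory shown in this commit.
state_path = "last-checkpoint/trainer_state.json"

with open(state_path, "r", encoding="utf-8") as fh:
    state = json.load(fh)

print("global_step:", state["global_step"])   # e.g. 41612
print("epoch:", state["epoch"])               # e.g. 4.0
print("total_flos:", state.get("total_flos"))

# log_history is a list of per-step dicts; print the last few loss values.
for entry in state["log_history"][-5:]:
    if "loss" in entry:
        print(entry["step"], entry["loss"], entry["learning_rate"])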