Training in progress, step 359, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 60010048
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cd7a11dd960decfde159b9ffedcb277804a1627b5b44f99755257d42961884c
|
| 3 |
size 60010048
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 30428180
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85eceb4a829aa4047fbf635b04070cf32f5480f395bbe1d6b5df070c2e3a1aac
|
| 3 |
size 30428180
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7dcd05dfbbdeba643c656ed11b36a8a6487d3151c9ac5ff333ebbd78351d6657
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2422,6 +2422,104 @@
|
|
| 2422 |
"learning_rate": 1.4393939393939396e-05,
|
| 2423 |
"loss": 1.1526,
|
| 2424 |
"step": 345
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2425 |
}
|
| 2426 |
],
|
| 2427 |
"logging_steps": 1,
|
|
@@ -2441,7 +2539,7 @@
|
|
| 2441 |
"attributes": {}
|
| 2442 |
}
|
| 2443 |
},
|
| 2444 |
-
"total_flos": 4.
|
| 2445 |
"train_batch_size": 4,
|
| 2446 |
"trial_name": null,
|
| 2447 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.9849108367626886,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 359,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2422 |
"learning_rate": 1.4393939393939396e-05,
|
| 2423 |
"loss": 1.1526,
|
| 2424 |
"step": 345
|
| 2425 |
+
},
|
| 2426 |
+
{
|
| 2427 |
+
"epoch": 0.9492455418381345,
|
| 2428 |
+
"grad_norm": 0.14177238941192627,
|
| 2429 |
+
"learning_rate": 1.3636363636363637e-05,
|
| 2430 |
+
"loss": 1.132,
|
| 2431 |
+
"step": 346
|
| 2432 |
+
},
|
| 2433 |
+
{
|
| 2434 |
+
"epoch": 0.9519890260631001,
|
| 2435 |
+
"grad_norm": 0.13835884630680084,
|
| 2436 |
+
"learning_rate": 1.287878787878788e-05,
|
| 2437 |
+
"loss": 1.1599,
|
| 2438 |
+
"step": 347
|
| 2439 |
+
},
|
| 2440 |
+
{
|
| 2441 |
+
"epoch": 0.9547325102880658,
|
| 2442 |
+
"grad_norm": 0.14390669763088226,
|
| 2443 |
+
"learning_rate": 1.2121212121212122e-05,
|
| 2444 |
+
"loss": 1.15,
|
| 2445 |
+
"step": 348
|
| 2446 |
+
},
|
| 2447 |
+
{
|
| 2448 |
+
"epoch": 0.9574759945130316,
|
| 2449 |
+
"grad_norm": 0.14811821281909943,
|
| 2450 |
+
"learning_rate": 1.1363636363636365e-05,
|
| 2451 |
+
"loss": 1.0759,
|
| 2452 |
+
"step": 349
|
| 2453 |
+
},
|
| 2454 |
+
{
|
| 2455 |
+
"epoch": 0.9602194787379973,
|
| 2456 |
+
"grad_norm": 0.14959345757961273,
|
| 2457 |
+
"learning_rate": 1.0606060606060607e-05,
|
| 2458 |
+
"loss": 1.126,
|
| 2459 |
+
"step": 350
|
| 2460 |
+
},
|
| 2461 |
+
{
|
| 2462 |
+
"epoch": 0.9629629629629629,
|
| 2463 |
+
"grad_norm": 0.14656995236873627,
|
| 2464 |
+
"learning_rate": 9.848484848484848e-06,
|
| 2465 |
+
"loss": 1.1341,
|
| 2466 |
+
"step": 351
|
| 2467 |
+
},
|
| 2468 |
+
{
|
| 2469 |
+
"epoch": 0.9657064471879286,
|
| 2470 |
+
"grad_norm": 0.14695106446743011,
|
| 2471 |
+
"learning_rate": 9.090909090909091e-06,
|
| 2472 |
+
"loss": 1.1259,
|
| 2473 |
+
"step": 352
|
| 2474 |
+
},
|
| 2475 |
+
{
|
| 2476 |
+
"epoch": 0.9684499314128944,
|
| 2477 |
+
"grad_norm": 0.14155460894107819,
|
| 2478 |
+
"learning_rate": 8.333333333333334e-06,
|
| 2479 |
+
"loss": 1.1503,
|
| 2480 |
+
"step": 353
|
| 2481 |
+
},
|
| 2482 |
+
{
|
| 2483 |
+
"epoch": 0.9711934156378601,
|
| 2484 |
+
"grad_norm": 0.1382407397031784,
|
| 2485 |
+
"learning_rate": 7.5757575757575764e-06,
|
| 2486 |
+
"loss": 1.1417,
|
| 2487 |
+
"step": 354
|
| 2488 |
+
},
|
| 2489 |
+
{
|
| 2490 |
+
"epoch": 0.9739368998628258,
|
| 2491 |
+
"grad_norm": 0.14089229702949524,
|
| 2492 |
+
"learning_rate": 6.818181818181818e-06,
|
| 2493 |
+
"loss": 1.1551,
|
| 2494 |
+
"step": 355
|
| 2495 |
+
},
|
| 2496 |
+
{
|
| 2497 |
+
"epoch": 0.9766803840877915,
|
| 2498 |
+
"grad_norm": 0.14886945486068726,
|
| 2499 |
+
"learning_rate": 6.060606060606061e-06,
|
| 2500 |
+
"loss": 1.0973,
|
| 2501 |
+
"step": 356
|
| 2502 |
+
},
|
| 2503 |
+
{
|
| 2504 |
+
"epoch": 0.9794238683127572,
|
| 2505 |
+
"grad_norm": 0.1485728621482849,
|
| 2506 |
+
"learning_rate": 5.303030303030304e-06,
|
| 2507 |
+
"loss": 1.1028,
|
| 2508 |
+
"step": 357
|
| 2509 |
+
},
|
| 2510 |
+
{
|
| 2511 |
+
"epoch": 0.9821673525377229,
|
| 2512 |
+
"grad_norm": 0.1496025174856186,
|
| 2513 |
+
"learning_rate": 4.5454545454545455e-06,
|
| 2514 |
+
"loss": 1.0941,
|
| 2515 |
+
"step": 358
|
| 2516 |
+
},
|
| 2517 |
+
{
|
| 2518 |
+
"epoch": 0.9849108367626886,
|
| 2519 |
+
"grad_norm": 0.1394403725862503,
|
| 2520 |
+
"learning_rate": 3.7878787878787882e-06,
|
| 2521 |
+
"loss": 1.1452,
|
| 2522 |
+
"step": 359
|
| 2523 |
}
|
| 2524 |
],
|
| 2525 |
"logging_steps": 1,
|
|
|
|
| 2539 |
"attributes": {}
|
| 2540 |
}
|
| 2541 |
},
|
| 2542 |
+
"total_flos": 4.3742608796698214e+17,
|
| 2543 |
"train_batch_size": 4,
|
| 2544 |
"trial_name": null,
|
| 2545 |
"trial_params": null
|