Training in progress, step 2550, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 527048968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16a48e299ffea4a1305480644e0252c4aab5e02d9191b6e951f282025c021c36
|
| 3 |
size 527048968
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1054135994
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccb7656d0058945a544e068c4c181cbcbae03cf7066df76778cb4579b9066242
|
| 3 |
size 1054135994
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33335d8e454fa636f80bfcf35b73daf17e2d9682ef2741d0ad1097e25ee4742d
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f98b757d648be3e63607e2156858bad579d6a12d490a3cb2c8748d9ae2cce45
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -2327,6 +2327,151 @@
|
|
| 2327 |
"EMA_steps_per_second": 24.683,
|
| 2328 |
"epoch": 104.34782608695652,
|
| 2329 |
"step": 2400
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2330 |
}
|
| 2331 |
],
|
| 2332 |
"logging_steps": 10,
|
|
@@ -2346,7 +2491,7 @@
|
|
| 2346 |
"attributes": {}
|
| 2347 |
}
|
| 2348 |
},
|
| 2349 |
-
"total_flos": 6.
|
| 2350 |
"train_batch_size": 4,
|
| 2351 |
"trial_name": null,
|
| 2352 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.7177689671516418,
|
| 3 |
"best_model_checkpoint": "./output/checkpoint-450",
|
| 4 |
+
"epoch": 110.8695652173913,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 2550,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 2327 |
"EMA_steps_per_second": 24.683,
|
| 2328 |
"epoch": 104.34782608695652,
|
| 2329 |
"step": 2400
|
| 2330 |
+
},
|
| 2331 |
+
{
|
| 2332 |
+
"epoch": 104.78260869565217,
|
| 2333 |
+
"grad_norm": 2.10827898979187,
|
| 2334 |
+
"learning_rate": 7.486913639731043e-06,
|
| 2335 |
+
"loss": 0.2495,
|
| 2336 |
+
"step": 2410
|
| 2337 |
+
},
|
| 2338 |
+
{
|
| 2339 |
+
"epoch": 105.21739130434783,
|
| 2340 |
+
"grad_norm": 2.025355815887451,
|
| 2341 |
+
"learning_rate": 7.48685947932756e-06,
|
| 2342 |
+
"loss": 0.2637,
|
| 2343 |
+
"step": 2420
|
| 2344 |
+
},
|
| 2345 |
+
{
|
| 2346 |
+
"epoch": 105.65217391304348,
|
| 2347 |
+
"grad_norm": 1.9276680946350098,
|
| 2348 |
+
"learning_rate": 7.4868038353494355e-06,
|
| 2349 |
+
"loss": 0.2603,
|
| 2350 |
+
"step": 2430
|
| 2351 |
+
},
|
| 2352 |
+
{
|
| 2353 |
+
"epoch": 106.08695652173913,
|
| 2354 |
+
"grad_norm": 2.324167490005493,
|
| 2355 |
+
"learning_rate": 7.486746707818724e-06,
|
| 2356 |
+
"loss": 0.2141,
|
| 2357 |
+
"step": 2440
|
| 2358 |
+
},
|
| 2359 |
+
{
|
| 2360 |
+
"epoch": 106.52173913043478,
|
| 2361 |
+
"grad_norm": 1.4006412029266357,
|
| 2362 |
+
"learning_rate": 7.486688096758069e-06,
|
| 2363 |
+
"loss": 0.2816,
|
| 2364 |
+
"step": 2450
|
| 2365 |
+
},
|
| 2366 |
+
{
|
| 2367 |
+
"epoch": 106.95652173913044,
|
| 2368 |
+
"grad_norm": 1.8922216892242432,
|
| 2369 |
+
"learning_rate": 7.486628002190702e-06,
|
| 2370 |
+
"loss": 0.2444,
|
| 2371 |
+
"step": 2460
|
| 2372 |
+
},
|
| 2373 |
+
{
|
| 2374 |
+
"epoch": 107.3913043478261,
|
| 2375 |
+
"grad_norm": 2.3611834049224854,
|
| 2376 |
+
"learning_rate": 7.486566424140442e-06,
|
| 2377 |
+
"loss": 0.3039,
|
| 2378 |
+
"step": 2470
|
| 2379 |
+
},
|
| 2380 |
+
{
|
| 2381 |
+
"epoch": 107.82608695652173,
|
| 2382 |
+
"grad_norm": 2.2470717430114746,
|
| 2383 |
+
"learning_rate": 7.486503362631699e-06,
|
| 2384 |
+
"loss": 0.2188,
|
| 2385 |
+
"step": 2480
|
| 2386 |
+
},
|
| 2387 |
+
{
|
| 2388 |
+
"epoch": 108.26086956521739,
|
| 2389 |
+
"grad_norm": 2.0604355335235596,
|
| 2390 |
+
"learning_rate": 7.486438817689465e-06,
|
| 2391 |
+
"loss": 0.2706,
|
| 2392 |
+
"step": 2490
|
| 2393 |
+
},
|
| 2394 |
+
{
|
| 2395 |
+
"epoch": 108.69565217391305,
|
| 2396 |
+
"grad_norm": 1.6355359554290771,
|
| 2397 |
+
"learning_rate": 7.486372789339326e-06,
|
| 2398 |
+
"loss": 0.2454,
|
| 2399 |
+
"step": 2500
|
| 2400 |
+
},
|
| 2401 |
+
{
|
| 2402 |
+
"epoch": 109.1304347826087,
|
| 2403 |
+
"grad_norm": 1.6156138181686401,
|
| 2404 |
+
"learning_rate": 7.486305277607452e-06,
|
| 2405 |
+
"loss": 0.2437,
|
| 2406 |
+
"step": 2510
|
| 2407 |
+
},
|
| 2408 |
+
{
|
| 2409 |
+
"epoch": 109.56521739130434,
|
| 2410 |
+
"grad_norm": 1.3432440757751465,
|
| 2411 |
+
"learning_rate": 7.486236282520606e-06,
|
| 2412 |
+
"loss": 0.2309,
|
| 2413 |
+
"step": 2520
|
| 2414 |
+
},
|
| 2415 |
+
{
|
| 2416 |
+
"epoch": 110.0,
|
| 2417 |
+
"grad_norm": 3.2272891998291016,
|
| 2418 |
+
"learning_rate": 7.48616580410613e-06,
|
| 2419 |
+
"loss": 0.2874,
|
| 2420 |
+
"step": 2530
|
| 2421 |
+
},
|
| 2422 |
+
{
|
| 2423 |
+
"epoch": 110.43478260869566,
|
| 2424 |
+
"grad_norm": 1.7123788595199585,
|
| 2425 |
+
"learning_rate": 7.486093842391963e-06,
|
| 2426 |
+
"loss": 0.2452,
|
| 2427 |
+
"step": 2540
|
| 2428 |
+
},
|
| 2429 |
+
{
|
| 2430 |
+
"epoch": 110.8695652173913,
|
| 2431 |
+
"grad_norm": 1.8407248258590698,
|
| 2432 |
+
"learning_rate": 7.486020397406629e-06,
|
| 2433 |
+
"loss": 0.2698,
|
| 2434 |
+
"step": 2550
|
| 2435 |
+
},
|
| 2436 |
+
{
|
| 2437 |
+
"epoch": 110.8695652173913,
|
| 2438 |
+
"eval_loss": 0.9264782071113586,
|
| 2439 |
+
"eval_runtime": 0.5916,
|
| 2440 |
+
"eval_samples_per_second": 16.903,
|
| 2441 |
+
"eval_steps_per_second": 16.903,
|
| 2442 |
+
"step": 2550
|
| 2443 |
+
},
|
| 2444 |
+
{
|
| 2445 |
+
"Start_State_loss": 0.861186683177948,
|
| 2446 |
+
"Start_State_runtime": 0.4032,
|
| 2447 |
+
"Start_State_samples_per_second": 24.805,
|
| 2448 |
+
"Start_State_steps_per_second": 24.805,
|
| 2449 |
+
"epoch": 110.8695652173913,
|
| 2450 |
+
"step": 2550
|
| 2451 |
+
},
|
| 2452 |
+
{
|
| 2453 |
+
"Raw_Model_loss": 0.9264782071113586,
|
| 2454 |
+
"Raw_Model_runtime": 0.4014,
|
| 2455 |
+
"Raw_Model_samples_per_second": 24.913,
|
| 2456 |
+
"Raw_Model_steps_per_second": 24.913,
|
| 2457 |
+
"epoch": 110.8695652173913,
|
| 2458 |
+
"step": 2550
|
| 2459 |
+
},
|
| 2460 |
+
{
|
| 2461 |
+
"SWA_loss": 0.759516716003418,
|
| 2462 |
+
"SWA_runtime": 0.403,
|
| 2463 |
+
"SWA_samples_per_second": 24.814,
|
| 2464 |
+
"SWA_steps_per_second": 24.814,
|
| 2465 |
+
"epoch": 110.8695652173913,
|
| 2466 |
+
"step": 2550
|
| 2467 |
+
},
|
| 2468 |
+
{
|
| 2469 |
+
"EMA_loss": 0.8597530126571655,
|
| 2470 |
+
"EMA_runtime": 0.415,
|
| 2471 |
+
"EMA_samples_per_second": 24.098,
|
| 2472 |
+
"EMA_steps_per_second": 24.098,
|
| 2473 |
+
"epoch": 110.8695652173913,
|
| 2474 |
+
"step": 2550
|
| 2475 |
}
|
| 2476 |
],
|
| 2477 |
"logging_steps": 10,
|
|
|
|
| 2491 |
"attributes": {}
|
| 2492 |
}
|
| 2493 |
},
|
| 2494 |
+
"total_flos": 6.568246037407334e+16,
|
| 2495 |
"train_batch_size": 4,
|
| 2496 |
"trial_name": null,
|
| 2497 |
"trial_params": null
|