Training in progress, step 72
Browse files- adapter_model.safetensors +1 -1
- debug.log +62 -1
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 456206152
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ac9868b2ecbab71a1d30f8c96c600df5c6f3cf7a15b7a08fa634e05f3862072
|
| 3 |
size 456206152
|
debug.log
CHANGED
|
@@ -413,4 +413,65 @@ trainable params: 114,032,640 || all params: 12,361,835,520 || trainable%: 0.922
|
|
| 413 |
|
| 414 |
[2025-10-07 13:25:32,526] [INFO] [axolotl.core.trainers.base._save:671] [PID:8314] Saving model checkpoint to ckpts-mmarv/checkpoint-48
|
| 415 |
|
| 416 |
21%|βββββββββ | 49/232 [1:36:14<8:41:09, 170.87s/it]
|
| 417 |
|
| 418 |
-
|
| 419 |
21%|βββββββββ | 49/232 [1:36:14<8:41:09, 170.87s/it]
|
|
|
|
| 420 |
21%|βββββββββ | 49/232 [1:36:14<8:41:09, 170.87s/it]
|
| 421 |
22%|ββββββββββ | 50/232 [1:38:00<7:39:21, 151.44s/it]
|
| 422 |
|
|
|
|
| 423 |
22%|ββββββββββ | 50/232 [1:38:00<7:39:21, 151.44s/it]
|
| 424 |
22%|ββββββββββ | 51/232 [1:39:45<6:54:29, 137.40s/it]
|
| 425 |
|
|
|
|
| 426 |
22%|ββββββββββ | 51/232 [1:39:45<6:54:29, 137.40s/it]
|
| 427 |
22%|ββββββββββ | 52/232 [1:41:30<6:23:05, 127.69s/it]
|
| 428 |
|
|
|
|
| 429 |
22%|ββββββββββ | 52/232 [1:41:30<6:23:05, 127.69s/it]
|
| 430 |
23%|ββββββββββ | 53/232 [1:43:14<5:59:58, 120.66s/it]
|
| 431 |
|
|
|
|
| 432 |
23%|ββββββββββ | 53/232 [1:43:14<5:59:58, 120.66s/it]
|
| 433 |
23%|ββββββββββ | 54/232 [1:44:59<5:43:55, 115.93s/it]
|
| 434 |
|
|
|
|
| 435 |
23%|ββββββββββ | 54/232 [1:44:59<5:43:55, 115.93s/it]
|
| 436 |
24%|βββββββββββ | 55/232 [1:46:44<5:32:07, 112.59s/it]
|
| 437 |
|
|
|
|
| 438 |
24%|βββββββββββ | 55/232 [1:46:44<5:32:07, 112.59s/it]
|
| 439 |
24%|βββββββββββ | 56/232 [1:48:28<5:23:05, 110.14s/it]
|
| 440 |
|
|
|
|
| 441 |
24%|βββββββββββ | 56/232 [1:48:28<5:23:05, 110.14s/it]
|
| 442 |
25%|βββββββββββ | 57/232 [1:50:13<5:16:41, 108.58s/it]
|
| 443 |
|
|
|
|
| 444 |
25%|βββββββββββ | 57/232 [1:50:13<5:16:41, 108.58s/it]
|
| 445 |
25%|βββββββββββ | 58/232 [1:51:58<5:11:49, 107.53s/it]
|
| 446 |
|
|
|
|
| 447 |
25%|βββββββββββ | 58/232 [1:51:58<5:11:49, 107.53s/it]
|
| 448 |
25%|βββββββββββ | 59/232 [1:53:43<5:07:28, 106.64s/it]
|
| 449 |
|
|
|
|
| 450 |
25%|βββββββββββ | 59/232 [1:53:43<5:07:28, 106.64s/it]
|
| 451 |
26%|βββββββββββ | 60/232 [1:55:29<5:05:06, 106.43s/it]
|
| 452 |
|
|
|
|
| 453 |
26%|βββββββββββ | 60/232 [1:55:29<5:05:06, 106.43s/it]
|
| 454 |
26%|ββββββββββββ | 61/232 [1:57:13<5:01:41, 105.86s/it]
|
| 455 |
|
|
|
|
| 456 |
26%|ββββββββββββ | 61/232 [1:57:13<5:01:41, 105.86s/it]
|
| 457 |
27%|ββββββββββββ | 62/232 [1:58:58<4:59:07, 105.57s/it]
|
| 458 |
|
|
|
|
| 459 |
27%|ββββββββββββ | 62/232 [1:58:58<4:59:07, 105.57s/it]
|
| 460 |
27%|ββββββββββββ | 63/232 [2:00:43<4:56:30, 105.27s/it]
|
| 461 |
|
|
|
|
| 462 |
27%|ββββββββββββ | 63/232 [2:00:43<4:56:30, 105.27s/it]
|
| 463 |
28%|ββββββββββββ | 64/232 [2:02:27<4:54:27, 105.16s/it]
|
| 464 |
|
|
|
|
| 465 |
28%|ββββββββββββ | 64/232 [2:02:27<4:54:27, 105.16s/it]
|
| 466 |
28%|ββββββββββββ | 65/232 [2:04:12<4:52:23, 105.05s/it]
|
| 467 |
|
|
|
|
| 468 |
28%|ββββββββββββ | 65/232 [2:04:12<4:52:23, 105.05s/it]
|
| 469 |
28%|βββββββββββββ | 66/232 [2:05:55<4:48:48, 104.39s/it]
|
| 470 |
|
|
|
|
| 471 |
28%|βββββββββββββ | 66/232 [2:05:55<4:48:48, 104.39s/it]
|
| 472 |
29%|βββββββββββββ | 67/232 [2:07:40<4:47:28, 104.53s/it]
|
| 473 |
|
|
|
|
| 474 |
29%|βββββββββββββ | 67/232 [2:07:40<4:47:28, 104.53s/it]
|
| 475 |
29%|βββββββββββββ | 68/232 [2:09:25<4:45:52, 104.59s/it]
|
| 476 |
|
|
|
|
| 477 |
29%|βββββββββββββ | 68/232 [2:09:25<4:45:52, 104.59s/it]
|
| 478 |
30%|βββββββββββββ | 69/232 [2:11:09<4:43:56, 104.52s/it]
|
| 479 |
|
|
|
|
| 480 |
30%|βββββββββββββ | 69/232 [2:11:09<4:43:56, 104.52s/it]
|
| 481 |
30%|βββββββββββββ | 70/232 [2:12:55<4:43:37, 105.04s/it]
|
| 482 |
|
|
|
|
| 483 |
30%|βββββββββββββ | 70/232 [2:12:55<4:43:37, 105.04s/it]
|
| 484 |
31%|ββββββββββββββ | 71/232 [2:14:40<4:41:28, 104.90s/it]
|
| 485 |
|
|
|
|
| 486 |
31%|ββββββββββββββ | 71/232 [2:14:40<4:41:28, 104.90s/it]
|
| 487 |
31%|ββββββββββββββ | 72/232 [2:16:26<4:40:27, 105.17s/it]
|
| 488 |
|
|
|
|
| 489 |
31%|ββββββββββββββ | 72/232 [2:16:26<4:40:27, 105.17s/it][2025-10-07 14:07:44,423] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:8314] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
0%| | 0/23 [00:00<?, ?it/s][A
|
|
|
|
| 491 |
9%|βββββ | 2/23 [00:08<01:25, 4.07s/it][A
|
|
|
|
| 492 |
13%|βββββββ | 3/23 [00:16<01:55, 5.76s/it][A
|
|
|
|
| 493 |
17%|βββββββββ | 4/23 [00:24<02:06, 6.67s/it][A
|
|
|
|
| 494 |
22%|βββββββββββ | 5/23 [00:32<02:10, 7.23s/it][A
|
|
|
|
| 495 |
26%|βββββββββββββ | 6/23 [00:40<02:08, 7.56s/it][A
|
|
|
|
| 496 |
30%|ββββββββββββββββ | 7/23 [00:49<02:03, 7.75s/it][A
|
|
|
|
| 497 |
35%|ββββββββββββββββββ | 8/23 [00:57<01:58, 7.87s/it][A
|
|
|
|
| 498 |
39%|ββββββββββββββββββββ | 9/23 [01:05<01:52, 8.02s/it][A
|
|
|
|
| 499 |
43%|ββββββββββββββββββββββ | 10/23 [01:13<01:44, 8.05s/it][A
|
|
|
|
| 500 |
48%|ββββββββββββββββββββββββ | 11/23 [01:21<01:36, 8.08s/it][A
|
|
|
|
| 501 |
52%|ββββββββββββββββββββββββββ | 12/23 [01:29<01:29, 8.09s/it][A
|
|
|
|
| 502 |
57%|ββββββββββββββββββββββββββββ | 13/23 [01:38<01:21, 8.17s/it][A
|
|
|
|
| 503 |
61%|ββββββββββββββββββββββββββββββ | 14/23 [01:46<01:13, 8.16s/it][A
|
|
|
|
| 504 |
65%|ββββββββββββββββββββββββββββββββ | 15/23 [01:54<01:05, 8.16s/it][A
|
|
|
|
| 505 |
70%|ββββββββββββββββββββββββββββββββββ | 16/23 [02:01<00:55, 7.89s/it][A
|
|
|
|
| 506 |
74%|βββββββββββββββββββββββββββββββββββββ | 17/23 [02:10<00:48, 8.00s/it][A
|
|
|
|
| 507 |
78%|βββββββββββββββββββββββββββββββββββββββ | 18/23 [02:18<00:40, 8.06s/it][A
|
|
|
|
| 508 |
83%|βββββββββββββββββββββββββββββββββββββββββ | 19/23 [02:26<00:32, 8.09s/it][A
|
|
|
|
| 509 |
87%|βββββββββββββββββββββββββββββββββββββββββββ | 20/23 [02:34<00:24, 8.11s/it][A
|
|
|
|
| 510 |
91%|βββββββββββββββββββββββββββββββββββββββββββββ | 21/23 [02:42<00:16, 8.18s/it][A
|
|
|
|
| 511 |
96%|βββββββββββββββββββββββββββββββββββββββββββββββ | 22/23 [02:51<00:08, 8.18s/it][A
|
|
|
|
| 512 |
|
|
|
|
| 513 |
|
|
|
|
| 514 |
31%|ββββββββββββββ | 72/232 [2:19:39<4:40:27, 105.17s/it]
|
|
|
|
|
|
|
| 515 |
[A[2025-10-07 14:10:57,960] [WARNING] [py.warnings._showwarnmsg:110] [PID:8314] /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:680: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
|
|
|
|
|
|
|
|
|
|
|
|
|
| 516 |
31%|ββββββββββββββ | 73/232 [2:21:50<7:32:42, 170.84s/it]
|
| 517 |
|
|
|
|
| 518 |
31%|ββββββββββββββ | 73/232 [2:21:50<7:32:42, 170.84s/it]
|
|
|
|
| 413 |
|
| 414 |
[2025-10-07 13:25:32,526] [INFO] [axolotl.core.trainers.base._save:671] [PID:8314] Saving model checkpoint to ckpts-mmarv/checkpoint-48
|
| 415 |
|
| 416 |
21%|βββββββββ | 49/232 [1:36:14<8:41:09, 170.87s/it]
|
| 417 |
|
|
|
|
| 418 |
21%|βββββββββ | 49/232 [1:36:14<8:41:09, 170.87s/it]
|
| 419 |
+
|
| 420 |
21%|βββββββββ | 49/232 [1:36:14<8:41:09, 170.87s/it]
|
| 421 |
22%|ββββββββββ | 50/232 [1:38:00<7:39:21, 151.44s/it]
|
| 422 |
|
| 423 |
+
|
| 424 |
22%|ββββββββββ | 50/232 [1:38:00<7:39:21, 151.44s/it]
|
| 425 |
22%|ββββββββββ | 51/232 [1:39:45<6:54:29, 137.40s/it]
|
| 426 |
|
| 427 |
+
|
| 428 |
22%|ββββββββββ | 51/232 [1:39:45<6:54:29, 137.40s/it]
|
| 429 |
22%|ββββββββββ | 52/232 [1:41:30<6:23:05, 127.69s/it]
|
| 430 |
|
| 431 |
+
|
| 432 |
22%|ββββββββββ | 52/232 [1:41:30<6:23:05, 127.69s/it]
|
| 433 |
23%|ββββββββββ | 53/232 [1:43:14<5:59:58, 120.66s/it]
|
| 434 |
|
| 435 |
+
|
| 436 |
23%|ββββββββββ | 53/232 [1:43:14<5:59:58, 120.66s/it]
|
| 437 |
23%|ββββββββββ | 54/232 [1:44:59<5:43:55, 115.93s/it]
|
| 438 |
|
| 439 |
+
|
| 440 |
23%|ββββββββββ | 54/232 [1:44:59<5:43:55, 115.93s/it]
|
| 441 |
24%|βββββββββββ | 55/232 [1:46:44<5:32:07, 112.59s/it]
|
| 442 |
|
| 443 |
+
|
| 444 |
24%|βββββββββββ | 55/232 [1:46:44<5:32:07, 112.59s/it]
|
| 445 |
24%|βββββββββββ | 56/232 [1:48:28<5:23:05, 110.14s/it]
|
| 446 |
|
| 447 |
+
|
| 448 |
24%|βββββββββββ | 56/232 [1:48:28<5:23:05, 110.14s/it]
|
| 449 |
25%|βββββββββββ | 57/232 [1:50:13<5:16:41, 108.58s/it]
|
| 450 |
|
| 451 |
+
|
| 452 |
25%|βββββββββββ | 57/232 [1:50:13<5:16:41, 108.58s/it]
|
| 453 |
25%|βββββββββββ | 58/232 [1:51:58<5:11:49, 107.53s/it]
|
| 454 |
|
| 455 |
+
|
| 456 |
25%|βββββββββββ | 58/232 [1:51:58<5:11:49, 107.53s/it]
|
| 457 |
25%|βββββββββββ | 59/232 [1:53:43<5:07:28, 106.64s/it]
|
| 458 |
|
| 459 |
+
|
| 460 |
25%|βββββββββββ | 59/232 [1:53:43<5:07:28, 106.64s/it]
|
| 461 |
26%|βββββββββββ | 60/232 [1:55:29<5:05:06, 106.43s/it]
|
| 462 |
|
| 463 |
+
|
| 464 |
26%|βββββββββββ | 60/232 [1:55:29<5:05:06, 106.43s/it]
|
| 465 |
26%|ββββββββββββ | 61/232 [1:57:13<5:01:41, 105.86s/it]
|
| 466 |
|
| 467 |
+
|
| 468 |
26%|ββββββββββββ | 61/232 [1:57:13<5:01:41, 105.86s/it]
|
| 469 |
27%|ββββββββββββ | 62/232 [1:58:58<4:59:07, 105.57s/it]
|
| 470 |
|
| 471 |
+
|
| 472 |
27%|ββββββββββββ | 62/232 [1:58:58<4:59:07, 105.57s/it]
|
| 473 |
27%|ββββββββββββ | 63/232 [2:00:43<4:56:30, 105.27s/it]
|
| 474 |
|
| 475 |
+
|
| 476 |
27%|ββββββββββββ | 63/232 [2:00:43<4:56:30, 105.27s/it]
|
| 477 |
28%|ββββββββββββ | 64/232 [2:02:27<4:54:27, 105.16s/it]
|
| 478 |
|
| 479 |
+
|
| 480 |
28%|ββββββββββββ | 64/232 [2:02:27<4:54:27, 105.16s/it]
|
| 481 |
28%|ββββββββββββ | 65/232 [2:04:12<4:52:23, 105.05s/it]
|
| 482 |
|
| 483 |
+
|
| 484 |
28%|ββββββββββββ | 65/232 [2:04:12<4:52:23, 105.05s/it]
|
| 485 |
28%|βββββββββββββ | 66/232 [2:05:55<4:48:48, 104.39s/it]
|
| 486 |
|
| 487 |
+
|
| 488 |
28%|βββββββββββββ | 66/232 [2:05:55<4:48:48, 104.39s/it]
|
| 489 |
29%|βββββββββββββ | 67/232 [2:07:40<4:47:28, 104.53s/it]
|
| 490 |
|
| 491 |
+
|
| 492 |
29%|βββββββββββββ | 67/232 [2:07:40<4:47:28, 104.53s/it]
|
| 493 |
29%|βββββββββββββ | 68/232 [2:09:25<4:45:52, 104.59s/it]
|
| 494 |
|
| 495 |
+
|
| 496 |
29%|βββββββββββββ | 68/232 [2:09:25<4:45:52, 104.59s/it]
|
| 497 |
30%|βββββββββββββ | 69/232 [2:11:09<4:43:56, 104.52s/it]
|
| 498 |
|
| 499 |
+
|
| 500 |
30%|βββββββββββββ | 69/232 [2:11:09<4:43:56, 104.52s/it]
|
| 501 |
30%|βββββββββββββ | 70/232 [2:12:55<4:43:37, 105.04s/it]
|
| 502 |
|
| 503 |
+
|
| 504 |
30%|βββββββββββββ | 70/232 [2:12:55<4:43:37, 105.04s/it]
|
| 505 |
31%|ββββββββββββββ | 71/232 [2:14:40<4:41:28, 104.90s/it]
|
| 506 |
|
| 507 |
+
|
| 508 |
31%|ββββββββββββββ | 71/232 [2:14:40<4:41:28, 104.90s/it]
|
| 509 |
31%|ββββββββββββββ | 72/232 [2:16:26<4:40:27, 105.17s/it]
|
| 510 |
|
| 511 |
+
|
| 512 |
31%|ββββββββββββββ | 72/232 [2:16:26<4:40:27, 105.17s/it][2025-10-07 14:07:44,423] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:8314] Running evaluation step...
|
| 513 |
+
[2025-10-07 14:07:46,706] [DEBUG] [axolotl.utils.samplers.multipack.__len__:458] [PID:8314] generate_batches time: 1.0272562503814697
|
| 514 |
+
[2025-10-07 14:07:47,758] [DEBUG] [axolotl.utils.samplers.multipack.__len__:458] [PID:8314] generate_batches time: 1.0513060092926025
|
| 515 |
+
[2025-10-07 14:07:48,796] [DEBUG] [axolotl.utils.samplers.multipack.__len__:458] [PID:8314] generate_batches time: 1.038541555404663
|
| 516 |
+
[2025-10-07 14:07:49,813] [DEBUG] [axolotl.utils.samplers.multipack.__len__:458] [PID:8314] generate_batches time: 1.0161314010620117
|
| 517 |
+
[2025-10-07 14:07:49,814] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:434] [PID:8314] gather_len_batches: [47, 47]
|
| 518 |
+
|
| 519 |
+
|
| 520 |
0%| | 0/23 [00:00<?, ?it/s][A
|
| 521 |
+
|
| 522 |
9%|βββββ | 2/23 [00:08<01:25, 4.07s/it][A
|
| 523 |
+
|
| 524 |
13%|βββββββ | 3/23 [00:16<01:55, 5.76s/it][A
|
| 525 |
+
|
| 526 |
17%|βββββββββ | 4/23 [00:24<02:06, 6.67s/it][A
|
| 527 |
+
|
| 528 |
22%|βββββββββββ | 5/23 [00:32<02:10, 7.23s/it][A
|
| 529 |
+
|
| 530 |
26%|βββββββββββββ | 6/23 [00:40<02:08, 7.56s/it][A
|
| 531 |
+
|
| 532 |
30%|ββββββββββββββββ | 7/23 [00:49<02:03, 7.75s/it][A
|
| 533 |
+
|
| 534 |
35%|ββββββββββββββββββ | 8/23 [00:57<01:58, 7.87s/it][A
|
| 535 |
+
|
| 536 |
39%|ββββββββββββββββββββ | 9/23 [01:05<01:52, 8.02s/it][A
|
| 537 |
+
|
| 538 |
43%|ββββββββββββββββββββββ | 10/23 [01:13<01:44, 8.05s/it][A
|
| 539 |
+
|
| 540 |
48%|ββββββββββββββββββββββββ | 11/23 [01:21<01:36, 8.08s/it][A
|
| 541 |
+
|
| 542 |
52%|ββββββββββββββββββββββββββ | 12/23 [01:29<01:29, 8.09s/it][A
|
| 543 |
+
|
| 544 |
57%|ββββββββββββββββββββββββββββ | 13/23 [01:38<01:21, 8.17s/it][A
|
| 545 |
+
|
| 546 |
61%|ββββββββββββββββββββββββββββββ | 14/23 [01:46<01:13, 8.16s/it][A
|
| 547 |
+
|
| 548 |
65%|ββββββββββββββββββββββββββββββββ | 15/23 [01:54<01:05, 8.16s/it][A
|
| 549 |
+
|
| 550 |
70%|ββββββββββββββββββββββββββββββββββ | 16/23 [02:01<00:55, 7.89s/it][A
|
| 551 |
+
|
| 552 |
74%|βββββββββββββββββββββββββββββββββββββ | 17/23 [02:10<00:48, 8.00s/it][A
|
| 553 |
+
|
| 554 |
78%|βββββββββββββββββββββββββββββββββββββββ | 18/23 [02:18<00:40, 8.06s/it][A
|
| 555 |
+
|
| 556 |
83%|βββββββββββββββββββββββββββββββββββββββββ | 19/23 [02:26<00:32, 8.09s/it][A
|
| 557 |
+
|
| 558 |
87%|βββββββββββββββββββββββββββββββββββββββββββ | 20/23 [02:34<00:24, 8.11s/it][A
|
| 559 |
+
|
| 560 |
91%|βββββββββββββββββββββββββββββββββββββββββββββ | 21/23 [02:42<00:16, 8.18s/it][A
|
| 561 |
+
|
| 562 |
96%|βββββββββββββββββββββββββββββββββββββββββββββββ | 22/23 [02:51<00:08, 8.18s/it][A
|
| 563 |
+
|
| 564 |
|
| 565 |
+
|
| 566 |
|
| 567 |
+
|
| 568 |
31%|ββββββββββββββ | 72/232 [2:19:39<4:40:27, 105.17s/it]
|
| 569 |
+
|
| 570 |
+
|
| 571 |
[A[2025-10-07 14:10:57,960] [WARNING] [py.warnings._showwarnmsg:110] [PID:8314] /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:680: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
|
| 572 |
+
warnings.warn(
|
| 573 |
+
|
| 574 |
+
[2025-10-07 14:11:08,752] [INFO] [axolotl.core.trainers.base._save:671] [PID:8314] Saving model checkpoint to ckpts-mmarv/checkpoint-72
|
| 575 |
+
|
| 576 |
31%|ββββββββββββββ | 73/232 [2:21:50<7:32:42, 170.84s/it]
|
| 577 |
|
| 578 |
+
|
| 579 |
31%|ββββββββββββββ | 73/232 [2:21:50<7:32:42, 170.84s/it]
|