Training in progress, step 216
Browse files- adapter_model.safetensors +1 -1
- debug.log +62 -1
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 456206152
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:067a4403180bf4f00d083a5cf9181f4e00a58a51a5dc6566431e184fb0b9386f
|
| 3 |
size 456206152
|
debug.log
CHANGED
|
@@ -779,4 +779,65 @@ trainable params: 114,032,640 || all params: 12,361,835,520 || trainable%: 0.922
|
|
| 779 |
|
| 780 |
[2025-10-07 17:58:54,750] [INFO] [axolotl.core.trainers.base._save:671] [PID:8314] Saving model checkpoint to ckpts-mmarv/checkpoint-192
|
| 781 |
|
| 782 |
83%|βββββββββββββββββββββββββββββββββββ | 193/232 [6:09:32<1:50:27, 169.92s/it]
|
| 783 |
|
| 784 |
-
|
| 785 |
83%|βββββββββββββββββββββββββββββββββββ | 193/232 [6:09:32<1:50:27, 169.92s/it]
|
|
|
|
| 786 |
83%|βββββββββββββββββββββββββββββββββββ | 193/232 [6:09:32<1:50:27, 169.92s/it]
|
| 787 |
84%|βββββββββββββββββββββββββββββββββββ | 194/232 [6:11:15<1:34:52, 149.80s/it]
|
| 788 |
|
|
|
|
| 789 |
84%|βββββββββββββββββββββββββββββββββββ | 194/232 [6:11:15<1:34:52, 149.80s/it]
|
| 790 |
84%|ββββββββββββββββββββββββββββββββββββ | 195/232 [6:13:00<1:24:07, 136.42s/it]
|
| 791 |
|
|
|
|
| 792 |
84%|ββββββββββββββββββββββββββββββββββββ | 195/232 [6:13:00<1:24:07, 136.42s/it]
|
| 793 |
84%|ββββββββββββββββββββββββββββββββββββ | 196/232 [6:14:45<1:16:08, 126.89s/it]
|
| 794 |
|
|
|
|
| 795 |
84%|ββββββββββββββββββββββββββββββββββββ | 196/232 [6:14:45<1:16:08, 126.89s/it]
|
| 796 |
85%|ββββββββββββββββββββββββββββββββββββ | 197/232 [6:16:30<1:10:11, 120.34s/it]
|
| 797 |
|
|
|
|
| 798 |
85%|ββββββββββββββββββββββββββββββββββββ | 197/232 [6:16:30<1:10:11, 120.34s/it]
|
| 799 |
85%|ββββββββββββββββββββββββββββββββββββ | 198/232 [6:18:15<1:05:37, 115.82s/it]
|
| 800 |
|
|
|
|
| 801 |
85%|ββββββββββββββββββββββββββββββββββββ | 198/232 [6:18:15<1:05:37, 115.82s/it]
|
| 802 |
86%|ββββββββββββββββββββββββββββββββββββ | 199/232 [6:20:00<1:01:52, 112.50s/it]
|
| 803 |
|
|
|
|
| 804 |
86%|ββββββββββββββββββββββββββββββββββββ | 199/232 [6:20:00<1:01:52, 112.50s/it]
|
| 805 |
86%|ββββββββββββββββββββββββοΏ½οΏ½βββββββββββββ | 200/232 [6:21:46<58:59, 110.62s/it]
|
| 806 |
|
|
|
|
| 807 |
86%|ββββββββββββββββββββββββββββββββββββββ | 200/232 [6:21:46<58:59, 110.62s/it]
|
| 808 |
87%|ββββββββββββββββββββββββββββββββββββββ | 201/232 [6:23:31<56:15, 108.89s/it]
|
| 809 |
|
|
|
|
| 810 |
87%|ββββββββββββββββββββββββββββββββββββββ | 201/232 [6:23:31<56:15, 108.89s/it]
|
| 811 |
87%|βββββββββββββββββββββββββββββββββββββββ | 202/232 [6:25:16<53:52, 107.76s/it]
|
| 812 |
|
|
|
|
| 813 |
87%|βββββββββββββββββββββββββββββββββββββββ | 202/232 [6:25:16<53:52, 107.76s/it]
|
| 814 |
88%|βββββββββββββββββββββββββββββββββββββββ | 203/232 [6:27:01<51:38, 106.83s/it]
|
| 815 |
|
|
|
|
| 816 |
88%|βββββββββββββββββββββββββββββββββββββββ | 203/232 [6:27:01<51:38, 106.83s/it]
|
| 817 |
88%|βββββββββββββββββββββββββββββββββββββββ | 204/232 [6:28:46<49:36, 106.31s/it]
|
| 818 |
|
|
|
|
| 819 |
88%|βββββββββββββββββββββββββββββββββββββββ | 204/232 [6:28:46<49:36, 106.31s/it]
|
| 820 |
88%|βββββββββββββββββββββββββββββββββββββββ | 205/232 [6:30:31<47:40, 105.93s/it]
|
| 821 |
|
|
|
|
| 822 |
88%|βββββββββββββββββββββββββββββββββββββββ | 205/232 [6:30:31<47:40, 105.93s/it]
|
| 823 |
89%|βββββββββββββββββββββββββββββββββββββββ | 206/232 [6:32:16<45:44, 105.54s/it]
|
| 824 |
|
|
|
|
| 825 |
89%|βββββββββββββββββββββββββββββββββββββββ | 206/232 [6:32:16<45:44, 105.54s/it]
|
| 826 |
89%|ββββββββββββββββββββββββββββββββββββββββ | 207/232 [6:34:01<43:55, 105.42s/it]
|
| 827 |
|
|
|
|
| 828 |
89%|ββββββββββββββββββββββββββββββββββββββββ | 207/232 [6:34:01<43:55, 105.42s/it]
|
| 829 |
90%|ββββββββββββββββββββββββββββββββββββββββ | 208/232 [6:35:46<42:08, 105.34s/it]
|
| 830 |
|
|
|
|
| 831 |
90%|ββββββββββββββββββββββββββββββββββββββββ | 208/232 [6:35:46<42:08, 105.34s/it]
|
| 832 |
90%|ββββββββββββββββββββββββββββββββββββββββ | 209/232 [6:37:31<40:18, 105.14s/it]
|
| 833 |
|
|
|
|
| 834 |
90%|ββββββββββββββββββββββββββββββββββββββββ | 209/232 [6:37:31<40:18, 105.14s/it]
|
| 835 |
91%|ββββββββββββββββββββββββββββββββββββββββ | 210/232 [6:39:17<38:38, 105.40s/it]
|
| 836 |
|
|
|
|
| 837 |
91%|ββββββββββββββββββββββββββββββββββββββββ | 210/232 [6:39:17<38:38, 105.40s/it]
|
| 838 |
91%|ββββββββββββββββββββββββββββββββββββββββ | 211/232 [6:41:01<36:49, 105.19s/it]
|
| 839 |
|
|
|
|
| 840 |
91%|ββββββββββββββββββββββββββββββββββββββββ | 211/232 [6:41:01<36:49, 105.19s/it]
|
| 841 |
91%|βββββββββββββββββββββββββββββββββββββββββ | 212/232 [6:42:46<35:03, 105.16s/it]
|
| 842 |
|
|
|
|
| 843 |
91%|βββββββββββββββββββββββββββββββββββββββββ | 212/232 [6:42:47<35:03, 105.16s/it]
|
| 844 |
92%|βββββββββββββββββββββββββββββββββββββββββ | 213/232 [6:44:32<33:17, 105.15s/it]
|
| 845 |
|
|
|
|
| 846 |
92%|βββββββββββββββββββββββββββββββββββββββββ | 213/232 [6:44:32<33:17, 105.15s/it]
|
| 847 |
92%|βββββββββββββββββββββββββββββββββββββββββ | 214/232 [6:46:17<31:31, 105.09s/it]
|
| 848 |
|
|
|
|
| 849 |
92%|βββββββββββββββββββββββββββββββββββββββββ | 214/232 [6:46:17<31:31, 105.09s/it]
|
| 850 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 215/232 [6:48:02<29:47, 105.15s/it]
|
| 851 |
|
|
|
|
| 852 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 215/232 [6:48:02<29:47, 105.15s/it]
|
| 853 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 216/232 [6:49:47<28:04, 105.28s/it]
|
| 854 |
|
|
|
|
| 855 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 216/232 [6:49:47<28:04, 105.28s/it][2025-10-07 18:41:06,193] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:8314] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 856 |
0%| | 0/23 [00:00<?, ?it/s][A
|
|
|
|
| 857 |
9%|βββββ | 2/23 [00:08<01:25, 4.06s/it][A
|
|
|
|
| 858 |
13%|βββββββ | 3/23 [00:16<01:55, 5.76s/it][A
|
|
|
|
| 859 |
17%|βββββββββ | 4/23 [00:24<02:06, 6.66s/it][A
|
|
|
|
| 860 |
22%|βββββββββββ | 5/23 [00:32<02:10, 7.23s/it][A
|
|
|
|
| 861 |
26%|βββββββββββββ | 6/23 [00:40<02:08, 7.56s/it][A
|
|
|
|
| 862 |
30%|ββββββββββββββββ | 7/23 [00:49<02:03, 7.74s/it][A
|
|
|
|
| 863 |
35%|ββββββββββββββββββ | 8/23 [00:57<01:58, 7.87s/it][A
|
|
|
|
| 864 |
39%|ββββββββββββββββββββ | 9/23 [01:05<01:52, 8.02s/it][A
|
|
|
|
| 865 |
43%|ββββββββββββββββββββββ | 10/23 [01:13<01:44, 8.06s/it][A
|
|
|
|
| 866 |
48%|ββββββββββββββββββββββββ | 11/23 [01:21<01:37, 8.09s/it][A
|
|
|
|
| 867 |
52%|ββββββββββββββββββββββββββ | 12/23 [01:30<01:29, 8.11s/it][A
|
|
|
|
| 868 |
57%|ββββββββββββββββββββββββββββ | 13/23 [01:38<01:21, 8.18s/it][A
|
|
|
|
| 869 |
61%|ββββββββββββββββββββββββββββββ | 14/23 [01:46<01:13, 8.17s/it][A
|
|
|
|
| 870 |
65%|ββββββββββββββββββββββββββββββββ | 15/23 [01:54<01:05, 8.16s/it][A
|
|
|
|
| 871 |
70%|ββββββββββββββββββββββββββββββββββ | 16/23 [02:01<00:55, 7.90s/it][A
|
|
|
|
| 872 |
74%|βββββββββββββββββββββββββββββββββββββ | 17/23 [02:10<00:48, 8.01s/it][A
|
|
|
|
| 873 |
78%|βββββββββββββββββββββββββββββββββββββββ | 18/23 [02:18<00:40, 8.06s/it][A
|
|
|
|
| 874 |
83%|βββββββββββββββββββββββββββββββββββββββββ | 19/23 [02:26<00:32, 8.09s/it][A
|
|
|
|
| 875 |
87%|βββββββββββββββββββββββββββββββββββββββββββ | 20/23 [02:34<00:24, 8.11s/it][A
|
|
|
|
| 876 |
91%|βββββββββββββββββββββββββββββββββββββββββββββ | 21/23 [02:43<00:16, 8.17s/it][A
|
|
|
|
| 877 |
96%|βββββββββββββββββββββββββββββββββββββββββββββββ | 22/23 [02:51<00:08, 8.18s/it][A
|
|
|
|
| 878 |
|
|
|
|
| 879 |
|
|
|
|
| 880 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 216/232 [6:53:01<28:04, 105.28s/it]
|
|
|
|
|
|
|
| 881 |
[A[2025-10-07 18:44:19,920] [WARNING] [py.warnings._showwarnmsg:110] [PID:8314] /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:680: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
|
|
|
|
|
|
|
|
|
|
|
|
|
| 882 |
94%|ββββββββββββββββββββββββββββββββββββββββββ | 217/232 [6:55:11<42:40, 170.68s/it]
|
| 883 |
|
|
|
|
| 884 |
94%|ββββββββββββββββββββββββββββββββββββββββββ | 217/232 [6:55:11<42:40, 170.68s/it]
|
|
|
|
| 779 |
|
| 780 |
[2025-10-07 17:58:54,750] [INFO] [axolotl.core.trainers.base._save:671] [PID:8314] Saving model checkpoint to ckpts-mmarv/checkpoint-192
|
| 781 |
|
| 782 |
83%|βββββββββββββββββββββββββββββββββββ | 193/232 [6:09:32<1:50:27, 169.92s/it]
|
| 783 |
|
|
|
|
| 784 |
83%|βββββββββββββββββββββββββββββββββββ | 193/232 [6:09:32<1:50:27, 169.92s/it]
|
| 785 |
+
|
| 786 |
83%|βββββββββββββββββββββββββββββββββββ | 193/232 [6:09:32<1:50:27, 169.92s/it]
|
| 787 |
84%|βββββββββββββββββββββββββββββββββββ | 194/232 [6:11:15<1:34:52, 149.80s/it]
|
| 788 |
|
| 789 |
+
|
| 790 |
84%|βββββββββββββββββββββββββββββββββββ | 194/232 [6:11:15<1:34:52, 149.80s/it]
|
| 791 |
84%|ββββββββββββββββββββββββββββββββββββ | 195/232 [6:13:00<1:24:07, 136.42s/it]
|
| 792 |
|
| 793 |
+
|
| 794 |
84%|ββββββββββββββββββββββββββββββββββββ | 195/232 [6:13:00<1:24:07, 136.42s/it]
|
| 795 |
84%|ββββββββββββββββββββββββββββββββββββ | 196/232 [6:14:45<1:16:08, 126.89s/it]
|
| 796 |
|
| 797 |
+
|
| 798 |
84%|ββββββββββββββββββββββββββββββββββββ | 196/232 [6:14:45<1:16:08, 126.89s/it]
|
| 799 |
85%|ββββββββββββββββββββββββββββββββββββ | 197/232 [6:16:30<1:10:11, 120.34s/it]
|
| 800 |
|
| 801 |
+
|
| 802 |
85%|ββββββββββββββββββββββββββββββββββββ | 197/232 [6:16:30<1:10:11, 120.34s/it]
|
| 803 |
85%|ββββββββββββββββββββββββββββββββββββ | 198/232 [6:18:15<1:05:37, 115.82s/it]
|
| 804 |
|
| 805 |
+
|
| 806 |
85%|ββββββββββββββββββββββββββββββββββββ | 198/232 [6:18:15<1:05:37, 115.82s/it]
|
| 807 |
86%|ββββββββββββββββββββββββββββββββββββ | 199/232 [6:20:00<1:01:52, 112.50s/it]
|
| 808 |
|
| 809 |
+
|
| 810 |
86%|ββββββββββββββββββββββββββββββββββββ | 199/232 [6:20:00<1:01:52, 112.50s/it]
|
| 811 |
86%|ββββββββββββββββββββββββοΏ½οΏ½βββββββββββββ | 200/232 [6:21:46<58:59, 110.62s/it]
|
| 812 |
|
| 813 |
+
|
| 814 |
86%|ββββββββββββββββββββββββββββββββββββββ | 200/232 [6:21:46<58:59, 110.62s/it]
|
| 815 |
87%|ββββββββββββββββββββββββββββββββββββββ | 201/232 [6:23:31<56:15, 108.89s/it]
|
| 816 |
|
| 817 |
+
|
| 818 |
87%|ββββββββββββββββββββββββββββββββββββββ | 201/232 [6:23:31<56:15, 108.89s/it]
|
| 819 |
87%|βββββββββββββββββββββββββββββββββββββββ | 202/232 [6:25:16<53:52, 107.76s/it]
|
| 820 |
|
| 821 |
+
|
| 822 |
87%|βββββββββββββββββββββββββββββββββββββββ | 202/232 [6:25:16<53:52, 107.76s/it]
|
| 823 |
88%|βββββββββββββββββββββββββββββββββββββββ | 203/232 [6:27:01<51:38, 106.83s/it]
|
| 824 |
|
| 825 |
+
|
| 826 |
88%|βββββββββββββββββββββββββββββββββββββββ | 203/232 [6:27:01<51:38, 106.83s/it]
|
| 827 |
88%|βββββββββββββββββββββββββββββββββββββββ | 204/232 [6:28:46<49:36, 106.31s/it]
|
| 828 |
|
| 829 |
+
|
| 830 |
88%|βββββββββββββββββββββββββββββββββββββββ | 204/232 [6:28:46<49:36, 106.31s/it]
|
| 831 |
88%|βββββββββββββββββββββββββββββββββββββββ | 205/232 [6:30:31<47:40, 105.93s/it]
|
| 832 |
|
| 833 |
+
|
| 834 |
88%|βββββββββββββββββββββββββββββββββββββββ | 205/232 [6:30:31<47:40, 105.93s/it]
|
| 835 |
89%|βββββββββββββββββββββββββββββββββββββββ | 206/232 [6:32:16<45:44, 105.54s/it]
|
| 836 |
|
| 837 |
+
|
| 838 |
89%|βββββββββββββββββββββββββββββββββββββββ | 206/232 [6:32:16<45:44, 105.54s/it]
|
| 839 |
89%|ββββββββββββββββββββββββββββββββββββββββ | 207/232 [6:34:01<43:55, 105.42s/it]
|
| 840 |
|
| 841 |
+
|
| 842 |
89%|ββββββββββββββββββββββββββββββββββββββββ | 207/232 [6:34:01<43:55, 105.42s/it]
|
| 843 |
90%|ββββββββββββββββββββββββββββββββββββββββ | 208/232 [6:35:46<42:08, 105.34s/it]
|
| 844 |
|
| 845 |
+
|
| 846 |
90%|ββββββββββββββββββββββββββββββββββββββββ | 208/232 [6:35:46<42:08, 105.34s/it]
|
| 847 |
90%|ββββββββββββββββββββββββββββββββββββββββ | 209/232 [6:37:31<40:18, 105.14s/it]
|
| 848 |
|
| 849 |
+
|
| 850 |
90%|ββββββββββββββββββββββββββββββββββββββββ | 209/232 [6:37:31<40:18, 105.14s/it]
|
| 851 |
91%|ββββββββββββββββββββββββββββββββββββββββ | 210/232 [6:39:17<38:38, 105.40s/it]
|
| 852 |
|
| 853 |
+
|
| 854 |
91%|ββββββββββββββββββββββββββββββββββββββββ | 210/232 [6:39:17<38:38, 105.40s/it]
|
| 855 |
91%|ββββββββββββββββββββββββββββββββββββββββ | 211/232 [6:41:01<36:49, 105.19s/it]
|
| 856 |
|
| 857 |
+
|
| 858 |
91%|ββββββββββββββββββββββββββββββββββββββββ | 211/232 [6:41:01<36:49, 105.19s/it]
|
| 859 |
91%|βββββββββββββββββββββββββββββββββββββββββ | 212/232 [6:42:46<35:03, 105.16s/it]
|
| 860 |
|
| 861 |
+
|
| 862 |
91%|βββββββββββββββββββββββββββββββββββββββββ | 212/232 [6:42:47<35:03, 105.16s/it]
|
| 863 |
92%|βββββββββββββββββββββββββββββββββββββββββ | 213/232 [6:44:32<33:17, 105.15s/it]
|
| 864 |
|
| 865 |
+
|
| 866 |
92%|βββββββββββββββββββββββββββββββββββββββββ | 213/232 [6:44:32<33:17, 105.15s/it]
|
| 867 |
92%|βββββββββββββββββββββββββββββββββββββββββ | 214/232 [6:46:17<31:31, 105.09s/it]
|
| 868 |
|
| 869 |
+
|
| 870 |
92%|βββββββββββββββββββββββββββββββββββββββββ | 214/232 [6:46:17<31:31, 105.09s/it]
|
| 871 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 215/232 [6:48:02<29:47, 105.15s/it]
|
| 872 |
|
| 873 |
+
|
| 874 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 215/232 [6:48:02<29:47, 105.15s/it]
|
| 875 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 216/232 [6:49:47<28:04, 105.28s/it]
|
| 876 |
|
| 877 |
+
|
| 878 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 216/232 [6:49:47<28:04, 105.28s/it][2025-10-07 18:41:06,193] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:8314] Running evaluation step...
|
| 879 |
+
[2025-10-07 18:41:08,512] [DEBUG] [axolotl.utils.samplers.multipack.__len__:458] [PID:8314] generate_batches time: 1.050823450088501
|
| 880 |
+
[2025-10-07 18:41:09,574] [DEBUG] [axolotl.utils.samplers.multipack.__len__:458] [PID:8314] generate_batches time: 1.061586856842041
|
| 881 |
+
[2025-10-07 18:41:10,635] [DEBUG] [axolotl.utils.samplers.multipack.__len__:458] [PID:8314] generate_batches time: 1.060603380203247
|
| 882 |
+
[2025-10-07 18:41:11,689] [DEBUG] [axolotl.utils.samplers.multipack.__len__:458] [PID:8314] generate_batches time: 1.0533573627471924
|
| 883 |
+
[2025-10-07 18:41:11,690] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:434] [PID:8314] gather_len_batches: [47, 47]
|
| 884 |
+
|
| 885 |
+
|
| 886 |
0%| | 0/23 [00:00<?, ?it/s][A
|
| 887 |
+
|
| 888 |
9%|βββββ | 2/23 [00:08<01:25, 4.06s/it][A
|
| 889 |
+
|
| 890 |
13%|βββββββ | 3/23 [00:16<01:55, 5.76s/it][A
|
| 891 |
+
|
| 892 |
17%|βββββββββ | 4/23 [00:24<02:06, 6.66s/it][A
|
| 893 |
+
|
| 894 |
22%|βββββββββββ | 5/23 [00:32<02:10, 7.23s/it][A
|
| 895 |
+
|
| 896 |
26%|βββββββββββββ | 6/23 [00:40<02:08, 7.56s/it][A
|
| 897 |
+
|
| 898 |
30%|ββββββββββββββββ | 7/23 [00:49<02:03, 7.74s/it][A
|
| 899 |
+
|
| 900 |
35%|ββββββββββββββββββ | 8/23 [00:57<01:58, 7.87s/it][A
|
| 901 |
+
|
| 902 |
39%|ββββββββββββββββββββ | 9/23 [01:05<01:52, 8.02s/it][A
|
| 903 |
+
|
| 904 |
43%|ββββββββββββββββββββββ | 10/23 [01:13<01:44, 8.06s/it][A
|
| 905 |
+
|
| 906 |
48%|ββββββββββββββββββββββββ | 11/23 [01:21<01:37, 8.09s/it][A
|
| 907 |
+
|
| 908 |
52%|ββββββββββββββββββββββββββ | 12/23 [01:30<01:29, 8.11s/it][A
|
| 909 |
+
|
| 910 |
57%|ββββββββββββββββββββββββββββ | 13/23 [01:38<01:21, 8.18s/it][A
|
| 911 |
+
|
| 912 |
61%|ββββββββββββββββββββββββββββββ | 14/23 [01:46<01:13, 8.17s/it][A
|
| 913 |
+
|
| 914 |
65%|ββββββββββββββββββββββββββββββββ | 15/23 [01:54<01:05, 8.16s/it][A
|
| 915 |
+
|
| 916 |
70%|ββββββββββββββββββββββββββββββββββ | 16/23 [02:01<00:55, 7.90s/it][A
|
| 917 |
+
|
| 918 |
74%|βββββββββββββββββββββββββββββββββββββ | 17/23 [02:10<00:48, 8.01s/it][A
|
| 919 |
+
|
| 920 |
78%|βββββββββββββββββββββββββββββββββββββββ | 18/23 [02:18<00:40, 8.06s/it][A
|
| 921 |
+
|
| 922 |
83%|βββββββββββββββββββββββββββββββββββββββββ | 19/23 [02:26<00:32, 8.09s/it][A
|
| 923 |
+
|
| 924 |
87%|βββββββββββββββββββββββββββββββββββββββββββ | 20/23 [02:34<00:24, 8.11s/it][A
|
| 925 |
+
|
| 926 |
91%|βββββββββββββββββββββββββββββββββββββββββββββ | 21/23 [02:43<00:16, 8.17s/it][A
|
| 927 |
+
|
| 928 |
96%|βββββββββββββββββββββββββββββββββββββββββββββββ | 22/23 [02:51<00:08, 8.18s/it][A
|
| 929 |
+
|
| 930 |
|
| 931 |
+
|
| 932 |
|
| 933 |
+
|
| 934 |
93%|βββββββββββββββββββββββββββββββββββββββββ | 216/232 [6:53:01<28:04, 105.28s/it]
|
| 935 |
+
|
| 936 |
+
|
| 937 |
[A[2025-10-07 18:44:19,920] [WARNING] [py.warnings._showwarnmsg:110] [PID:8314] /root/miniconda3/envs/py3.11/lib/python3.11/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py:680: FutureWarning: FSDP.state_dict_type() and FSDP.set_state_dict_type() are being deprecated. Please use APIs, get_state_dict() and set_state_dict(), which can support different parallelisms, FSDP1, FSDP2, DDP. API doc: https://pytorch.org/docs/stable/distributed.checkpoint.html#torch.distributed.checkpoint.state_dict.get_state_dict .Tutorial: https://pytorch.org/tutorials/recipes/distributed_checkpoint_recipe.html .
|
| 938 |
+
warnings.warn(
|
| 939 |
+
|
| 940 |
+
[2025-10-07 18:44:30,660] [INFO] [axolotl.core.trainers.base._save:671] [PID:8314] Saving model checkpoint to ckpts-mmarv/checkpoint-216
|
| 941 |
+
|
| 942 |
94%|ββββββββββββββββββββββββββββββββββββββββββ | 217/232 [6:55:11<42:40, 170.68s/it]
|
| 943 |
|
| 944 |
+
|
| 945 |
94%|ββββββββββββββββββββββββββββββββββββββββββ | 217/232 [6:55:11<42:40, 170.68s/it]
|