zypchn commited on
Commit
203ed33
·
verified ·
1 Parent(s): 9b289c1

Training in progress, epoch 1

Browse files
adapter_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "nferruz/ProtGPT2",
5
+ "bias": "none",
6
+ "corda_config": null,
7
+ "eva_config": null,
8
+ "exclude_modules": null,
9
+ "fan_in_fan_out": null,
10
+ "inference_mode": true,
11
+ "init_lora_weights": true,
12
+ "layer_replication": null,
13
+ "layers_pattern": null,
14
+ "layers_to_transform": null,
15
+ "loftq_config": {},
16
+ "lora_alpha": 16,
17
+ "lora_bias": false,
18
+ "lora_dropout": 0.05,
19
+ "megatron_config": null,
20
+ "megatron_core": "megatron.core",
21
+ "modules_to_save": null,
22
+ "peft_type": "LORA",
23
+ "qalora_group_size": 16,
24
+ "r": 32,
25
+ "rank_pattern": {},
26
+ "revision": null,
27
+ "target_modules": [
28
+ "c_attn",
29
+ "c_proj",
30
+ "c_fc"
31
+ ],
32
+ "target_parameters": [],
33
+ "task_type": "CAUSAL_LM",
34
+ "trainable_token_indices": null,
35
+ "use_dora": false,
36
+ "use_qalora": false,
37
+ "use_rslora": false
38
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ab1a9a92ffff873459d1f10e2eb9bc52bee197b09bdcbc4b89dd4e69d5cb082
3
+ size 94409168
config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_function": "gelu_new",
3
+ "architectures": [
4
+ "GPT2LMHeadModel"
5
+ ],
6
+ "attn_pdrop": 0.1,
7
+ "bos_token_id": 0,
8
+ "dtype": "float16",
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 0,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 1280,
16
+ "n_head": 20,
17
+ "n_inner": null,
18
+ "n_layer": 36,
19
+ "n_positions": 2048,
20
+ "quantization_config": {
21
+ "_load_in_4bit": true,
22
+ "_load_in_8bit": false,
23
+ "bnb_4bit_compute_dtype": "float16",
24
+ "bnb_4bit_quant_storage": "bfloat16",
25
+ "bnb_4bit_quant_type": "nf4",
26
+ "bnb_4bit_use_double_quant": true,
27
+ "llm_int8_enable_fp32_cpu_offload": false,
28
+ "llm_int8_has_fp16_weight": false,
29
+ "llm_int8_skip_modules": null,
30
+ "llm_int8_threshold": 6.0,
31
+ "load_in_4bit": true,
32
+ "load_in_8bit": false,
33
+ "quant_method": "bitsandbytes"
34
+ },
35
+ "reorder_and_upcast_attn": false,
36
+ "resid_pdrop": 0.1,
37
+ "scale_attn_by_inverse_layer_idx": false,
38
+ "scale_attn_weights": true,
39
+ "summary_activation": null,
40
+ "summary_first_dropout": 0.1,
41
+ "summary_proj_to_labels": true,
42
+ "summary_type": "cls_index",
43
+ "summary_use_proj": true,
44
+ "task_specific_params": {
45
+ "text-generation": {
46
+ "do_sample": true,
47
+ "max_length": 50
48
+ }
49
+ },
50
+ "transformers_version": "4.57.0",
51
+ "use_cache": false,
52
+ "vocab_size": 50257
53
+ }
debug.log ADDED
@@ -0,0 +1,640 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0
  0% 0/549 [00:00<?, ?it/s][2025-10-10 13:11:51,161] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
1
  0% 0/4 [00:00<?, ?it/s]
 
2
  50% 2/4 [00:00<00:00, 3.33it/s]
 
3
  75% 3/4 [00:01<00:00, 1.68it/s]
 
4
 
 
5
 
 
6
  0% 0/549 [00:14<?, ?it/s]
 
 
7
  
8
  0% 1/549 [00:21<3:16:49, 21.55s/it]
9
  0% 2/549 [00:25<1:40:35, 11.03s/it]
10
  1% 3/549 [00:28<1:09:56, 7.69s/it]
11
  1% 4/549 [00:32<55:34, 6.12s/it]
12
  1% 5/549 [00:36<47:37, 5.25s/it]
13
  1% 6/549 [00:40<42:50, 4.73s/it][2025-10-10 13:12:31,245] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
14
  0% 0/4 [00:00<?, ?it/s]
 
15
  50% 2/4 [00:01<00:01, 1.90it/s]
 
16
  75% 3/4 [00:02<00:00, 1.36it/s]
 
17
 
 
18
 
 
19
  1% 6/549 [00:52<42:50, 4.73s/it]
 
 
20
  
21
  1% 7/549 [00:56<1:17:17, 8.56s/it]
22
  1% 8/549 [01:00<1:03:22, 7.03s/it]
23
  2% 9/549 [01:04<54:04, 6.01s/it]
24
  2% 10/549 [01:07<47:47, 5.32s/it]
25
  2% 11/549 [01:11<43:30, 4.85s/it]
26
  2% 12/549 [01:15<40:33, 4.53s/it][2025-10-10 13:13:06,562] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
27
  0% 0/4 [00:00<?, ?it/s]
 
28
  50% 2/4 [00:01<00:01, 1.86it/s]
 
29
  75% 3/4 [00:02<00:00, 1.34it/s]
 
30
 
 
31
 
 
32
  2% 12/549 [01:28<40:33, 4.53s/it]
 
 
33
  
34
  2% 13/549 [01:31<1:12:45, 8.14s/it]
35
  3% 14/549 [01:35<1:00:59, 6.84s/it]
36
  3% 15/549 [01:39<52:49, 5.93s/it]
37
  3% 16/549 [01:43<47:08, 5.31s/it]
38
  3% 17/549 [01:47<43:11, 4.87s/it]
39
  3% 18/549 [01:51<40:28, 4.57s/it][2025-10-10 13:13:42,268] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
40
  0% 0/4 [00:00<?, ?it/s]
 
41
  50% 2/4 [00:01<00:01, 1.83it/s]
 
42
  75% 3/4 [00:02<00:00, 1.32it/s]
 
43
 
 
44
 
 
45
  3% 18/549 [02:03<40:28, 4.57s/it]
 
 
46
  
47
  3% 19/549 [02:07<1:12:15, 8.18s/it]
48
  4% 20/549 [02:11<1:00:54, 6.91s/it]
49
  4% 21/549 [02:15<52:54, 6.01s/it]
50
  4% 22/549 [02:19<47:19, 5.39s/it]
51
  4% 23/549 [02:23<43:26, 4.96s/it]
52
  4% 24/549 [02:27<40:38, 4.64s/it][2025-10-10 13:14:18,514] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
53
  0% 0/4 [00:00<?, ?it/s]
 
54
  50% 2/4 [00:01<00:01, 1.85it/s]
 
55
  75% 3/4 [00:02<00:00, 1.33it/s]
 
56
 
 
57
 
 
58
  4% 24/549 [02:39<40:38, 4.64s/it]
 
 
59
  
60
  5% 25/549 [02:43<1:10:34, 8.08s/it]
61
  5% 26/549 [02:47<59:22, 6.81s/it]
62
  5% 27/549 [02:51<51:30, 5.92s/it]
63
  5% 28/549 [02:54<45:59, 5.30s/it]
64
  5% 29/549 [02:58<42:11, 4.87s/it]
65
  5% 30/549 [03:02<39:29, 4.57s/it][2025-10-10 13:14:53,875] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
66
  0% 0/4 [00:00<?, ?it/s]
 
67
  50% 2/4 [00:01<00:01, 1.85it/s]
 
68
  75% 3/4 [00:02<00:00, 1.32it/s]
 
69
 
 
70
 
 
71
  5% 30/549 [03:14<39:29, 4.57s/it]
 
 
72
  
73
  6% 31/549 [03:18<1:08:40, 7.95s/it]
74
  6% 32/549 [03:22<57:59, 6.73s/it]
75
  6% 33/549 [03:26<50:32, 5.88s/it]
76
  6% 34/549 [03:30<45:20, 5.28s/it]
77
  6% 35/549 [03:34<41:45, 4.87s/it]
78
  7% 36/549 [03:38<39:13, 4.59s/it][2025-10-10 13:15:29,235] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
79
  0% 0/4 [00:00<?, ?it/s]
 
80
  50% 2/4 [00:01<00:01, 1.84it/s]
 
81
  75% 3/4 [00:02<00:00, 1.32it/s]
 
82
 
 
83
 
 
84
  7% 36/549 [03:50<39:13, 4.59s/it]
 
 
85
  
86
  7% 37/549 [03:53<1:08:00, 7.97s/it]
87
  7% 38/549 [03:57<57:23, 6.74s/it]
88
  7% 39/549 [04:01<50:02, 5.89s/it]
89
  7% 40/549 [04:05<44:51, 5.29s/it]
90
  7% 41/549 [04:09<41:10, 4.86s/it]
91
  8% 42/549 [04:13<38:40, 4.58s/it][2025-10-10 13:16:04,533] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
92
  0% 0/4 [00:00<?, ?it/s]
 
93
  50% 2/4 [00:01<00:01, 1.85it/s]
 
94
  75% 3/4 [00:02<00:00, 1.33it/s]
 
95
 
 
96
 
 
97
  8% 42/549 [04:25<38:40, 4.58s/it]
 
 
98
  
99
  8% 43/549 [04:29<1:07:37, 8.02s/it]
100
  8% 44/549 [04:33<56:57, 6.77s/it]
101
  8% 45/549 [04:37<49:35, 5.90s/it]
102
  8% 46/549 [04:41<44:25, 5.30s/it]
103
  9% 47/549 [04:44<40:49, 4.88s/it]
104
  9% 48/549 [04:48<38:22, 4.60s/it][2025-10-10 13:16:40,037] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
105
  0% 0/4 [00:00<?, ?it/s]
 
106
  50% 2/4 [00:01<00:01, 1.84it/s]
 
107
  75% 3/4 [00:02<00:00, 1.32it/s]
 
108
 
 
109
 
 
110
  9% 48/549 [05:01<38:22, 4.60s/it]
 
 
111
  
112
  9% 49/549 [05:05<1:07:36, 8.11s/it]
113
  9% 50/549 [05:09<56:51, 6.84s/it]
114
  9% 51/549 [05:12<49:28, 5.96s/it]
115
  9% 52/549 [05:16<44:15, 5.34s/it]
116
  10% 53/549 [05:20<40:31, 4.90s/it]
117
  10% 54/549 [05:24<36:22, 4.41s/it][2025-10-10 13:17:15,168] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
118
  0% 0/4 [00:00<?, ?it/s]
 
119
  50% 2/4 [00:01<00:01, 1.85it/s]
 
120
  75% 3/4 [00:02<00:00, 1.33it/s]
 
121
 
 
122
 
 
123
  10% 54/549 [05:36<36:22, 4.41s/it]
 
 
124
  
125
  10% 55/549 [05:40<1:06:03, 8.02s/it]
126
  10% 56/549 [05:44<55:34, 6.76s/it]
127
  10% 57/549 [05:48<48:18, 5.89s/it]
128
  11% 58/549 [05:52<43:15, 5.29s/it]
129
  11% 59/549 [05:55<39:46, 4.87s/it]
130
  11% 60/549 [05:59<37:18, 4.58s/it][2025-10-10 13:17:50,975] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
131
  0% 0/4 [00:00<?, ?it/s]
 
132
  50% 2/4 [00:01<00:01, 1.85it/s]
 
133
  75% 3/4 [00:02<00:00, 1.33it/s]
 
134
 
 
135
 
 
136
  11% 60/549 [06:13<37:18, 4.58s/it]
 
 
137
  
138
  11% 61/549 [06:17<1:09:20, 8.52s/it]
139
  11% 62/549 [06:21<57:52, 7.13s/it]
140
  11% 63/549 [06:25<49:53, 6.16s/it]
141
  12% 64/549 [06:29<44:24, 5.49s/it]
142
  12% 65/549 [06:33<40:32, 5.03s/it]
143
  12% 66/549 [06:37<37:50, 4.70s/it][2025-10-10 13:18:28,296] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
144
  0% 0/4 [00:00<?, ?it/s]
 
145
  50% 2/4 [00:01<00:01, 1.80it/s]
 
146
  75% 3/4 [00:02<00:00, 1.31it/s]
 
147
 
 
148
 
 
149
  12% 66/549 [06:53<37:50, 4.70s/it]
 
 
150
  
151
  12% 67/549 [06:57<1:14:42, 9.30s/it]
152
  12% 68/549 [07:01<1:01:33, 7.68s/it]
153
  13% 69/549 [07:04<52:22, 6.55s/it]
154
  13% 70/549 [07:08<45:55, 5.75s/it]
155
  13% 71/549 [07:12<41:24, 5.20s/it]
156
  13% 72/549 [07:16<38:15, 4.81s/it][2025-10-10 13:19:07,848] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
157
  0% 0/4 [00:00<?, ?it/s]
 
158
  50% 2/4 [00:01<00:01, 1.84it/s]
 
159
  75% 3/4 [00:02<00:00, 1.33it/s]
 
160
 
 
161
 
 
162
  13% 72/549 [07:29<38:15, 4.81s/it]
 
 
163
  
164
  13% 73/549 [07:33<1:05:59, 8.32s/it]
165
  13% 74/549 [07:36<53:50, 6.80s/it]
166
  14% 75/549 [07:40<46:47, 5.92s/it]
167
  14% 76/549 [07:44<41:57, 5.32s/it]
168
  14% 77/549 [07:48<38:32, 4.90s/it]
169
  14% 78/549 [07:52<36:06, 4.60s/it][2025-10-10 13:19:43,212] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
170
  0% 0/4 [00:00<?, ?it/s]
 
171
  50% 2/4 [00:01<00:01, 1.85it/s]
 
172
  75% 3/4 [00:02<00:00, 1.33it/s]
 
173
 
 
174
 
 
175
  14% 78/549 [08:04<36:06, 4.60s/it]
 
 
176
  
177
  14% 79/549 [08:08<1:03:59, 8.17s/it]
178
  15% 80/549 [08:12<53:44, 6.88s/it]
179
  15% 81/549 [08:16<46:34, 5.97s/it]
180
  15% 82/549 [08:20<41:37, 5.35s/it]
181
  15% 83/549 [08:24<38:10, 4.91s/it]
182
  15% 84/549 [08:27<35:43, 4.61s/it][2025-10-10 13:20:19,124] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
183
  0% 0/4 [00:00<?, ?it/s]
 
184
  50% 2/4 [00:01<00:01, 1.85it/s]
 
185
  75% 3/4 [00:02<00:00, 1.33it/s]
 
186
 
 
187
 
 
188
  15% 84/549 [08:40<35:43, 4.61s/it]
 
 
189
  
190
  15% 85/549 [08:44<1:03:06, 8.16s/it]
191
  16% 86/549 [08:48<53:01, 6.87s/it]
192
  16% 87/549 [08:52<45:57, 5.97s/it]
193
  16% 88/549 [08:56<41:08, 5.35s/it]
194
  16% 89/549 [08:59<37:43, 4.92s/it]
195
  16% 90/549 [09:03<35:17, 4.61s/it][2025-10-10 13:20:55,024] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
196
  0% 0/4 [00:00<?, ?it/s]
 
197
  50% 2/4 [00:01<00:01, 1.85it/s]
 
198
  75% 3/4 [00:02<00:00, 1.33it/s]
 
199
 
 
200
 
 
201
  16% 90/549 [09:16<35:17, 4.61s/it]
 
 
202
  
203
  17% 91/549 [09:20<1:02:22, 8.17s/it]
204
  17% 92/549 [09:24<52:22, 6.88s/it]
205
  17% 93/549 [09:28<45:23, 5.97s/it]
206
  17% 94/549 [09:31<40:32, 5.35s/it]
207
  17% 95/549 [09:35<37:09, 4.91s/it]
208
  17% 96/549 [09:39<34:46, 4.61s/it][2025-10-10 13:21:30,890] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
209
  0% 0/4 [00:00<?, ?it/s]
 
210
  50% 2/4 [00:01<00:01, 1.85it/s]
 
211
  75% 3/4 [00:02<00:00, 1.33it/s]
 
212
 
 
213
 
 
214
  17% 96/549 [09:52<34:46, 4.61s/it]
 
 
215
  
216
  18% 97/549 [09:56<1:02:02, 8.24s/it]
217
  18% 98/549 [10:00<52:01, 6.92s/it]
218
  18% 99/549 [10:04<45:02, 6.00s/it]
219
  18% 100/549 [10:08<40:10, 5.37s/it]
220
 
 
221
  18% 100/549 [10:08<40:10, 5.37s/it]
222
  18% 101/549 [10:12<36:58, 4.95s/it]
223
  19% 102/549 [10:15<34:32, 4.64s/it][2025-10-10 13:22:07,077] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
224
  0% 0/4 [00:00<?, ?it/s]
 
225
  50% 2/4 [00:01<00:01, 1.85it/s]
 
226
  75% 3/4 [00:02<00:00, 1.33it/s]
 
227
 
 
228
 
 
229
  19% 102/549 [10:28<34:32, 4.64s/it]
 
 
230
  
231
  19% 103/549 [10:32<1:00:52, 8.19s/it]
232
  19% 104/549 [10:36<51:10, 6.90s/it]
233
  19% 105/549 [10:40<44:23, 6.00s/it]
234
  19% 106/549 [10:44<39:36, 5.36s/it]
235
  19% 107/549 [10:48<36:21, 4.94s/it]
236
  20% 108/549 [10:51<34:00, 4.63s/it][2025-10-10 13:22:43,069] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
237
  0% 0/4 [00:00<?, ?it/s]
 
238
  50% 2/4 [00:01<00:01, 1.85it/s]
 
239
  75% 3/4 [00:02<00:00, 1.33it/s]
 
240
 
 
241
 
 
242
  20% 108/549 [11:04<34:00, 4.63s/it]
 
 
243
  
244
  20% 109/549 [11:08<59:10, 8.07s/it]
245
  20% 110/549 [11:11<49:49, 6.81s/it]
246
  20% 111/549 [11:15<43:16, 5.93s/it]
247
  20% 112/549 [11:19<38:43, 5.32s/it]
248
  21% 113/549 [11:23<35:29, 4.89s/it]
249
  21% 114/549 [11:27<33:16, 4.59s/it][2025-10-10 13:23:18,582] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
250
  0% 0/4 [00:00<?, ?it/s]
 
251
  50% 2/4 [00:01<00:01, 1.85it/s]
 
252
  75% 3/4 [00:02<00:00, 1.32it/s]
 
253
 
 
254
 
 
255
  21% 114/549 [11:39<33:16, 4.59s/it]
 
 
256
  
257
  21% 115/549 [11:43<57:40, 7.97s/it]
258
  21% 116/549 [11:47<48:37, 6.74s/it]
259
  21% 117/549 [11:51<42:19, 5.88s/it]
260
  21% 118/549 [11:54<37:57, 5.28s/it]
261
  22% 119/549 [11:58<34:52, 4.87s/it]
262
  22% 120/549 [12:02<32:40, 4.57s/it][2025-10-10 13:23:53,849] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
263
  0% 0/4 [00:00<?, ?it/s]
 
264
  50% 2/4 [00:01<00:01, 1.85it/s]
 
265
  75% 3/4 [00:02<00:00, 1.33it/s]
 
266
 
 
267
 
 
268
  22% 120/549 [12:14<32:40, 4.57s/it]
 
 
269
  
270
  22% 121/549 [12:18<56:50, 7.97s/it]
271
  22% 122/549 [12:22<47:53, 6.73s/it]
272
  22% 123/549 [12:26<41:44, 5.88s/it]
273
  23% 124/549 [12:30<37:24, 5.28s/it]
274
  23% 125/549 [12:34<34:22, 4.86s/it]
275
  23% 126/549 [12:38<32:18, 4.58s/it][2025-10-10 13:24:29,185] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
276
  0% 0/4 [00:00<?, ?it/s]
 
277
  50% 2/4 [00:01<00:01, 1.85it/s]
 
278
  75% 3/4 [00:02<00:00, 1.33it/s]
 
279
 
 
280
 
 
281
  23% 126/549 [12:50<32:18, 4.58s/it]
 
 
282
  
283
  23% 127/549 [12:54<56:47, 8.08s/it]
284
  23% 128/549 [12:58<47:47, 6.81s/it]
285
  23% 129/549 [13:02<41:35, 5.94s/it]
286
  24% 130/549 [13:05<37:15, 5.33s/it]
287
  24% 131/549 [13:09<34:11, 4.91s/it]
288
  24% 132/549 [13:13<32:04, 4.61s/it][2025-10-10 13:25:04,943] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
289
  0% 0/4 [00:00<?, ?it/s]
 
290
  50% 2/4 [00:01<00:01, 1.85it/s]
 
291
  75% 3/4 [00:02<00:00, 1.33it/s]
 
292
 
 
293
 
 
294
  24% 132/549 [13:26<32:04, 4.61s/it]
 
 
295
  
296
  24% 133/549 [13:30<56:33, 8.16s/it]
297
  24% 134/549 [13:34<47:29, 6.87s/it]
298
  25% 135/549 [13:37<41:08, 5.96s/it]
299
  25% 136/549 [13:41<36:43, 5.34s/it]
300
  25% 137/549 [13:45<33:38, 4.90s/it]
301
  25% 138/549 [13:49<31:27, 4.59s/it][2025-10-10 13:25:40,704] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
302
  0% 0/4 [00:00<?, ?it/s]
 
303
  50% 2/4 [00:01<00:01, 1.85it/s]
 
304
  75% 3/4 [00:02<00:00, 1.33it/s]
 
305
 
 
306
 
 
307
  25% 138/549 [14:02<31:27, 4.59s/it]
 
 
308
  
309
  25% 139/549 [14:06<55:44, 8.16s/it]
310
  26% 140/549 [14:09<46:48, 6.87s/it]
311
  26% 141/549 [14:13<40:35, 5.97s/it]
312
  26% 142/549 [14:17<36:14, 5.34s/it]
313
  26% 143/549 [14:21<33:13, 4.91s/it]
314
  26% 144/549 [14:25<31:06, 4.61s/it][2025-10-10 13:26:16,597] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
315
  0% 0/4 [00:00<?, ?it/s]
 
316
  50% 2/4 [00:01<00:01, 1.85it/s]
 
317
  75% 3/4 [00:02<00:00, 1.33it/s]
 
318
 
 
319
 
 
320
  26% 144/549 [14:38<31:06, 4.61s/it]
 
 
321
  
322
  26% 145/549 [14:41<55:08, 8.19s/it]
323
  27% 146/549 [14:45<46:16, 6.89s/it]
324
  27% 147/549 [14:49<40:05, 5.98s/it]
325
  27% 148/549 [14:53<35:50, 5.36s/it]
326
  27% 149/549 [14:57<32:50, 4.93s/it]
327
  27% 150/549 [15:01<30:40, 4.61s/it][2025-10-10 13:26:52,575] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
328
  0% 0/4 [00:00<?, ?it/s]
 
329
  50% 2/4 [00:01<00:01, 1.85it/s]
 
330
  75% 3/4 [00:02<00:00, 1.33it/s]
 
331
 
 
332
 
 
333
  27% 150/549 [15:14<30:40, 4.61s/it]
 
 
334
  
335
  28% 151/549 [15:17<54:17, 8.18s/it]
336
  28% 152/549 [15:21<45:33, 6.89s/it]
337
  28% 153/549 [15:25<39:27, 5.98s/it]
338
  28% 154/549 [15:29<35:14, 5.35s/it]
339
  28% 155/549 [15:33<32:17, 4.92s/it]
340
  28% 156/549 [15:36<28:59, 4.43s/it][2025-10-10 13:27:27,880] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
341
  0% 0/4 [00:00<?, ?it/s]
 
342
  50% 2/4 [00:01<00:01, 1.85it/s]
 
343
  75% 3/4 [00:02<00:00, 1.33it/s]
 
344
 
 
345
 
 
346
  28% 156/549 [15:49<28:59, 4.43s/it]
 
 
347
  
348
  29% 157/549 [15:53<52:50, 8.09s/it]
349
  29% 158/549 [15:57<44:26, 6.82s/it]
350
  29% 159/549 [16:01<38:37, 5.94s/it]
351
  29% 160/549 [16:04<34:31, 5.33s/it]
352
  29% 161/549 [16:08<31:41, 4.90s/it]
353
  30% 162/549 [16:12<29:40, 4.60s/it][2025-10-10 13:28:03,961] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
354
  0% 0/4 [00:00<?, ?it/s]
 
355
  50% 2/4 [00:01<00:01, 1.85it/s]
 
356
  75% 3/4 [00:02<00:00, 1.33it/s]
 
357
 
 
358
 
 
359
  30% 162/549 [16:25<29:40, 4.60s/it]
 
 
360
  
361
  30% 163/549 [16:29<52:53, 8.22s/it]
362
  30% 164/549 [16:33<44:20, 6.91s/it]
363
  30% 165/549 [16:37<38:21, 5.99s/it]
364
  30% 166/549 [16:41<34:12, 5.36s/it]
365
  30% 167/549 [16:44<31:21, 4.92s/it]
366
  31% 168/549 [16:48<29:18, 4.62s/it][2025-10-10 13:28:40,029] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
367
  0% 0/4 [00:00<?, ?it/s]
 
368
  50% 2/4 [00:01<00:01, 1.85it/s]
 
369
  75% 3/4 [00:02<00:00, 1.33it/s]
 
370
 
 
371
 
 
372
  31% 168/549 [17:01<29:18, 4.62s/it]
 
 
373
  
374
  31% 169/549 [17:05<52:01, 8.21s/it]
375
  31% 170/549 [17:09<43:37, 6.91s/it]
376
  31% 171/549 [17:13<37:46, 6.00s/it]
377
  31% 172/549 [17:17<33:43, 5.37s/it]
378
  32% 173/549 [17:21<30:52, 4.93s/it]
379
  32% 174/549 [17:24<28:52, 4.62s/it][2025-10-10 13:29:16,068] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
380
  0% 0/4 [00:00<?, ?it/s]
 
381
  50% 2/4 [00:01<00:01, 1.85it/s]
 
382
  75% 3/4 [00:02<00:00, 1.33it/s]
 
383
 
 
384
 
 
385
  32% 174/549 [17:37<28:52, 4.62s/it]
 
 
386
  
387
  32% 175/549 [17:41<51:37, 8.28s/it]
388
  32% 176/549 [17:45<43:14, 6.96s/it]
389
  32% 177/549 [17:49<37:23, 6.03s/it]
390
  32% 178/549 [17:53<33:19, 5.39s/it]
391
  33% 179/549 [17:57<30:29, 4.94s/it]
392
  33% 180/549 [18:01<28:28, 4.63s/it][2025-10-10 13:29:52,329] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
 
 
 
 
 
 
 
393
  0% 0/4 [00:00<?, ?it/s]
 
394
  50% 2/4 [00:01<00:01, 1.85it/s]
 
395
  75% 3/4 [00:02<00:00, 1.33it/s]
 
396
 
 
397
 
 
398
  33% 180/549 [18:13<28:28, 4.63s/it]
 
 
399
  
400
  33% 181/549 [18:17<49:55, 8.14s/it]
401
  33% 182/549 [18:21<41:59, 6.86s/it]
402
  33% 183/549 [18:25<36:37, 6.00s/it][2025-10-10 13:30:16,541] [INFO] [axolotl.core.trainers.base._save:671] [PID:24741] Saving model checkpoint to ./qlora-out/checkpoint-183
 
1
+ [2025-10-10 13:10:41,462] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:24741] baseline 0.000GB (+0.000GB allocated, +0.002GB reserved)
2
+ [2025-10-10 13:10:41,462] [INFO] [axolotl.cli.config.load_cfg:248] [PID:24741] config:
3
+ {
4
+ "activation_offloading": false,
5
+ "adapter": "qlora",
6
+ "axolotl_config_path": "config.yaml",
7
+ "base_model": "nferruz/ProtGPT2",
8
+ "base_model_config": "nferruz/ProtGPT2",
9
+ "batch_size": 2,
10
+ "bf16": false,
11
+ "capabilities": {
12
+ "bf16": true,
13
+ "compute_capability": "sm_75",
14
+ "fp8": false,
15
+ "n_gpu": 1,
16
+ "n_node": 1
17
+ },
18
+ "context_parallel_size": 1,
19
+ "dataloader_num_workers": 1,
20
+ "dataloader_pin_memory": true,
21
+ "dataloader_prefetch_factor": 256,
22
+ "dataset_processes": 2,
23
+ "datasets": [
24
+ {
25
+ "ds_type": "json",
26
+ "message_property_mappings": {
27
+ "content": "content",
28
+ "role": "role"
29
+ },
30
+ "path": "/content/sequences_tokenized.jsonl",
31
+ "trust_remote_code": false
32
+ }
33
+ ],
34
+ "ddp": false,
35
+ "device": "cuda:0",
36
+ "dion_rank_fraction": 1.0,
37
+ "dion_rank_multiple_of": 1,
38
+ "env_capabilities": {
39
+ "torch_version": "2.8.0"
40
+ },
41
+ "eval_batch_size": 2,
42
+ "eval_causal_lm_metrics": [
43
+ "sacrebleu",
44
+ "comet",
45
+ "ter",
46
+ "chrf"
47
+ ],
48
+ "eval_max_new_tokens": 128,
49
+ "eval_sample_packing": true,
50
+ "eval_steps": 0.01,
51
+ "eval_table_size": 0,
52
+ "experimental_skip_move_to_device": true,
53
+ "fp16": true,
54
+ "gradient_accumulation_steps": 1,
55
+ "gradient_checkpointing": true,
56
+ "gradient_checkpointing_kwargs": {
57
+ "use_reentrant": true
58
+ },
59
+ "group_by_length": false,
60
+ "hub_model_id": "ProtGPT2-Oxido",
61
+ "include_tkps": true,
62
+ "is_falcon_derived_model": false,
63
+ "is_llama_derived_model": false,
64
+ "is_mistral_derived_model": false,
65
+ "learning_rate": 0.002,
66
+ "lisa_layers_attribute": "model.layers",
67
+ "load_best_model_at_end": false,
68
+ "load_in_4bit": true,
69
+ "load_in_8bit": false,
70
+ "local_rank": 0,
71
+ "logging_steps": 100,
72
+ "lora_alpha": 16,
73
+ "lora_dropout": 0.05,
74
+ "lora_r": 32,
75
+ "lora_target_linear": true,
76
+ "loraplus_lr_embedding": 1e-06,
77
+ "lr_scheduler": "cosine",
78
+ "mean_resizing_embeddings": false,
79
+ "micro_batch_size": 2,
80
+ "model_config_type": "gpt2",
81
+ "num_epochs": 3.0,
82
+ "optimizer": "paged_adamw_32bit",
83
+ "output_dir": "./qlora-out",
84
+ "pad_to_sequence_len": true,
85
+ "pretrain_multipack_attn": true,
86
+ "profiler_steps_start": 0,
87
+ "qlora_sharded_model_loading": false,
88
+ "ray_num_workers": 1,
89
+ "resources_per_worker": {
90
+ "GPU": 1
91
+ },
92
+ "sample_packing": true,
93
+ "sample_packing_bin_size": 200,
94
+ "sample_packing_group_size": 100000,
95
+ "save_only_model": false,
96
+ "save_safetensors": true,
97
+ "save_strategy": "epoch",
98
+ "sequence_len": 2048,
99
+ "shuffle_before_merging_datasets": false,
100
+ "shuffle_merged_datasets": true,
101
+ "skip_prepare_dataset": false,
102
+ "special_tokens": {
103
+ "eos_token": "<|endoftext|>",
104
+ "pad_token": "<|endoftext|>"
105
+ },
106
+ "streaming_multipack_buffer_size": 10000,
107
+ "strict": false,
108
+ "tensor_parallel_size": 1,
109
+ "tf32": false,
110
+ "tiled_mlp_use_original_mlp": true,
111
+ "tokenizer_config": "nferruz/ProtGPT2",
112
+ "tokenizer_save_jinja_files": true,
113
+ "tokenizer_type": "AutoTokenizer",
114
+ "torch_dtype": "torch.float16",
115
+ "train_on_inputs": false,
116
+ "trl": {
117
+ "log_completions": false,
118
+ "mask_truncated_completions": false,
119
+ "ref_model_mixup_alpha": 0.9,
120
+ "ref_model_sync_steps": 64,
121
+ "scale_rewards": true,
122
+ "sync_ref_model": false,
123
+ "use_vllm": false,
124
+ "vllm_server_host": "0.0.0.0",
125
+ "vllm_server_port": 8000
126
+ },
127
+ "type_of_model": "AutoModelForCausalLM",
128
+ "use_ray": false,
129
+ "val_set_size": 0.02,
130
+ "vllm": {
131
+ "device": "auto",
132
+ "dtype": "auto",
133
+ "gpu_memory_utilization": 0.9,
134
+ "host": "0.0.0.0",
135
+ "port": 8000
136
+ },
137
+ "warmup_steps": 100,
138
+ "weight_decay": 0.0,
139
+ "world_size": 1
140
+ }
141
+ [2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:278] [PID:24741] EOS: 0 / <|endoftext|>
142
+ [2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:279] [PID:24741] BOS: 0 / <|endoftext|>
143
+ [2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:24741] PAD: 0 / <|endoftext|>
144
+ [2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:24741] UNK: 0 / <|endoftext|>
145
+ [2025-10-10 13:10:42,465] [INFO] [axolotl.loaders.tokenizer.load_tokenizer:295] [PID:24741] No Chat template selected. Consider adding a chat template for easier inference.
146
+ [2025-10-10 13:10:42,466] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:476] [PID:24741] Unable to find prepared dataset in last_run_prepared/120d8e2ed44f3c537dc9a20773f86561
147
+ [2025-10-10 13:10:42,466] [INFO] [axolotl.utils.data.sft._load_raw_datasets:320] [PID:24741] Loading raw datasets...
148
+ [2025-10-10 13:10:42,466] [WARNING] [axolotl.utils.data.sft._load_raw_datasets:322] [PID:24741] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset using `axolotl preprocess path/to/config.yml`.
149
+ [2025-10-10 13:10:42,802] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:24741] Loading dataset: /content/sequences_tokenized.jsonl with base_type: None and prompt_style: None
150
+ [2025-10-10 13:10:42,820] [INFO] [axolotl.utils.data.utils.handle_long_seq_in_dataset:218] [PID:24741] min_input_len: 6
151
+ [2025-10-10 13:10:42,821] [INFO] [axolotl.utils.data.utils.handle_long_seq_in_dataset:220] [PID:24741] max_input_len: 512
152
+
153
+
154
+
155
+
156
+ [2025-10-10 13:10:45,620] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:406] [PID:24741] total_num_tokens: 16_570
157
+ [2025-10-10 13:10:45,622] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:424] [PID:24741] `total_supervised_tokens: 16_570`
158
+ [2025-10-10 13:10:48,083] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9218025207519531
159
+ [2025-10-10 13:10:49,020] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9363107681274414
160
+ [2025-10-10 13:10:49,934] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9140019416809082
161
+ [2025-10-10 13:10:50,862] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9271283149719238
162
+ [2025-10-10 13:10:50,882] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
163
+ [2025-10-10 13:10:50,882] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:483] [PID:24741] data_loader_len: 4
164
+ [2025-10-10 13:10:50,883] [INFO] [axolotl.utils.trainer.calc_sample_packing_eff_est:499] [PID:24741] sample_packing_eff_est across ranks: [0.8989800347222222]
165
+ [2025-10-10 13:10:50,883] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:511] [PID:24741] sample_packing_eff_est: None
166
+ [2025-10-10 13:10:50,883] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:522] [PID:24741] total_num_steps: 12
167
+ [2025-10-10 13:10:50,893] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:406] [PID:24741] total_num_tokens: 746_874
168
+ [2025-10-10 13:10:50,932] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:424] [PID:24741] `total_supervised_tokens: 746_874`
169
+ [2025-10-10 13:10:52,871] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9358129501342773
170
+ [2025-10-10 13:10:53,781] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9106135368347168
171
+ [2025-10-10 13:10:55,014] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2319858074188232
172
+ [2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2722358703613281
173
+ [2025-10-10 13:10:56,287] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [183]
174
+ [2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:483] [PID:24741] data_loader_len: 183
175
+ [2025-10-10 13:10:56,287] [INFO] [axolotl.utils.trainer.calc_sample_packing_eff_est:499] [PID:24741] sample_packing_eff_est across ranks: [0.9936909272820164]
176
+ [2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:511] [PID:24741] sample_packing_eff_est: 1.0
177
+ [2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:522] [PID:24741] total_num_steps: 549
178
+ [2025-10-10 13:10:56,287] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:24741] Maximum number of steps set at 549
179
+ [2025-10-10 13:10:56,297] [DEBUG] [axolotl.train.setup_model_and_tokenizer:65] [PID:24741] Loading tokenizer... nferruz/ProtGPT2
180
+ [2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:278] [PID:24741] EOS: 0 / <|endoftext|>
181
+ [2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:279] [PID:24741] BOS: 0 / <|endoftext|>
182
+ [2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:24741] PAD: 0 / <|endoftext|>
183
+ [2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:24741] UNK: 0 / <|endoftext|>
184
+ [2025-10-10 13:10:57,214] [INFO] [axolotl.loaders.tokenizer.load_tokenizer:295] [PID:24741] No Chat template selected. Consider adding a chat template for easier inference.
185
+ [2025-10-10 13:10:57,215] [DEBUG] [axolotl.train.setup_model_and_tokenizer:74] [PID:24741] Loading model
186
+ [2025-10-10 13:10:57,333] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:24741] Patched Trainer.evaluation_loop with nanmean loss calculation
187
+ [2025-10-10 13:10:57,334] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:24741] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
188
+ [2025-10-10 13:10:57,335] [INFO] [axolotl.loaders.patch_manager._apply_multipack_patches:301] [PID:24741] Applying multipack dataloader patch for sample packing...
189
+ [2025-10-10 13:11:27,461] [WARNING] [axolotl.loaders.model._adjust_model_config:273] [PID:24741] increasing model.config.max_position_embeddings from 1024 to 2048
190
+ [2025-10-10 13:11:27,467] [INFO] [axolotl.loaders.model._prepare_model_for_quantization:863] [PID:24741] converting PEFT model w/ prepare_model_for_kbit_training
191
+ [2025-10-10 13:11:27,479] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:345] [PID:24741] Converting modules to torch.float16
192
+ [2025-10-10 13:11:27,481] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:24741] Memory usage after model load 0.849GB (+0.849GB allocated, +0.918GB reserved)
193
+ [2025-10-10 13:11:27,482] [INFO] [axolotl.loaders.adapter.load_lora:80] [PID:24741] found linear modules: ['c_attn', 'c_fc', 'c_proj']
194
+ trainable params: 23,592,960 || all params: 797,623,040 || trainable%: 2.9579
195
+ [2025-10-10 13:11:27,888] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:24741] after adapters 0.618GB (+0.618GB allocated, +1.012GB reserved)
196
+ [2025-10-10 13:11:39,738] [INFO] [axolotl.train.save_initial_configs:398] [PID:24741] Pre-saving adapter config to ./qlora-out...
197
+ [2025-10-10 13:11:39,738] [INFO] [axolotl.train.save_initial_configs:402] [PID:24741] Pre-saving tokenizer to ./qlora-out...
198
+ [2025-10-10 13:11:39,828] [INFO] [axolotl.train.save_initial_configs:407] [PID:24741] Pre-saving model config to ./qlora-out...
199
+ [2025-10-10 13:11:39,836] [INFO] [axolotl.train.execute_training:196] [PID:24741] Starting trainer...
200
+ [2025-10-10 13:11:45,415] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8461699485778809
201
+ [2025-10-10 13:11:47,771] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 2.3554582595825195
202
+ [2025-10-10 13:11:49,329] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5573019981384277
203
+ [2025-10-10 13:11:51,006] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6764421463012695
204
+ [2025-10-10 13:11:51,006] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [183]
205
+
206
  0% 0/549 [00:00<?, ?it/s][2025-10-10 13:11:51,161] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
207
+ [2025-10-10 13:11:54,146] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2840440273284912
208
+ [2025-10-10 13:11:55,365] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.218794584274292
209
+ [2025-10-10 13:11:56,599] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2343621253967285
210
+ [2025-10-10 13:11:58,099] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4991233348846436
211
+ [2025-10-10 13:11:58,099] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
212
+
213
+
214
  0% 0/4 [00:00<?, ?it/s]
215
+
216
  50% 2/4 [00:00<00:00, 3.33it/s]
217
+
218
  75% 3/4 [00:01<00:00, 1.68it/s]
219
+
220
 
221
+
222
 
223
+
224
  0% 0/549 [00:14<?, ?it/s]
225
+
226
+
227
  
228
  0% 1/549 [00:21<3:16:49, 21.55s/it]
229
  0% 2/549 [00:25<1:40:35, 11.03s/it]
230
  1% 3/549 [00:28<1:09:56, 7.69s/it]
231
  1% 4/549 [00:32<55:34, 6.12s/it]
232
  1% 5/549 [00:36<47:37, 5.25s/it]
233
  1% 6/549 [00:40<42:50, 4.73s/it][2025-10-10 13:12:31,245] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
234
+ [2025-10-10 13:12:33,921] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.245626449584961
235
+ [2025-10-10 13:12:35,290] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3684089183807373
236
+ [2025-10-10 13:12:37,029] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.738837718963623
237
+ [2025-10-10 13:12:38,274] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2442030906677246
238
+ [2025-10-10 13:12:38,274] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
239
+
240
+
241
  0% 0/4 [00:00<?, ?it/s]
242
+
243
  50% 2/4 [00:01<00:01, 1.90it/s]
244
+
245
  75% 3/4 [00:02<00:00, 1.36it/s]
246
+
247
 
248
+
249
 
250
+
251
  1% 6/549 [00:52<42:50, 4.73s/it]
252
+
253
+
254
  
255
  1% 7/549 [00:56<1:17:17, 8.56s/it]
256
  1% 8/549 [01:00<1:03:22, 7.03s/it]
257
  2% 9/549 [01:04<54:04, 6.01s/it]
258
  2% 10/549 [01:07<47:47, 5.32s/it]
259
  2% 11/549 [01:11<43:30, 4.85s/it]
260
  2% 12/549 [01:15<40:33, 4.53s/it][2025-10-10 13:13:06,562] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
261
+ [2025-10-10 13:13:09,050] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2255823612213135
262
+ [2025-10-10 13:13:10,291] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.240588665008545
263
+ [2025-10-10 13:13:11,581] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2900962829589844
264
+ [2025-10-10 13:13:13,335] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.753103256225586
265
+ [2025-10-10 13:13:13,335] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
266
+
267
+
268
  0% 0/4 [00:00<?, ?it/s]
269
+
270
  50% 2/4 [00:01<00:01, 1.86it/s]
271
+
272
  75% 3/4 [00:02<00:00, 1.34it/s]
273
+
274
 
275
+
276
 
277
+
278
  2% 12/549 [01:28<40:33, 4.53s/it]
279
+
280
+
281
  
282
  2% 13/549 [01:31<1:12:45, 8.14s/it]
283
  3% 14/549 [01:35<1:00:59, 6.84s/it]
284
  3% 15/549 [01:39<52:49, 5.93s/it]
285
  3% 16/549 [01:43<47:08, 5.31s/it]
286
  3% 17/549 [01:47<43:11, 4.87s/it]
287
  3% 18/549 [01:51<40:28, 4.57s/it][2025-10-10 13:13:42,268] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
288
+ [2025-10-10 13:13:44,777] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.233976125717163
289
+ [2025-10-10 13:13:45,991] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2140934467315674
290
+ [2025-10-10 13:13:47,230] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2387192249298096
291
+ [2025-10-10 13:13:48,517] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2868869304656982
292
+ [2025-10-10 13:13:48,517] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
293
+
294
+
295
  0% 0/4 [00:00<?, ?it/s]
296
+
297
  50% 2/4 [00:01<00:01, 1.83it/s]
298
+
299
  75% 3/4 [00:02<00:00, 1.32it/s]
300
+
301
 
302
+
303
 
304
+
305
  3% 18/549 [02:03<40:28, 4.57s/it]
306
+
307
+
308
  
309
  3% 19/549 [02:07<1:12:15, 8.18s/it]
310
  4% 20/549 [02:11<1:00:54, 6.91s/it]
311
  4% 21/549 [02:15<52:54, 6.01s/it]
312
  4% 22/549 [02:19<47:19, 5.39s/it]
313
  4% 23/549 [02:23<43:26, 4.96s/it]
314
  4% 24/549 [02:27<40:38, 4.64s/it][2025-10-10 13:14:18,514] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
315
+ [2025-10-10 13:14:20,985] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2237637042999268
316
+ [2025-10-10 13:14:22,224] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2387983798980713
317
+ [2025-10-10 13:14:23,447] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2227163314819336
318
+ [2025-10-10 13:14:24,682] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2346465587615967
319
+ [2025-10-10 13:14:24,682] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
320
+
321
+
322
  0% 0/4 [00:00<?, ?it/s]
323
+
324
  50% 2/4 [00:01<00:01, 1.85it/s]
325
+
326
  75% 3/4 [00:02<00:00, 1.33it/s]
327
+
328
 
329
+
330
 
331
+
332
  4% 24/549 [02:39<40:38, 4.64s/it]
333
+
334
+
335
  
336
  5% 25/549 [02:43<1:10:34, 8.08s/it]
337
  5% 26/549 [02:47<59:22, 6.81s/it]
338
  5% 27/549 [02:51<51:30, 5.92s/it]
339
  5% 28/549 [02:54<45:59, 5.30s/it]
340
  5% 29/549 [02:58<42:11, 4.87s/it]
341
  5% 30/549 [03:02<39:29, 4.57s/it][2025-10-10 13:14:53,875] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
342
+ [2025-10-10 13:14:56,364] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.229734182357788
343
+ [2025-10-10 13:14:57,586] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2214250564575195
344
+ [2025-10-10 13:14:58,806] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.219433307647705
345
+ [2025-10-10 13:15:00,136] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.329803705215454
346
+ [2025-10-10 13:15:00,136] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
347
+
348
+
349
  0% 0/4 [00:00<?, ?it/s]
350
+
351
  50% 2/4 [00:01<00:01, 1.85it/s]
352
+
353
  75% 3/4 [00:02<00:00, 1.32it/s]
354
+
355
 
356
+
357
 
358
+
359
  5% 30/549 [03:14<39:29, 4.57s/it]
360
+
361
+
362
  
363
  6% 31/549 [03:18<1:08:40, 7.95s/it]
364
  6% 32/549 [03:22<57:59, 6.73s/it]
365
  6% 33/549 [03:26<50:32, 5.88s/it]
366
  6% 34/549 [03:30<45:20, 5.28s/it]
367
  6% 35/549 [03:34<41:45, 4.87s/it]
368
  7% 36/549 [03:38<39:13, 4.59s/it][2025-10-10 13:15:29,235] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
369
+ [2025-10-10 13:15:31,722] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.212110996246338
370
+ [2025-10-10 13:15:32,954] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.231447458267212
371
+ [2025-10-10 13:15:34,205] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2508065700531006
372
+ [2025-10-10 13:15:35,438] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2330925464630127
373
+ [2025-10-10 13:15:35,439] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
374
+
375
+
376
  0% 0/4 [00:00<?, ?it/s]
377
+
378
  50% 2/4 [00:01<00:01, 1.84it/s]
379
+
380
  75% 3/4 [00:02<00:00, 1.32it/s]
381
+
382
 
383
+
384
 
385
+
386
  7% 36/549 [03:50<39:13, 4.59s/it]
387
+
388
+
389
  
390
  7% 37/549 [03:53<1:08:00, 7.97s/it]
391
  7% 38/549 [03:57<57:23, 6.74s/it]
392
  7% 39/549 [04:01<50:02, 5.89s/it]
393
  7% 40/549 [04:05<44:51, 5.29s/it]
394
  7% 41/549 [04:09<41:10, 4.86s/it]
395
  8% 42/549 [04:13<38:40, 4.58s/it][2025-10-10 13:16:04,533] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
396
+ [2025-10-10 13:16:07,240] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2179176807403564
397
+ [2025-10-10 13:16:08,479] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.239612340927124
398
+ [2025-10-10 13:16:09,694] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2147700786590576
399
+ [2025-10-10 13:16:10,944] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2497029304504395
400
+ [2025-10-10 13:16:10,945] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
401
+
402
+
403
  0% 0/4 [00:00<?, ?it/s]
404
+
405
  50% 2/4 [00:01<00:01, 1.85it/s]
406
+
407
  75% 3/4 [00:02<00:00, 1.33it/s]
408
+
409
 
410
+
411
 
412
+
413
  8% 42/549 [04:25<38:40, 4.58s/it]
414
+
415
+
416
  
417
  8% 43/549 [04:29<1:07:37, 8.02s/it]
418
  8% 44/549 [04:33<56:57, 6.77s/it]
419
  8% 45/549 [04:37<49:35, 5.90s/it]
420
  8% 46/549 [04:41<44:25, 5.30s/it]
421
  9% 47/549 [04:44<40:49, 4.88s/it]
422
  9% 48/549 [04:48<38:22, 4.60s/it][2025-10-10 13:16:40,037] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
423
+ [2025-10-10 13:16:43,053] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2188963890075684
424
+ [2025-10-10 13:16:44,293] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2395522594451904
425
+ [2025-10-10 13:16:45,513] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.21950364112854
426
+ [2025-10-10 13:16:46,763] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2497367858886719
427
+ [2025-10-10 13:16:46,763] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
428
+
429
+
430
  0% 0/4 [00:00<?, ?it/s]
431
+
432
  50% 2/4 [00:01<00:01, 1.84it/s]
433
+
434
  75% 3/4 [00:02<00:00, 1.32it/s]
435
+
436
 
437
+
438
 
439
+
440
  9% 48/549 [05:01<38:22, 4.60s/it]
441
+
442
+
443
  
444
  9% 49/549 [05:05<1:07:36, 8.11s/it]
445
  9% 50/549 [05:09<56:51, 6.84s/it]
446
  9% 51/549 [05:12<49:28, 5.96s/it]
447
  9% 52/549 [05:16<44:15, 5.34s/it]
448
  10% 53/549 [05:20<40:31, 4.90s/it]
449
  10% 54/549 [05:24<36:22, 4.41s/it][2025-10-10 13:17:15,168] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
450
+ [2025-10-10 13:17:18,275] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8323063850402832
451
+ [2025-10-10 13:17:19,609] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3336091041564941
452
+ [2025-10-10 13:17:20,827] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2181472778320312
453
+ [2025-10-10 13:17:22,034] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2064990997314453
454
+ [2025-10-10 13:17:22,034] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
455
+
456
+
457
  0% 0/4 [00:00<?, ?it/s]
458
+
459
  50% 2/4 [00:01<00:01, 1.85it/s]
460
+
461
  75% 3/4 [00:02<00:00, 1.33it/s]
462
+
463
 
464
+
465
 
466
+
467
  10% 54/549 [05:36<36:22, 4.41s/it]
468
+
469
+
470
  
471
  10% 55/549 [05:40<1:06:03, 8.02s/it]
472
  10% 56/549 [05:44<55:34, 6.76s/it]
473
  10% 57/549 [05:48<48:18, 5.89s/it]
474
  11% 58/549 [05:52<43:15, 5.29s/it]
475
  11% 59/549 [05:55<39:46, 4.87s/it]
476
  11% 60/549 [05:59<37:18, 4.58s/it][2025-10-10 13:17:50,975] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
477
+ [2025-10-10 13:17:53,534] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2939870357513428
478
+ [2025-10-10 13:17:55,286] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7515311241149902
479
+ [2025-10-10 13:17:56,595] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3087666034698486
480
+ [2025-10-10 13:17:58,392] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7966506481170654
481
+ [2025-10-10 13:17:58,392] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
482
+
483
+
484
  0% 0/4 [00:00<?, ?it/s]
485
+
486
  50% 2/4 [00:01<00:01, 1.85it/s]
487
+
488
  75% 3/4 [00:02<00:00, 1.33it/s]
489
+
490
 
491
+
492
 
493
+
494
  11% 60/549 [06:13<37:18, 4.58s/it]
495
+
496
+
497
  
498
  11% 61/549 [06:17<1:09:20, 8.52s/it]
499
  11% 62/549 [06:21<57:52, 7.13s/it]
500
  11% 63/549 [06:25<49:53, 6.16s/it]
501
  12% 64/549 [06:29<44:24, 5.49s/it]
502
  12% 65/549 [06:33<40:32, 5.03s/it]
503
  12% 66/549 [06:37<37:50, 4.70s/it][2025-10-10 13:18:28,296] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
504
+ [2025-10-10 13:18:33,461] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 2.976652145385742
505
+ [2025-10-10 13:18:35,243] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7817871570587158
506
+ [2025-10-10 13:18:36,478] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2347698211669922
507
+ [2025-10-10 13:18:37,729] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2507171630859375
508
+ [2025-10-10 13:18:37,729] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
509
+
510
+
511
  0% 0/4 [00:00<?, ?it/s]
512
+
513
  50% 2/4 [00:01<00:01, 1.80it/s]
514
+
515
  75% 3/4 [00:02<00:00, 1.31it/s]
516
+
517
 
518
+
519
 
520
+
521
  12% 66/549 [06:53<37:50, 4.70s/it]
522
+
523
+
524
  
525
  12% 67/549 [06:57<1:14:42, 9.30s/it]
526
  12% 68/549 [07:01<1:01:33, 7.68s/it]
527
  13% 69/549 [07:04<52:22, 6.55s/it]
528
  13% 70/549 [07:08<45:55, 5.75s/it]
529
  13% 71/549 [07:12<41:24, 5.20s/it]
530
  13% 72/549 [07:16<38:15, 4.81s/it][2025-10-10 13:19:07,848] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
531
+ [2025-10-10 13:19:10,905] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.743417501449585
532
+ [2025-10-10 13:19:12,274] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3681507110595703
533
+ [2025-10-10 13:19:13,507] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.232816457748413
534
+ [2025-10-10 13:19:14,734] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2264103889465332
535
+ [2025-10-10 13:19:14,734] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
536
+
537
+
538
  0% 0/4 [00:00<?, ?it/s]
539
+
540
  50% 2/4 [00:01<00:01, 1.84it/s]
541
+
542
  75% 3/4 [00:02<00:00, 1.33it/s]
543
+
544
 
545
+
546
 
547
+
548
  13% 72/549 [07:29<38:15, 4.81s/it]
549
+
550
+
551
  
552
  13% 73/549 [07:33<1:05:59, 8.32s/it]
553
  13% 74/549 [07:36<53:50, 6.80s/it]
554
  14% 75/549 [07:40<46:47, 5.92s/it]
555
  14% 76/549 [07:44<41:57, 5.32s/it]
556
  14% 77/549 [07:48<38:32, 4.90s/it]
557
  14% 78/549 [07:52<36:06, 4.60s/it][2025-10-10 13:19:43,212] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
558
+ [2025-10-10 13:19:45,690] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2276828289031982
559
+ [2025-10-10 13:19:47,405] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7151846885681152
560
+ [2025-10-10 13:19:48,875] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4696624279022217
561
+ [2025-10-10 13:19:50,118] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.24239182472229
562
+ [2025-10-10 13:19:50,118] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
563
+
564
+
565
  0% 0/4 [00:00<?, ?it/s]
566
+
567
  50% 2/4 [00:01<00:01, 1.85it/s]
568
+
569
  75% 3/4 [00:02<00:00, 1.33it/s]
570
+
571
 
572
+
573
 
574
+
575
  14% 78/549 [08:04<36:06, 4.60s/it]
576
+
577
+
578
  
579
  14% 79/549 [08:08<1:03:59, 8.17s/it]
580
  15% 80/549 [08:12<53:44, 6.88s/it]
581
  15% 81/549 [08:16<46:34, 5.97s/it]
582
  15% 82/549 [08:20<41:37, 5.35s/it]
583
  15% 83/549 [08:24<38:10, 4.91s/it]
584
  15% 84/549 [08:27<35:43, 4.61s/it][2025-10-10 13:20:19,124] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
585
+ [2025-10-10 13:20:21,612] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2227427959442139
586
+ [2025-10-10 13:20:22,917] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3052830696105957
587
+ [2025-10-10 13:20:24,696] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7787699699401855
588
+ [2025-10-10 13:20:25,947] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2503879070281982
589
+ [2025-10-10 13:20:25,947] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
590
+
591
+
592
  0% 0/4 [00:00<?, ?it/s]
593
+
594
  50% 2/4 [00:01<00:01, 1.85it/s]
595
+
596
  75% 3/4 [00:02<00:00, 1.33it/s]
597
+
598
 
599
+
600
 
601
+
602
  15% 84/549 [08:40<35:43, 4.61s/it]
603
+
604
+
605
  
606
  15% 85/549 [08:44<1:03:06, 8.16s/it]
607
  16% 86/549 [08:48<53:01, 6.87s/it]
608
  16% 87/549 [08:52<45:57, 5.97s/it]
609
  16% 88/549 [08:56<41:08, 5.35s/it]
610
  16% 89/549 [08:59<37:43, 4.92s/it]
611
  16% 90/549 [09:03<35:17, 4.61s/it][2025-10-10 13:20:55,024] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
612
+ [2025-10-10 13:20:57,527] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2476911544799805
613
+ [2025-10-10 13:20:58,765] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2383363246917725
614
+ [2025-10-10 13:21:00,176] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4102411270141602
615
+ [2025-10-10 13:21:01,923] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7466182708740234
616
+ [2025-10-10 13:21:01,923] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
617
+
618
+
619
  0% 0/4 [00:00<?, ?it/s]
620
+
621
  50% 2/4 [00:01<00:01, 1.85it/s]
622
+
623
  75% 3/4 [00:02<00:00, 1.33it/s]
624
+
625
 
626
+
627
 
628
+
629
  16% 90/549 [09:16<35:17, 4.61s/it]
630
+
631
+
632
  
633
  17% 91/549 [09:20<1:02:22, 8.17s/it]
634
  17% 92/549 [09:24<52:22, 6.88s/it]
635
  17% 93/549 [09:28<45:23, 5.97s/it]
636
  17% 94/549 [09:31<40:32, 5.35s/it]
637
  17% 95/549 [09:35<37:09, 4.91s/it]
638
  17% 96/549 [09:39<34:46, 4.61s/it][2025-10-10 13:21:30,890] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
639
+ [2025-10-10 13:21:33,421] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2245755195617676
640
+ [2025-10-10 13:21:34,670] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2479896545410156
641
+ [2025-10-10 13:21:35,919] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2494275569915771
642
+ [2025-10-10 13:21:37,555] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6355178356170654
643
+ [2025-10-10 13:21:37,555] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
644
+
645
+
646
  0% 0/4 [00:00<?, ?it/s]
647
+
648
  50% 2/4 [00:01<00:01, 1.85it/s]
649
+
650
  75% 3/4 [00:02<00:00, 1.33it/s]
651
+
652
 
653
+
654
 
655
+
656
  17% 96/549 [09:52<34:46, 4.61s/it]
657
+
658
+
659
  
660
  18% 97/549 [09:56<1:02:02, 8.24s/it]
661
  18% 98/549 [10:00<52:01, 6.92s/it]
662
  18% 99/549 [10:04<45:02, 6.00s/it]
663
  18% 100/549 [10:08<40:10, 5.37s/it]
664
 
665
+
666
  18% 100/549 [10:08<40:10, 5.37s/it]
667
  18% 101/549 [10:12<36:58, 4.95s/it]
668
  19% 102/549 [10:15<34:32, 4.64s/it][2025-10-10 13:22:07,077] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
669
+ [2025-10-10 13:22:09,610] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.245088815689087
670
+ [2025-10-10 13:22:10,855] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2448465824127197
671
+ [2025-10-10 13:22:12,106] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2508001327514648
672
+ [2025-10-10 13:22:13,381] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2749567031860352
673
+ [2025-10-10 13:22:13,382] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
674
+
675
+
676
  0% 0/4 [00:00<?, ?it/s]
677
+
678
  50% 2/4 [00:01<00:01, 1.85it/s]
679
+
680
  75% 3/4 [00:02<00:00, 1.33it/s]
681
+
682
 
683
+
684
 
685
+
686
  19% 102/549 [10:28<34:32, 4.64s/it]
687
+
688
+
689
  
690
  19% 103/549 [10:32<1:00:52, 8.19s/it]
691
  19% 104/549 [10:36<51:10, 6.90s/it]
692
  19% 105/549 [10:40<44:23, 6.00s/it]
693
  19% 106/549 [10:44<39:36, 5.36s/it]
694
  19% 107/549 [10:48<36:21, 4.94s/it]
695
  20% 108/549 [10:51<34:00, 4.63s/it][2025-10-10 13:22:43,069] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
696
+ [2025-10-10 13:22:45,569] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.246150016784668
697
+ [2025-10-10 13:22:46,822] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2529652118682861
698
+ [2025-10-10 13:22:48,105] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2827684879302979
699
+ [2025-10-10 13:22:49,347] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2417457103729248
700
+ [2025-10-10 13:22:49,347] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
701
+
702
+
703
  0% 0/4 [00:00<?, ?it/s]
704
+
705
  50% 2/4 [00:01<00:01, 1.85it/s]
706
+
707
  75% 3/4 [00:02<00:00, 1.33it/s]
708
+
709
 
710
+
711
 
712
+
713
  20% 108/549 [11:04<34:00, 4.63s/it]
714
+
715
+
716
  
717
  20% 109/549 [11:08<59:10, 8.07s/it]
718
  20% 110/549 [11:11<49:49, 6.81s/it]
719
  20% 111/549 [11:15<43:16, 5.93s/it]
720
  20% 112/549 [11:19<38:43, 5.32s/it]
721
  21% 113/549 [11:23<35:29, 4.89s/it]
722
  21% 114/549 [11:27<33:16, 4.59s/it][2025-10-10 13:23:18,582] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
723
+ [2025-10-10 13:23:21,111] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2601120471954346
724
+ [2025-10-10 13:23:22,364] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2526865005493164
725
+ [2025-10-10 13:23:23,588] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.223961591720581
726
+ [2025-10-10 13:23:24,824] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2359259128570557
727
+ [2025-10-10 13:23:24,824] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
728
+
729
+
730
  0% 0/4 [00:00<?, ?it/s]
731
+
732
  50% 2/4 [00:01<00:01, 1.85it/s]
733
+
734
  75% 3/4 [00:02<00:00, 1.32it/s]
735
+
736
 
737
+
738
 
739
+
740
  21% 114/549 [11:39<33:16, 4.59s/it]
741
+
742
+
743
  
744
  21% 115/549 [11:43<57:40, 7.97s/it]
745
  21% 116/549 [11:47<48:37, 6.74s/it]
746
  21% 117/549 [11:51<42:19, 5.88s/it]
747
  21% 118/549 [11:54<37:57, 5.28s/it]
748
  22% 119/549 [11:58<34:52, 4.87s/it]
749
  22% 120/549 [12:02<32:40, 4.57s/it][2025-10-10 13:23:53,849] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
750
+ [2025-10-10 13:23:56,384] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2743051052093506
751
+ [2025-10-10 13:23:57,627] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2425775527954102
752
+ [2025-10-10 13:23:58,872] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2450978755950928
753
+ [2025-10-10 13:24:00,110] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2371137142181396
754
+ [2025-10-10 13:24:00,110] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
755
+
756
+
757
  0% 0/4 [00:00<?, ?it/s]
758
+
759
  50% 2/4 [00:01<00:01, 1.85it/s]
760
+
761
  75% 3/4 [00:02<00:00, 1.33it/s]
762
+
763
 
764
+
765
 
766
+
767
  22% 120/549 [12:14<32:40, 4.57s/it]
768
+
769
+
770
  
771
  22% 121/549 [12:18<56:50, 7.97s/it]
772
  22% 122/549 [12:22<47:53, 6.73s/it]
773
  22% 123/549 [12:26<41:44, 5.88s/it]
774
  23% 124/549 [12:30<37:24, 5.28s/it]
775
  23% 125/549 [12:34<34:22, 4.86s/it]
776
  23% 126/549 [12:38<32:18, 4.58s/it][2025-10-10 13:24:29,185] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
777
+ [2025-10-10 13:24:32,019] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2589247226715088
778
+ [2025-10-10 13:24:33,287] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2684495449066162
779
+ [2025-10-10 13:24:34,521] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2337257862091064
780
+ [2025-10-10 13:24:35,751] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2298574447631836
781
+ [2025-10-10 13:24:35,752] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
782
+
783
+
784
  0% 0/4 [00:00<?, ?it/s]
785
+
786
  50% 2/4 [00:01<00:01, 1.85it/s]
787
+
788
  75% 3/4 [00:02<00:00, 1.33it/s]
789
+
790
 
791
+
792
 
793
+
794
  23% 126/549 [12:50<32:18, 4.58s/it]
795
+
796
+
797
  
798
  23% 127/549 [12:54<56:47, 8.08s/it]
799
  23% 128/549 [12:58<47:47, 6.81s/it]
800
  23% 129/549 [13:02<41:35, 5.94s/it]
801
  24% 130/549 [13:05<37:15, 5.33s/it]
802
  24% 131/549 [13:09<34:11, 4.91s/it]
803
  24% 132/549 [13:13<32:04, 4.61s/it][2025-10-10 13:25:04,943] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
804
+ [2025-10-10 13:25:08,050] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2962446212768555
805
+ [2025-10-10 13:25:09,299] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2489659786224365
806
+ [2025-10-10 13:25:10,529] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2295067310333252
807
+ [2025-10-10 13:25:11,775] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2457163333892822
808
+ [2025-10-10 13:25:11,775] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
809
+
810
+
811
  0% 0/4 [00:00<?, ?it/s]
812
+
813
  50% 2/4 [00:01<00:01, 1.85it/s]
814
+
815
  75% 3/4 [00:02<00:00, 1.33it/s]
816
+
817
 
818
+
819
 
820
+
821
  24% 132/549 [13:26<32:04, 4.61s/it]
822
+
823
+
824
  
825
  24% 133/549 [13:30<56:33, 8.16s/it]
826
  24% 134/549 [13:34<47:29, 6.87s/it]
827
  25% 135/549 [13:37<41:08, 5.96s/it]
828
  25% 136/549 [13:41<36:43, 5.34s/it]
829
  25% 137/549 [13:45<33:38, 4.90s/it]
830
  25% 138/549 [13:49<31:27, 4.59s/it][2025-10-10 13:25:40,704] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
831
+ [2025-10-10 13:25:43,878] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.726452112197876
832
+ [2025-10-10 13:25:45,124] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.245727777481079
833
+ [2025-10-10 13:25:46,372] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2481646537780762
834
+ [2025-10-10 13:25:47,600] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2278366088867188
835
+ [2025-10-10 13:25:47,601] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
836
+
837
+
838
  0% 0/4 [00:00<?, ?it/s]
839
+
840
  50% 2/4 [00:01<00:01, 1.85it/s]
841
+
842
  75% 3/4 [00:02<00:00, 1.33it/s]
843
+
844
 
845
+
846
 
847
+
848
  25% 138/549 [14:02<31:27, 4.59s/it]
849
+
850
+
851
  
852
  25% 139/549 [14:06<55:44, 8.16s/it]
853
  26% 140/549 [14:09<46:48, 6.87s/it]
854
  26% 141/549 [14:13<40:35, 5.97s/it]
855
  26% 142/549 [14:17<36:14, 5.34s/it]
856
  26% 143/549 [14:21<33:13, 4.91s/it]
857
  26% 144/549 [14:25<31:06, 4.61s/it][2025-10-10 13:26:16,597] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
858
+ [2025-10-10 13:26:19,460] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5904531478881836
859
+ [2025-10-10 13:26:21,049] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5887093544006348
860
+ [2025-10-10 13:26:22,299] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2505967617034912
861
+ [2025-10-10 13:26:23,529] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.229478120803833
862
+ [2025-10-10 13:26:23,529] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
863
+
864
+
865
  0% 0/4 [00:00<?, ?it/s]
866
+
867
  50% 2/4 [00:01<00:01, 1.85it/s]
868
+
869
  75% 3/4 [00:02<00:00, 1.33it/s]
870
+
871
 
872
+
873
 
874
+
875
  26% 144/549 [14:38<31:06, 4.61s/it]
876
+
877
+
878
  
879
  26% 145/549 [14:41<55:08, 8.19s/it]
880
  27% 146/549 [14:45<46:16, 6.89s/it]
881
  27% 147/549 [14:49<40:05, 5.98s/it]
882
  27% 148/549 [14:53<35:50, 5.36s/it]
883
  27% 149/549 [14:57<32:50, 4.93s/it]
884
  27% 150/549 [15:01<30:40, 4.61s/it][2025-10-10 13:26:52,575] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
885
+ [2025-10-10 13:26:55,133] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.290666103363037
886
+ [2025-10-10 13:26:56,896] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7622182369232178
887
+ [2025-10-10 13:26:58,238] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3421812057495117
888
+ [2025-10-10 13:26:59,467] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2286624908447266
889
+ [2025-10-10 13:26:59,467] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
890
+
891
+
892
  0% 0/4 [00:00<?, ?it/s]
893
+
894
  50% 2/4 [00:01<00:01, 1.85it/s]
895
+
896
  75% 3/4 [00:02<00:00, 1.33it/s]
897
+
898
 
899
+
900
 
901
+
902
  27% 150/549 [15:14<30:40, 4.61s/it]
903
+
904
+
905
  
906
  28% 151/549 [15:17<54:17, 8.18s/it]
907
  28% 152/549 [15:21<45:33, 6.89s/it]
908
  28% 153/549 [15:25<39:27, 5.98s/it]
909
  28% 154/549 [15:29<35:14, 5.35s/it]
910
  28% 155/549 [15:33<32:17, 4.92s/it]
911
  28% 156/549 [15:36<28:59, 4.43s/it][2025-10-10 13:27:27,880] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
912
+ [2025-10-10 13:27:30,445] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2810289859771729
913
+ [2025-10-10 13:27:31,672] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2266299724578857
914
+ [2025-10-10 13:27:33,392] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7197673320770264
915
+ [2025-10-10 13:27:34,889] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4970765113830566
916
+ [2025-10-10 13:27:34,889] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
917
+
918
+
919
  0% 0/4 [00:00<?, ?it/s]
920
+
921
  50% 2/4 [00:01<00:01, 1.85it/s]
922
+
923
  75% 3/4 [00:02<00:00, 1.33it/s]
924
+
925
 
926
+
927
 
928
+
929
  28% 156/549 [15:49<28:59, 4.43s/it]
930
+
931
+
932
  
933
  29% 157/549 [15:53<52:50, 8.09s/it]
934
  29% 158/549 [15:57<44:26, 6.82s/it]
935
  29% 159/549 [16:01<38:37, 5.94s/it]
936
  29% 160/549 [16:04<34:31, 5.33s/it]
937
  29% 161/549 [16:08<31:41, 4.90s/it]
938
  30% 162/549 [16:12<29:40, 4.60s/it][2025-10-10 13:28:03,961] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
939
+ [2025-10-10 13:28:06,582] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2377257347106934
940
+ [2025-10-10 13:28:07,822] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2394213676452637
941
+ [2025-10-10 13:28:09,276] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4542319774627686
942
+ [2025-10-10 13:28:10,984] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7078406810760498
943
+ [2025-10-10 13:28:10,984] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
944
+
945
+
946
  0% 0/4 [00:00<?, ?it/s]
947
+
948
  50% 2/4 [00:01<00:01, 1.85it/s]
949
+
950
  75% 3/4 [00:02<00:00, 1.33it/s]
951
+
952
 
953
+
954
 
955
+
956
  30% 162/549 [16:25<29:40, 4.60s/it]
957
+
958
+
959
  
960
  30% 163/549 [16:29<52:53, 8.22s/it]
961
  30% 164/549 [16:33<44:20, 6.91s/it]
962
  30% 165/549 [16:37<38:21, 5.99s/it]
963
  30% 166/549 [16:41<34:12, 5.36s/it]
964
  30% 167/549 [16:44<31:21, 4.92s/it]
965
  31% 168/549 [16:48<29:18, 4.62s/it][2025-10-10 13:28:40,029] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
966
+ [2025-10-10 13:28:42,579] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2564804553985596
967
+ [2025-10-10 13:28:43,831] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2514879703521729
968
+ [2025-10-10 13:28:45,090] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2588951587677002
969
+ [2025-10-10 13:28:46,762] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6718604564666748
970
+ [2025-10-10 13:28:46,762] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
971
+
972
+
973
  0% 0/4 [00:00<?, ?it/s]
974
+
975
  50% 2/4 [00:01<00:01, 1.85it/s]
976
+
977
  75% 3/4 [00:02<00:00, 1.33it/s]
978
+
979
 
980
+
981
 
982
+
983
  31% 168/549 [17:01<29:18, 4.62s/it]
984
+
985
+
986
  
987
  31% 169/549 [17:05<52:01, 8.21s/it]
988
  31% 170/549 [17:09<43:37, 6.91s/it]
989
  31% 171/549 [17:13<37:46, 6.00s/it]
990
  31% 172/549 [17:17<33:43, 5.37s/it]
991
  32% 173/549 [17:21<30:52, 4.93s/it]
992
  32% 174/549 [17:24<28:52, 4.62s/it][2025-10-10 13:29:16,068] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
993
+ [2025-10-10 13:29:18,702] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3219468593597412
994
+ [2025-10-10 13:29:20,003] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.301271915435791
995
+ [2025-10-10 13:29:21,280] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2761008739471436
996
+ [2025-10-10 13:29:22,700] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4196476936340332
997
+ [2025-10-10 13:29:22,700] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
998
+
999
+
1000
  0% 0/4 [00:00<?, ?it/s]
1001
+
1002
  50% 2/4 [00:01<00:01, 1.85it/s]
1003
+
1004
  75% 3/4 [00:02<00:00, 1.33it/s]
1005
+
1006
 
1007
+
1008
 
1009
+
1010
  32% 174/549 [17:37<28:52, 4.62s/it]
1011
+
1012
+
1013
  
1014
  32% 175/549 [17:41<51:37, 8.28s/it]
1015
  32% 176/549 [17:45<43:14, 6.96s/it]
1016
  32% 177/549 [17:49<37:23, 6.03s/it]
1017
  32% 178/549 [17:53<33:19, 5.39s/it]
1018
  33% 179/549 [17:57<30:29, 4.94s/it]
1019
  33% 180/549 [18:01<28:28, 4.63s/it][2025-10-10 13:29:52,329] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
1020
+ [2025-10-10 13:29:54,842] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2502317428588867
1021
+ [2025-10-10 13:29:56,106] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2638275623321533
1022
+ [2025-10-10 13:29:57,362] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.255711317062378
1023
+ [2025-10-10 13:29:58,622] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.260202169418335
1024
+ [2025-10-10 13:29:58,622] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
1025
+
1026
+
1027
  0% 0/4 [00:00<?, ?it/s]
1028
+
1029
  50% 2/4 [00:01<00:01, 1.85it/s]
1030
+
1031
  75% 3/4 [00:02<00:00, 1.33it/s]
1032
+
1033
 
1034
+
1035
 
1036
+
1037
  33% 180/549 [18:13<28:28, 4.63s/it]
1038
+
1039
+
1040
  
1041
  33% 181/549 [18:17<49:55, 8.14s/it]
1042
  33% 182/549 [18:21<41:59, 6.86s/it]
1043
  33% 183/549 [18:25<36:37, 6.00s/it][2025-10-10 13:30:16,541] [INFO] [axolotl.core.trainers.base._save:671] [PID:24741] Saving model checkpoint to ./qlora-out/checkpoint-183
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "<|endoftext|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<|endoftext|>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ }
12
+ },
13
+ "bos_token": "<|endoftext|>",
14
+ "clean_up_tokenization_spaces": false,
15
+ "eos_token": "<|endoftext|>",
16
+ "extra_special_tokens": {},
17
+ "model_max_length": 1000000000000000019884624838656,
18
+ "pad_token": "<|endoftext|>",
19
+ "tokenizer_class": "GPT2Tokenizer",
20
+ "unk_token": "<|endoftext|>"
21
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15872f81ba0df2910609edb27c17b1b5641f9f269d4eba7fe013ab826ef0c746
3
+ size 7313
vocab.json ADDED
The diff for this file is too large to render. See raw diff