Training in progress, epoch 1
Browse files- adapter_config.json +38 -0
- adapter_model.safetensors +3 -0
- config.json +53 -0
- debug.log +640 -0
- merges.txt +0 -0
- special_tokens_map.json +30 -0
- tokenizer.json +0 -0
- tokenizer_config.json +21 -0
- training_args.bin +3 -0
- vocab.json +0 -0
adapter_config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"alpha_pattern": {},
|
| 3 |
+
"auto_mapping": null,
|
| 4 |
+
"base_model_name_or_path": "nferruz/ProtGPT2",
|
| 5 |
+
"bias": "none",
|
| 6 |
+
"corda_config": null,
|
| 7 |
+
"eva_config": null,
|
| 8 |
+
"exclude_modules": null,
|
| 9 |
+
"fan_in_fan_out": null,
|
| 10 |
+
"inference_mode": true,
|
| 11 |
+
"init_lora_weights": true,
|
| 12 |
+
"layer_replication": null,
|
| 13 |
+
"layers_pattern": null,
|
| 14 |
+
"layers_to_transform": null,
|
| 15 |
+
"loftq_config": {},
|
| 16 |
+
"lora_alpha": 16,
|
| 17 |
+
"lora_bias": false,
|
| 18 |
+
"lora_dropout": 0.05,
|
| 19 |
+
"megatron_config": null,
|
| 20 |
+
"megatron_core": "megatron.core",
|
| 21 |
+
"modules_to_save": null,
|
| 22 |
+
"peft_type": "LORA",
|
| 23 |
+
"qalora_group_size": 16,
|
| 24 |
+
"r": 32,
|
| 25 |
+
"rank_pattern": {},
|
| 26 |
+
"revision": null,
|
| 27 |
+
"target_modules": [
|
| 28 |
+
"c_attn",
|
| 29 |
+
"c_proj",
|
| 30 |
+
"c_fc"
|
| 31 |
+
],
|
| 32 |
+
"target_parameters": [],
|
| 33 |
+
"task_type": "CAUSAL_LM",
|
| 34 |
+
"trainable_token_indices": null,
|
| 35 |
+
"use_dora": false,
|
| 36 |
+
"use_qalora": false,
|
| 37 |
+
"use_rslora": false
|
| 38 |
+
}
|
adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ab1a9a92ffff873459d1f10e2eb9bc52bee197b09bdcbc4b89dd4e69d5cb082
|
| 3 |
+
size 94409168
|
config.json
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"activation_function": "gelu_new",
|
| 3 |
+
"architectures": [
|
| 4 |
+
"GPT2LMHeadModel"
|
| 5 |
+
],
|
| 6 |
+
"attn_pdrop": 0.1,
|
| 7 |
+
"bos_token_id": 0,
|
| 8 |
+
"dtype": "float16",
|
| 9 |
+
"embd_pdrop": 0.1,
|
| 10 |
+
"eos_token_id": 0,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"layer_norm_epsilon": 1e-05,
|
| 13 |
+
"model_type": "gpt2",
|
| 14 |
+
"n_ctx": 1024,
|
| 15 |
+
"n_embd": 1280,
|
| 16 |
+
"n_head": 20,
|
| 17 |
+
"n_inner": null,
|
| 18 |
+
"n_layer": 36,
|
| 19 |
+
"n_positions": 2048,
|
| 20 |
+
"quantization_config": {
|
| 21 |
+
"_load_in_4bit": true,
|
| 22 |
+
"_load_in_8bit": false,
|
| 23 |
+
"bnb_4bit_compute_dtype": "float16",
|
| 24 |
+
"bnb_4bit_quant_storage": "bfloat16",
|
| 25 |
+
"bnb_4bit_quant_type": "nf4",
|
| 26 |
+
"bnb_4bit_use_double_quant": true,
|
| 27 |
+
"llm_int8_enable_fp32_cpu_offload": false,
|
| 28 |
+
"llm_int8_has_fp16_weight": false,
|
| 29 |
+
"llm_int8_skip_modules": null,
|
| 30 |
+
"llm_int8_threshold": 6.0,
|
| 31 |
+
"load_in_4bit": true,
|
| 32 |
+
"load_in_8bit": false,
|
| 33 |
+
"quant_method": "bitsandbytes"
|
| 34 |
+
},
|
| 35 |
+
"reorder_and_upcast_attn": false,
|
| 36 |
+
"resid_pdrop": 0.1,
|
| 37 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 38 |
+
"scale_attn_weights": true,
|
| 39 |
+
"summary_activation": null,
|
| 40 |
+
"summary_first_dropout": 0.1,
|
| 41 |
+
"summary_proj_to_labels": true,
|
| 42 |
+
"summary_type": "cls_index",
|
| 43 |
+
"summary_use_proj": true,
|
| 44 |
+
"task_specific_params": {
|
| 45 |
+
"text-generation": {
|
| 46 |
+
"do_sample": true,
|
| 47 |
+
"max_length": 50
|
| 48 |
+
}
|
| 49 |
+
},
|
| 50 |
+
"transformers_version": "4.57.0",
|
| 51 |
+
"use_cache": false,
|
| 52 |
+
"vocab_size": 50257
|
| 53 |
+
}
|
debug.log
ADDED
|
@@ -0,0 +1,640 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 0 |
0% 0/549 [00:00<?, ?it/s][2025-10-10 13:11:51,161] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 2 |
50% 2/4 [00:00<00:00, 3.33it/s][A
|
|
|
|
| 3 |
75% 3/4 [00:01<00:00, 1.68it/s][A
|
|
|
|
| 4 |
|
|
|
|
| 5 |
|
|
|
|
| 6 |
0% 0/549 [00:14<?, ?it/s]
|
|
|
|
|
|
|
| 7 |
[A
|
| 8 |
0% 1/549 [00:21<3:16:49, 21.55s/it]
|
| 9 |
0% 2/549 [00:25<1:40:35, 11.03s/it]
|
| 10 |
1% 3/549 [00:28<1:09:56, 7.69s/it]
|
| 11 |
1% 4/549 [00:32<55:34, 6.12s/it]
|
| 12 |
1% 5/549 [00:36<47:37, 5.25s/it]
|
| 13 |
1% 6/549 [00:40<42:50, 4.73s/it][2025-10-10 13:12:31,245] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 15 |
50% 2/4 [00:01<00:01, 1.90it/s][A
|
|
|
|
| 16 |
75% 3/4 [00:02<00:00, 1.36it/s][A
|
|
|
|
| 17 |
|
|
|
|
| 18 |
|
|
|
|
| 19 |
1% 6/549 [00:52<42:50, 4.73s/it]
|
|
|
|
|
|
|
| 20 |
[A
|
| 21 |
1% 7/549 [00:56<1:17:17, 8.56s/it]
|
| 22 |
1% 8/549 [01:00<1:03:22, 7.03s/it]
|
| 23 |
2% 9/549 [01:04<54:04, 6.01s/it]
|
| 24 |
2% 10/549 [01:07<47:47, 5.32s/it]
|
| 25 |
2% 11/549 [01:11<43:30, 4.85s/it]
|
| 26 |
2% 12/549 [01:15<40:33, 4.53s/it][2025-10-10 13:13:06,562] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 28 |
50% 2/4 [00:01<00:01, 1.86it/s][A
|
|
|
|
| 29 |
75% 3/4 [00:02<00:00, 1.34it/s][A
|
|
|
|
| 30 |
|
|
|
|
| 31 |
|
|
|
|
| 32 |
2% 12/549 [01:28<40:33, 4.53s/it]
|
|
|
|
|
|
|
| 33 |
[A
|
| 34 |
2% 13/549 [01:31<1:12:45, 8.14s/it]
|
| 35 |
3% 14/549 [01:35<1:00:59, 6.84s/it]
|
| 36 |
3% 15/549 [01:39<52:49, 5.93s/it]
|
| 37 |
3% 16/549 [01:43<47:08, 5.31s/it]
|
| 38 |
3% 17/549 [01:47<43:11, 4.87s/it]
|
| 39 |
3% 18/549 [01:51<40:28, 4.57s/it][2025-10-10 13:13:42,268] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 41 |
50% 2/4 [00:01<00:01, 1.83it/s][A
|
|
|
|
| 42 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
|
|
|
| 43 |
|
|
|
|
| 44 |
|
|
|
|
| 45 |
3% 18/549 [02:03<40:28, 4.57s/it]
|
|
|
|
|
|
|
| 46 |
[A
|
| 47 |
3% 19/549 [02:07<1:12:15, 8.18s/it]
|
| 48 |
4% 20/549 [02:11<1:00:54, 6.91s/it]
|
| 49 |
4% 21/549 [02:15<52:54, 6.01s/it]
|
| 50 |
4% 22/549 [02:19<47:19, 5.39s/it]
|
| 51 |
4% 23/549 [02:23<43:26, 4.96s/it]
|
| 52 |
4% 24/549 [02:27<40:38, 4.64s/it][2025-10-10 13:14:18,514] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 54 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 55 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 56 |
|
|
|
|
| 57 |
|
|
|
|
| 58 |
4% 24/549 [02:39<40:38, 4.64s/it]
|
|
|
|
|
|
|
| 59 |
[A
|
| 60 |
5% 25/549 [02:43<1:10:34, 8.08s/it]
|
| 61 |
5% 26/549 [02:47<59:22, 6.81s/it]
|
| 62 |
5% 27/549 [02:51<51:30, 5.92s/it]
|
| 63 |
5% 28/549 [02:54<45:59, 5.30s/it]
|
| 64 |
5% 29/549 [02:58<42:11, 4.87s/it]
|
| 65 |
5% 30/549 [03:02<39:29, 4.57s/it][2025-10-10 13:14:53,875] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 67 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 68 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
|
|
|
| 69 |
|
|
|
|
| 70 |
|
|
|
|
| 71 |
5% 30/549 [03:14<39:29, 4.57s/it]
|
|
|
|
|
|
|
| 72 |
[A
|
| 73 |
6% 31/549 [03:18<1:08:40, 7.95s/it]
|
| 74 |
6% 32/549 [03:22<57:59, 6.73s/it]
|
| 75 |
6% 33/549 [03:26<50:32, 5.88s/it]
|
| 76 |
6% 34/549 [03:30<45:20, 5.28s/it]
|
| 77 |
6% 35/549 [03:34<41:45, 4.87s/it]
|
| 78 |
7% 36/549 [03:38<39:13, 4.59s/it][2025-10-10 13:15:29,235] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 80 |
50% 2/4 [00:01<00:01, 1.84it/s][A
|
|
|
|
| 81 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
|
|
|
| 82 |
|
|
|
|
| 83 |
|
|
|
|
| 84 |
7% 36/549 [03:50<39:13, 4.59s/it]
|
|
|
|
|
|
|
| 85 |
[A
|
| 86 |
7% 37/549 [03:53<1:08:00, 7.97s/it]
|
| 87 |
7% 38/549 [03:57<57:23, 6.74s/it]
|
| 88 |
7% 39/549 [04:01<50:02, 5.89s/it]
|
| 89 |
7% 40/549 [04:05<44:51, 5.29s/it]
|
| 90 |
7% 41/549 [04:09<41:10, 4.86s/it]
|
| 91 |
8% 42/549 [04:13<38:40, 4.58s/it][2025-10-10 13:16:04,533] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 93 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 94 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 95 |
|
|
|
|
| 96 |
|
|
|
|
| 97 |
8% 42/549 [04:25<38:40, 4.58s/it]
|
|
|
|
|
|
|
| 98 |
[A
|
| 99 |
8% 43/549 [04:29<1:07:37, 8.02s/it]
|
| 100 |
8% 44/549 [04:33<56:57, 6.77s/it]
|
| 101 |
8% 45/549 [04:37<49:35, 5.90s/it]
|
| 102 |
8% 46/549 [04:41<44:25, 5.30s/it]
|
| 103 |
9% 47/549 [04:44<40:49, 4.88s/it]
|
| 104 |
9% 48/549 [04:48<38:22, 4.60s/it][2025-10-10 13:16:40,037] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 106 |
50% 2/4 [00:01<00:01, 1.84it/s][A
|
|
|
|
| 107 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
|
|
|
| 108 |
|
|
|
|
| 109 |
|
|
|
|
| 110 |
9% 48/549 [05:01<38:22, 4.60s/it]
|
|
|
|
|
|
|
| 111 |
[A
|
| 112 |
9% 49/549 [05:05<1:07:36, 8.11s/it]
|
| 113 |
9% 50/549 [05:09<56:51, 6.84s/it]
|
| 114 |
9% 51/549 [05:12<49:28, 5.96s/it]
|
| 115 |
9% 52/549 [05:16<44:15, 5.34s/it]
|
| 116 |
10% 53/549 [05:20<40:31, 4.90s/it]
|
| 117 |
10% 54/549 [05:24<36:22, 4.41s/it][2025-10-10 13:17:15,168] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 119 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 120 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 121 |
|
|
|
|
| 122 |
|
|
|
|
| 123 |
10% 54/549 [05:36<36:22, 4.41s/it]
|
|
|
|
|
|
|
| 124 |
[A
|
| 125 |
10% 55/549 [05:40<1:06:03, 8.02s/it]
|
| 126 |
10% 56/549 [05:44<55:34, 6.76s/it]
|
| 127 |
10% 57/549 [05:48<48:18, 5.89s/it]
|
| 128 |
11% 58/549 [05:52<43:15, 5.29s/it]
|
| 129 |
11% 59/549 [05:55<39:46, 4.87s/it]
|
| 130 |
11% 60/549 [05:59<37:18, 4.58s/it][2025-10-10 13:17:50,975] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 132 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 133 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 134 |
|
|
|
|
| 135 |
|
|
|
|
| 136 |
11% 60/549 [06:13<37:18, 4.58s/it]
|
|
|
|
|
|
|
| 137 |
[A
|
| 138 |
11% 61/549 [06:17<1:09:20, 8.52s/it]
|
| 139 |
11% 62/549 [06:21<57:52, 7.13s/it]
|
| 140 |
11% 63/549 [06:25<49:53, 6.16s/it]
|
| 141 |
12% 64/549 [06:29<44:24, 5.49s/it]
|
| 142 |
12% 65/549 [06:33<40:32, 5.03s/it]
|
| 143 |
12% 66/549 [06:37<37:50, 4.70s/it][2025-10-10 13:18:28,296] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 145 |
50% 2/4 [00:01<00:01, 1.80it/s][A
|
|
|
|
| 146 |
75% 3/4 [00:02<00:00, 1.31it/s][A
|
|
|
|
| 147 |
|
|
|
|
| 148 |
|
|
|
|
| 149 |
12% 66/549 [06:53<37:50, 4.70s/it]
|
|
|
|
|
|
|
| 150 |
[A
|
| 151 |
12% 67/549 [06:57<1:14:42, 9.30s/it]
|
| 152 |
12% 68/549 [07:01<1:01:33, 7.68s/it]
|
| 153 |
13% 69/549 [07:04<52:22, 6.55s/it]
|
| 154 |
13% 70/549 [07:08<45:55, 5.75s/it]
|
| 155 |
13% 71/549 [07:12<41:24, 5.20s/it]
|
| 156 |
13% 72/549 [07:16<38:15, 4.81s/it][2025-10-10 13:19:07,848] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 158 |
50% 2/4 [00:01<00:01, 1.84it/s][A
|
|
|
|
| 159 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 160 |
|
|
|
|
| 161 |
|
|
|
|
| 162 |
13% 72/549 [07:29<38:15, 4.81s/it]
|
|
|
|
|
|
|
| 163 |
[A
|
| 164 |
13% 73/549 [07:33<1:05:59, 8.32s/it]
|
| 165 |
13% 74/549 [07:36<53:50, 6.80s/it]
|
| 166 |
14% 75/549 [07:40<46:47, 5.92s/it]
|
| 167 |
14% 76/549 [07:44<41:57, 5.32s/it]
|
| 168 |
14% 77/549 [07:48<38:32, 4.90s/it]
|
| 169 |
14% 78/549 [07:52<36:06, 4.60s/it][2025-10-10 13:19:43,212] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 171 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 172 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 173 |
|
|
|
|
| 174 |
|
|
|
|
| 175 |
14% 78/549 [08:04<36:06, 4.60s/it]
|
|
|
|
|
|
|
| 176 |
[A
|
| 177 |
14% 79/549 [08:08<1:03:59, 8.17s/it]
|
| 178 |
15% 80/549 [08:12<53:44, 6.88s/it]
|
| 179 |
15% 81/549 [08:16<46:34, 5.97s/it]
|
| 180 |
15% 82/549 [08:20<41:37, 5.35s/it]
|
| 181 |
15% 83/549 [08:24<38:10, 4.91s/it]
|
| 182 |
15% 84/549 [08:27<35:43, 4.61s/it][2025-10-10 13:20:19,124] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 184 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 185 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 186 |
|
|
|
|
| 187 |
|
|
|
|
| 188 |
15% 84/549 [08:40<35:43, 4.61s/it]
|
|
|
|
|
|
|
| 189 |
[A
|
| 190 |
15% 85/549 [08:44<1:03:06, 8.16s/it]
|
| 191 |
16% 86/549 [08:48<53:01, 6.87s/it]
|
| 192 |
16% 87/549 [08:52<45:57, 5.97s/it]
|
| 193 |
16% 88/549 [08:56<41:08, 5.35s/it]
|
| 194 |
16% 89/549 [08:59<37:43, 4.92s/it]
|
| 195 |
16% 90/549 [09:03<35:17, 4.61s/it][2025-10-10 13:20:55,024] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 197 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 198 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 199 |
|
|
|
|
| 200 |
|
|
|
|
| 201 |
16% 90/549 [09:16<35:17, 4.61s/it]
|
|
|
|
|
|
|
| 202 |
[A
|
| 203 |
17% 91/549 [09:20<1:02:22, 8.17s/it]
|
| 204 |
17% 92/549 [09:24<52:22, 6.88s/it]
|
| 205 |
17% 93/549 [09:28<45:23, 5.97s/it]
|
| 206 |
17% 94/549 [09:31<40:32, 5.35s/it]
|
| 207 |
17% 95/549 [09:35<37:09, 4.91s/it]
|
| 208 |
17% 96/549 [09:39<34:46, 4.61s/it][2025-10-10 13:21:30,890] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 210 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 211 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 212 |
|
|
|
|
| 213 |
|
|
|
|
| 214 |
17% 96/549 [09:52<34:46, 4.61s/it]
|
|
|
|
|
|
|
| 215 |
[A
|
| 216 |
18% 97/549 [09:56<1:02:02, 8.24s/it]
|
| 217 |
18% 98/549 [10:00<52:01, 6.92s/it]
|
| 218 |
18% 99/549 [10:04<45:02, 6.00s/it]
|
| 219 |
18% 100/549 [10:08<40:10, 5.37s/it]
|
| 220 |
|
|
|
|
| 221 |
18% 100/549 [10:08<40:10, 5.37s/it]
|
| 222 |
18% 101/549 [10:12<36:58, 4.95s/it]
|
| 223 |
19% 102/549 [10:15<34:32, 4.64s/it][2025-10-10 13:22:07,077] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 225 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 226 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 227 |
|
|
|
|
| 228 |
|
|
|
|
| 229 |
19% 102/549 [10:28<34:32, 4.64s/it]
|
|
|
|
|
|
|
| 230 |
[A
|
| 231 |
19% 103/549 [10:32<1:00:52, 8.19s/it]
|
| 232 |
19% 104/549 [10:36<51:10, 6.90s/it]
|
| 233 |
19% 105/549 [10:40<44:23, 6.00s/it]
|
| 234 |
19% 106/549 [10:44<39:36, 5.36s/it]
|
| 235 |
19% 107/549 [10:48<36:21, 4.94s/it]
|
| 236 |
20% 108/549 [10:51<34:00, 4.63s/it][2025-10-10 13:22:43,069] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 238 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 239 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 240 |
|
|
|
|
| 241 |
|
|
|
|
| 242 |
20% 108/549 [11:04<34:00, 4.63s/it]
|
|
|
|
|
|
|
| 243 |
[A
|
| 244 |
20% 109/549 [11:08<59:10, 8.07s/it]
|
| 245 |
20% 110/549 [11:11<49:49, 6.81s/it]
|
| 246 |
20% 111/549 [11:15<43:16, 5.93s/it]
|
| 247 |
20% 112/549 [11:19<38:43, 5.32s/it]
|
| 248 |
21% 113/549 [11:23<35:29, 4.89s/it]
|
| 249 |
21% 114/549 [11:27<33:16, 4.59s/it][2025-10-10 13:23:18,582] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 251 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 252 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
|
|
|
| 253 |
|
|
|
|
| 254 |
|
|
|
|
| 255 |
21% 114/549 [11:39<33:16, 4.59s/it]
|
|
|
|
|
|
|
| 256 |
[A
|
| 257 |
21% 115/549 [11:43<57:40, 7.97s/it]
|
| 258 |
21% 116/549 [11:47<48:37, 6.74s/it]
|
| 259 |
21% 117/549 [11:51<42:19, 5.88s/it]
|
| 260 |
21% 118/549 [11:54<37:57, 5.28s/it]
|
| 261 |
22% 119/549 [11:58<34:52, 4.87s/it]
|
| 262 |
22% 120/549 [12:02<32:40, 4.57s/it][2025-10-10 13:23:53,849] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 264 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 265 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 266 |
|
|
|
|
| 267 |
|
|
|
|
| 268 |
22% 120/549 [12:14<32:40, 4.57s/it]
|
|
|
|
|
|
|
| 269 |
[A
|
| 270 |
22% 121/549 [12:18<56:50, 7.97s/it]
|
| 271 |
22% 122/549 [12:22<47:53, 6.73s/it]
|
| 272 |
22% 123/549 [12:26<41:44, 5.88s/it]
|
| 273 |
23% 124/549 [12:30<37:24, 5.28s/it]
|
| 274 |
23% 125/549 [12:34<34:22, 4.86s/it]
|
| 275 |
23% 126/549 [12:38<32:18, 4.58s/it][2025-10-10 13:24:29,185] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 277 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 278 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 279 |
|
|
|
|
| 280 |
|
|
|
|
| 281 |
23% 126/549 [12:50<32:18, 4.58s/it]
|
|
|
|
|
|
|
| 282 |
[A
|
| 283 |
23% 127/549 [12:54<56:47, 8.08s/it]
|
| 284 |
23% 128/549 [12:58<47:47, 6.81s/it]
|
| 285 |
23% 129/549 [13:02<41:35, 5.94s/it]
|
| 286 |
24% 130/549 [13:05<37:15, 5.33s/it]
|
| 287 |
24% 131/549 [13:09<34:11, 4.91s/it]
|
| 288 |
24% 132/549 [13:13<32:04, 4.61s/it][2025-10-10 13:25:04,943] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 290 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 291 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 292 |
|
|
|
|
| 293 |
|
|
|
|
| 294 |
24% 132/549 [13:26<32:04, 4.61s/it]
|
|
|
|
|
|
|
| 295 |
[A
|
| 296 |
24% 133/549 [13:30<56:33, 8.16s/it]
|
| 297 |
24% 134/549 [13:34<47:29, 6.87s/it]
|
| 298 |
25% 135/549 [13:37<41:08, 5.96s/it]
|
| 299 |
25% 136/549 [13:41<36:43, 5.34s/it]
|
| 300 |
25% 137/549 [13:45<33:38, 4.90s/it]
|
| 301 |
25% 138/549 [13:49<31:27, 4.59s/it][2025-10-10 13:25:40,704] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 303 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 304 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 305 |
|
|
|
|
| 306 |
|
|
|
|
| 307 |
25% 138/549 [14:02<31:27, 4.59s/it]
|
|
|
|
|
|
|
| 308 |
[A
|
| 309 |
25% 139/549 [14:06<55:44, 8.16s/it]
|
| 310 |
26% 140/549 [14:09<46:48, 6.87s/it]
|
| 311 |
26% 141/549 [14:13<40:35, 5.97s/it]
|
| 312 |
26% 142/549 [14:17<36:14, 5.34s/it]
|
| 313 |
26% 143/549 [14:21<33:13, 4.91s/it]
|
| 314 |
26% 144/549 [14:25<31:06, 4.61s/it][2025-10-10 13:26:16,597] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 315 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 316 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 317 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 318 |
|
|
|
|
| 319 |
|
|
|
|
| 320 |
26% 144/549 [14:38<31:06, 4.61s/it]
|
|
|
|
|
|
|
| 321 |
[A
|
| 322 |
26% 145/549 [14:41<55:08, 8.19s/it]
|
| 323 |
27% 146/549 [14:45<46:16, 6.89s/it]
|
| 324 |
27% 147/549 [14:49<40:05, 5.98s/it]
|
| 325 |
27% 148/549 [14:53<35:50, 5.36s/it]
|
| 326 |
27% 149/549 [14:57<32:50, 4.93s/it]
|
| 327 |
27% 150/549 [15:01<30:40, 4.61s/it][2025-10-10 13:26:52,575] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 328 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 329 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 330 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 331 |
|
|
|
|
| 332 |
|
|
|
|
| 333 |
27% 150/549 [15:14<30:40, 4.61s/it]
|
|
|
|
|
|
|
| 334 |
[A
|
| 335 |
28% 151/549 [15:17<54:17, 8.18s/it]
|
| 336 |
28% 152/549 [15:21<45:33, 6.89s/it]
|
| 337 |
28% 153/549 [15:25<39:27, 5.98s/it]
|
| 338 |
28% 154/549 [15:29<35:14, 5.35s/it]
|
| 339 |
28% 155/549 [15:33<32:17, 4.92s/it]
|
| 340 |
28% 156/549 [15:36<28:59, 4.43s/it][2025-10-10 13:27:27,880] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 341 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 342 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 343 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 344 |
|
|
|
|
| 345 |
|
|
|
|
| 346 |
28% 156/549 [15:49<28:59, 4.43s/it]
|
|
|
|
|
|
|
| 347 |
[A
|
| 348 |
29% 157/549 [15:53<52:50, 8.09s/it]
|
| 349 |
29% 158/549 [15:57<44:26, 6.82s/it]
|
| 350 |
29% 159/549 [16:01<38:37, 5.94s/it]
|
| 351 |
29% 160/549 [16:04<34:31, 5.33s/it]
|
| 352 |
29% 161/549 [16:08<31:41, 4.90s/it]
|
| 353 |
30% 162/549 [16:12<29:40, 4.60s/it][2025-10-10 13:28:03,961] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 354 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 355 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 356 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 357 |
|
|
|
|
| 358 |
|
|
|
|
| 359 |
30% 162/549 [16:25<29:40, 4.60s/it]
|
|
|
|
|
|
|
| 360 |
[A
|
| 361 |
30% 163/549 [16:29<52:53, 8.22s/it]
|
| 362 |
30% 164/549 [16:33<44:20, 6.91s/it]
|
| 363 |
30% 165/549 [16:37<38:21, 5.99s/it]
|
| 364 |
30% 166/549 [16:41<34:12, 5.36s/it]
|
| 365 |
30% 167/549 [16:44<31:21, 4.92s/it]
|
| 366 |
31% 168/549 [16:48<29:18, 4.62s/it][2025-10-10 13:28:40,029] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 367 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 368 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 369 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 370 |
|
|
|
|
| 371 |
|
|
|
|
| 372 |
31% 168/549 [17:01<29:18, 4.62s/it]
|
|
|
|
|
|
|
| 373 |
[A
|
| 374 |
31% 169/549 [17:05<52:01, 8.21s/it]
|
| 375 |
31% 170/549 [17:09<43:37, 6.91s/it]
|
| 376 |
31% 171/549 [17:13<37:46, 6.00s/it]
|
| 377 |
31% 172/549 [17:17<33:43, 5.37s/it]
|
| 378 |
32% 173/549 [17:21<30:52, 4.93s/it]
|
| 379 |
32% 174/549 [17:24<28:52, 4.62s/it][2025-10-10 13:29:16,068] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 381 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 382 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 383 |
|
|
|
|
| 384 |
|
|
|
|
| 385 |
32% 174/549 [17:37<28:52, 4.62s/it]
|
|
|
|
|
|
|
| 386 |
[A
|
| 387 |
32% 175/549 [17:41<51:37, 8.28s/it]
|
| 388 |
32% 176/549 [17:45<43:14, 6.96s/it]
|
| 389 |
32% 177/549 [17:49<37:23, 6.03s/it]
|
| 390 |
32% 178/549 [17:53<33:19, 5.39s/it]
|
| 391 |
33% 179/549 [17:57<30:29, 4.94s/it]
|
| 392 |
33% 180/549 [18:01<28:28, 4.63s/it][2025-10-10 13:29:52,329] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 393 |
0% 0/4 [00:00<?, ?it/s][A
|
|
|
|
| 394 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
|
|
|
| 395 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
|
|
|
| 396 |
|
|
|
|
| 397 |
|
|
|
|
| 398 |
33% 180/549 [18:13<28:28, 4.63s/it]
|
|
|
|
|
|
|
| 399 |
[A
|
| 400 |
33% 181/549 [18:17<49:55, 8.14s/it]
|
| 401 |
33% 182/549 [18:21<41:59, 6.86s/it]
|
| 402 |
33% 183/549 [18:25<36:37, 6.00s/it][2025-10-10 13:30:16,541] [INFO] [axolotl.core.trainers.base._save:671] [PID:24741] Saving model checkpoint to ./qlora-out/checkpoint-183
|
|
|
|
| 1 |
+
[2025-10-10 13:10:41,462] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:24741] baseline 0.000GB (+0.000GB allocated, +0.002GB reserved)
|
| 2 |
+
[2025-10-10 13:10:41,462] [INFO] [axolotl.cli.config.load_cfg:248] [PID:24741] config:
|
| 3 |
+
{
|
| 4 |
+
"activation_offloading": false,
|
| 5 |
+
"adapter": "qlora",
|
| 6 |
+
"axolotl_config_path": "config.yaml",
|
| 7 |
+
"base_model": "nferruz/ProtGPT2",
|
| 8 |
+
"base_model_config": "nferruz/ProtGPT2",
|
| 9 |
+
"batch_size": 2,
|
| 10 |
+
"bf16": false,
|
| 11 |
+
"capabilities": {
|
| 12 |
+
"bf16": true,
|
| 13 |
+
"compute_capability": "sm_75",
|
| 14 |
+
"fp8": false,
|
| 15 |
+
"n_gpu": 1,
|
| 16 |
+
"n_node": 1
|
| 17 |
+
},
|
| 18 |
+
"context_parallel_size": 1,
|
| 19 |
+
"dataloader_num_workers": 1,
|
| 20 |
+
"dataloader_pin_memory": true,
|
| 21 |
+
"dataloader_prefetch_factor": 256,
|
| 22 |
+
"dataset_processes": 2,
|
| 23 |
+
"datasets": [
|
| 24 |
+
{
|
| 25 |
+
"ds_type": "json",
|
| 26 |
+
"message_property_mappings": {
|
| 27 |
+
"content": "content",
|
| 28 |
+
"role": "role"
|
| 29 |
+
},
|
| 30 |
+
"path": "/content/sequences_tokenized.jsonl",
|
| 31 |
+
"trust_remote_code": false
|
| 32 |
+
}
|
| 33 |
+
],
|
| 34 |
+
"ddp": false,
|
| 35 |
+
"device": "cuda:0",
|
| 36 |
+
"dion_rank_fraction": 1.0,
|
| 37 |
+
"dion_rank_multiple_of": 1,
|
| 38 |
+
"env_capabilities": {
|
| 39 |
+
"torch_version": "2.8.0"
|
| 40 |
+
},
|
| 41 |
+
"eval_batch_size": 2,
|
| 42 |
+
"eval_causal_lm_metrics": [
|
| 43 |
+
"sacrebleu",
|
| 44 |
+
"comet",
|
| 45 |
+
"ter",
|
| 46 |
+
"chrf"
|
| 47 |
+
],
|
| 48 |
+
"eval_max_new_tokens": 128,
|
| 49 |
+
"eval_sample_packing": true,
|
| 50 |
+
"eval_steps": 0.01,
|
| 51 |
+
"eval_table_size": 0,
|
| 52 |
+
"experimental_skip_move_to_device": true,
|
| 53 |
+
"fp16": true,
|
| 54 |
+
"gradient_accumulation_steps": 1,
|
| 55 |
+
"gradient_checkpointing": true,
|
| 56 |
+
"gradient_checkpointing_kwargs": {
|
| 57 |
+
"use_reentrant": true
|
| 58 |
+
},
|
| 59 |
+
"group_by_length": false,
|
| 60 |
+
"hub_model_id": "ProtGPT2-Oxido",
|
| 61 |
+
"include_tkps": true,
|
| 62 |
+
"is_falcon_derived_model": false,
|
| 63 |
+
"is_llama_derived_model": false,
|
| 64 |
+
"is_mistral_derived_model": false,
|
| 65 |
+
"learning_rate": 0.002,
|
| 66 |
+
"lisa_layers_attribute": "model.layers",
|
| 67 |
+
"load_best_model_at_end": false,
|
| 68 |
+
"load_in_4bit": true,
|
| 69 |
+
"load_in_8bit": false,
|
| 70 |
+
"local_rank": 0,
|
| 71 |
+
"logging_steps": 100,
|
| 72 |
+
"lora_alpha": 16,
|
| 73 |
+
"lora_dropout": 0.05,
|
| 74 |
+
"lora_r": 32,
|
| 75 |
+
"lora_target_linear": true,
|
| 76 |
+
"loraplus_lr_embedding": 1e-06,
|
| 77 |
+
"lr_scheduler": "cosine",
|
| 78 |
+
"mean_resizing_embeddings": false,
|
| 79 |
+
"micro_batch_size": 2,
|
| 80 |
+
"model_config_type": "gpt2",
|
| 81 |
+
"num_epochs": 3.0,
|
| 82 |
+
"optimizer": "paged_adamw_32bit",
|
| 83 |
+
"output_dir": "./qlora-out",
|
| 84 |
+
"pad_to_sequence_len": true,
|
| 85 |
+
"pretrain_multipack_attn": true,
|
| 86 |
+
"profiler_steps_start": 0,
|
| 87 |
+
"qlora_sharded_model_loading": false,
|
| 88 |
+
"ray_num_workers": 1,
|
| 89 |
+
"resources_per_worker": {
|
| 90 |
+
"GPU": 1
|
| 91 |
+
},
|
| 92 |
+
"sample_packing": true,
|
| 93 |
+
"sample_packing_bin_size": 200,
|
| 94 |
+
"sample_packing_group_size": 100000,
|
| 95 |
+
"save_only_model": false,
|
| 96 |
+
"save_safetensors": true,
|
| 97 |
+
"save_strategy": "epoch",
|
| 98 |
+
"sequence_len": 2048,
|
| 99 |
+
"shuffle_before_merging_datasets": false,
|
| 100 |
+
"shuffle_merged_datasets": true,
|
| 101 |
+
"skip_prepare_dataset": false,
|
| 102 |
+
"special_tokens": {
|
| 103 |
+
"eos_token": "<|endoftext|>",
|
| 104 |
+
"pad_token": "<|endoftext|>"
|
| 105 |
+
},
|
| 106 |
+
"streaming_multipack_buffer_size": 10000,
|
| 107 |
+
"strict": false,
|
| 108 |
+
"tensor_parallel_size": 1,
|
| 109 |
+
"tf32": false,
|
| 110 |
+
"tiled_mlp_use_original_mlp": true,
|
| 111 |
+
"tokenizer_config": "nferruz/ProtGPT2",
|
| 112 |
+
"tokenizer_save_jinja_files": true,
|
| 113 |
+
"tokenizer_type": "AutoTokenizer",
|
| 114 |
+
"torch_dtype": "torch.float16",
|
| 115 |
+
"train_on_inputs": false,
|
| 116 |
+
"trl": {
|
| 117 |
+
"log_completions": false,
|
| 118 |
+
"mask_truncated_completions": false,
|
| 119 |
+
"ref_model_mixup_alpha": 0.9,
|
| 120 |
+
"ref_model_sync_steps": 64,
|
| 121 |
+
"scale_rewards": true,
|
| 122 |
+
"sync_ref_model": false,
|
| 123 |
+
"use_vllm": false,
|
| 124 |
+
"vllm_server_host": "0.0.0.0",
|
| 125 |
+
"vllm_server_port": 8000
|
| 126 |
+
},
|
| 127 |
+
"type_of_model": "AutoModelForCausalLM",
|
| 128 |
+
"use_ray": false,
|
| 129 |
+
"val_set_size": 0.02,
|
| 130 |
+
"vllm": {
|
| 131 |
+
"device": "auto",
|
| 132 |
+
"dtype": "auto",
|
| 133 |
+
"gpu_memory_utilization": 0.9,
|
| 134 |
+
"host": "0.0.0.0",
|
| 135 |
+
"port": 8000
|
| 136 |
+
},
|
| 137 |
+
"warmup_steps": 100,
|
| 138 |
+
"weight_decay": 0.0,
|
| 139 |
+
"world_size": 1
|
| 140 |
+
}
|
| 141 |
+
[2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:278] [PID:24741] EOS: 0 / <|endoftext|>
|
| 142 |
+
[2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:279] [PID:24741] BOS: 0 / <|endoftext|>
|
| 143 |
+
[2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:24741] PAD: 0 / <|endoftext|>
|
| 144 |
+
[2025-10-10 13:10:42,465] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:24741] UNK: 0 / <|endoftext|>
|
| 145 |
+
[2025-10-10 13:10:42,465] [INFO] [axolotl.loaders.tokenizer.load_tokenizer:295] [PID:24741] No Chat template selected. Consider adding a chat template for easier inference.
|
| 146 |
+
[2025-10-10 13:10:42,466] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:476] [PID:24741] Unable to find prepared dataset in last_run_prepared/120d8e2ed44f3c537dc9a20773f86561
|
| 147 |
+
[2025-10-10 13:10:42,466] [INFO] [axolotl.utils.data.sft._load_raw_datasets:320] [PID:24741] Loading raw datasets...
|
| 148 |
+
[2025-10-10 13:10:42,466] [WARNING] [axolotl.utils.data.sft._load_raw_datasets:322] [PID:24741] Processing datasets during training can lead to VRAM instability. Please pre-process your dataset using `axolotl preprocess path/to/config.yml`.
|
| 149 |
+
[2025-10-10 13:10:42,802] [INFO] [axolotl.utils.data.wrappers.get_dataset_wrapper:87] [PID:24741] Loading dataset: /content/sequences_tokenized.jsonl with base_type: None and prompt_style: None
|
| 150 |
+
[2025-10-10 13:10:42,820] [INFO] [axolotl.utils.data.utils.handle_long_seq_in_dataset:218] [PID:24741] min_input_len: 6
|
| 151 |
+
[2025-10-10 13:10:42,821] [INFO] [axolotl.utils.data.utils.handle_long_seq_in_dataset:220] [PID:24741] max_input_len: 512
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
[2025-10-10 13:10:45,620] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:406] [PID:24741] total_num_tokens: 16_570
|
| 157 |
+
[2025-10-10 13:10:45,622] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:424] [PID:24741] `total_supervised_tokens: 16_570`
|
| 158 |
+
[2025-10-10 13:10:48,083] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9218025207519531
|
| 159 |
+
[2025-10-10 13:10:49,020] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9363107681274414
|
| 160 |
+
[2025-10-10 13:10:49,934] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9140019416809082
|
| 161 |
+
[2025-10-10 13:10:50,862] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9271283149719238
|
| 162 |
+
[2025-10-10 13:10:50,882] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 163 |
+
[2025-10-10 13:10:50,882] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:483] [PID:24741] data_loader_len: 4
|
| 164 |
+
[2025-10-10 13:10:50,883] [INFO] [axolotl.utils.trainer.calc_sample_packing_eff_est:499] [PID:24741] sample_packing_eff_est across ranks: [0.8989800347222222]
|
| 165 |
+
[2025-10-10 13:10:50,883] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:511] [PID:24741] sample_packing_eff_est: None
|
| 166 |
+
[2025-10-10 13:10:50,883] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:522] [PID:24741] total_num_steps: 12
|
| 167 |
+
[2025-10-10 13:10:50,893] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:406] [PID:24741] total_num_tokens: 746_874
|
| 168 |
+
[2025-10-10 13:10:50,932] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:424] [PID:24741] `total_supervised_tokens: 746_874`
|
| 169 |
+
[2025-10-10 13:10:52,871] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9358129501342773
|
| 170 |
+
[2025-10-10 13:10:53,781] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 0.9106135368347168
|
| 171 |
+
[2025-10-10 13:10:55,014] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2319858074188232
|
| 172 |
+
[2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2722358703613281
|
| 173 |
+
[2025-10-10 13:10:56,287] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [183]
|
| 174 |
+
[2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:483] [PID:24741] data_loader_len: 183
|
| 175 |
+
[2025-10-10 13:10:56,287] [INFO] [axolotl.utils.trainer.calc_sample_packing_eff_est:499] [PID:24741] sample_packing_eff_est across ranks: [0.9936909272820164]
|
| 176 |
+
[2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:511] [PID:24741] sample_packing_eff_est: 1.0
|
| 177 |
+
[2025-10-10 13:10:56,287] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:522] [PID:24741] total_num_steps: 549
|
| 178 |
+
[2025-10-10 13:10:56,287] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:24741] Maximum number of steps set at 549
|
| 179 |
+
[2025-10-10 13:10:56,297] [DEBUG] [axolotl.train.setup_model_and_tokenizer:65] [PID:24741] Loading tokenizer... nferruz/ProtGPT2
|
| 180 |
+
[2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:278] [PID:24741] EOS: 0 / <|endoftext|>
|
| 181 |
+
[2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:279] [PID:24741] BOS: 0 / <|endoftext|>
|
| 182 |
+
[2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:280] [PID:24741] PAD: 0 / <|endoftext|>
|
| 183 |
+
[2025-10-10 13:10:57,214] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:281] [PID:24741] UNK: 0 / <|endoftext|>
|
| 184 |
+
[2025-10-10 13:10:57,214] [INFO] [axolotl.loaders.tokenizer.load_tokenizer:295] [PID:24741] No Chat template selected. Consider adding a chat template for easier inference.
|
| 185 |
+
[2025-10-10 13:10:57,215] [DEBUG] [axolotl.train.setup_model_and_tokenizer:74] [PID:24741] Loading model
|
| 186 |
+
[2025-10-10 13:10:57,333] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:24741] Patched Trainer.evaluation_loop with nanmean loss calculation
|
| 187 |
+
[2025-10-10 13:10:57,334] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:24741] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
|
| 188 |
+
[2025-10-10 13:10:57,335] [INFO] [axolotl.loaders.patch_manager._apply_multipack_patches:301] [PID:24741] Applying multipack dataloader patch for sample packing...
|
| 189 |
+
[2025-10-10 13:11:27,461] [WARNING] [axolotl.loaders.model._adjust_model_config:273] [PID:24741] increasing model.config.max_position_embeddings from 1024 to 2048
|
| 190 |
+
[2025-10-10 13:11:27,467] [INFO] [axolotl.loaders.model._prepare_model_for_quantization:863] [PID:24741] converting PEFT model w/ prepare_model_for_kbit_training
|
| 191 |
+
[2025-10-10 13:11:27,479] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:345] [PID:24741] Converting modules to torch.float16
|
| 192 |
+
[2025-10-10 13:11:27,481] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:24741] Memory usage after model load 0.849GB (+0.849GB allocated, +0.918GB reserved)
|
| 193 |
+
[2025-10-10 13:11:27,482] [INFO] [axolotl.loaders.adapter.load_lora:80] [PID:24741] found linear modules: ['c_attn', 'c_fc', 'c_proj']
|
| 194 |
+
trainable params: 23,592,960 || all params: 797,623,040 || trainable%: 2.9579
|
| 195 |
+
[2025-10-10 13:11:27,888] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:24741] after adapters 0.618GB (+0.618GB allocated, +1.012GB reserved)
|
| 196 |
+
[2025-10-10 13:11:39,738] [INFO] [axolotl.train.save_initial_configs:398] [PID:24741] Pre-saving adapter config to ./qlora-out...
|
| 197 |
+
[2025-10-10 13:11:39,738] [INFO] [axolotl.train.save_initial_configs:402] [PID:24741] Pre-saving tokenizer to ./qlora-out...
|
| 198 |
+
[2025-10-10 13:11:39,828] [INFO] [axolotl.train.save_initial_configs:407] [PID:24741] Pre-saving model config to ./qlora-out...
|
| 199 |
+
[2025-10-10 13:11:39,836] [INFO] [axolotl.train.execute_training:196] [PID:24741] Starting trainer...
|
| 200 |
+
[2025-10-10 13:11:45,415] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8461699485778809
|
| 201 |
+
[2025-10-10 13:11:47,771] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 2.3554582595825195
|
| 202 |
+
[2025-10-10 13:11:49,329] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5573019981384277
|
| 203 |
+
[2025-10-10 13:11:51,006] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6764421463012695
|
| 204 |
+
[2025-10-10 13:11:51,006] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [183]
|
| 205 |
+
|
| 206 |
0% 0/549 [00:00<?, ?it/s][2025-10-10 13:11:51,161] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 207 |
+
[2025-10-10 13:11:54,146] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2840440273284912
|
| 208 |
+
[2025-10-10 13:11:55,365] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.218794584274292
|
| 209 |
+
[2025-10-10 13:11:56,599] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2343621253967285
|
| 210 |
+
[2025-10-10 13:11:58,099] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4991233348846436
|
| 211 |
+
[2025-10-10 13:11:58,099] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 212 |
+
|
| 213 |
+
|
| 214 |
0% 0/4 [00:00<?, ?it/s][A
|
| 215 |
+
|
| 216 |
50% 2/4 [00:00<00:00, 3.33it/s][A
|
| 217 |
+
|
| 218 |
75% 3/4 [00:01<00:00, 1.68it/s][A
|
| 219 |
+
|
| 220 |
|
| 221 |
+
|
| 222 |
|
| 223 |
+
|
| 224 |
0% 0/549 [00:14<?, ?it/s]
|
| 225 |
+
|
| 226 |
+
|
| 227 |
[A
|
| 228 |
0% 1/549 [00:21<3:16:49, 21.55s/it]
|
| 229 |
0% 2/549 [00:25<1:40:35, 11.03s/it]
|
| 230 |
1% 3/549 [00:28<1:09:56, 7.69s/it]
|
| 231 |
1% 4/549 [00:32<55:34, 6.12s/it]
|
| 232 |
1% 5/549 [00:36<47:37, 5.25s/it]
|
| 233 |
1% 6/549 [00:40<42:50, 4.73s/it][2025-10-10 13:12:31,245] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 234 |
+
[2025-10-10 13:12:33,921] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.245626449584961
|
| 235 |
+
[2025-10-10 13:12:35,290] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3684089183807373
|
| 236 |
+
[2025-10-10 13:12:37,029] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.738837718963623
|
| 237 |
+
[2025-10-10 13:12:38,274] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2442030906677246
|
| 238 |
+
[2025-10-10 13:12:38,274] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 239 |
+
|
| 240 |
+
|
| 241 |
0% 0/4 [00:00<?, ?it/s][A
|
| 242 |
+
|
| 243 |
50% 2/4 [00:01<00:01, 1.90it/s][A
|
| 244 |
+
|
| 245 |
75% 3/4 [00:02<00:00, 1.36it/s][A
|
| 246 |
+
|
| 247 |
|
| 248 |
+
|
| 249 |
|
| 250 |
+
|
| 251 |
1% 6/549 [00:52<42:50, 4.73s/it]
|
| 252 |
+
|
| 253 |
+
|
| 254 |
[A
|
| 255 |
1% 7/549 [00:56<1:17:17, 8.56s/it]
|
| 256 |
1% 8/549 [01:00<1:03:22, 7.03s/it]
|
| 257 |
2% 9/549 [01:04<54:04, 6.01s/it]
|
| 258 |
2% 10/549 [01:07<47:47, 5.32s/it]
|
| 259 |
2% 11/549 [01:11<43:30, 4.85s/it]
|
| 260 |
2% 12/549 [01:15<40:33, 4.53s/it][2025-10-10 13:13:06,562] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 261 |
+
[2025-10-10 13:13:09,050] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2255823612213135
|
| 262 |
+
[2025-10-10 13:13:10,291] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.240588665008545
|
| 263 |
+
[2025-10-10 13:13:11,581] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2900962829589844
|
| 264 |
+
[2025-10-10 13:13:13,335] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.753103256225586
|
| 265 |
+
[2025-10-10 13:13:13,335] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 266 |
+
|
| 267 |
+
|
| 268 |
0% 0/4 [00:00<?, ?it/s][A
|
| 269 |
+
|
| 270 |
50% 2/4 [00:01<00:01, 1.86it/s][A
|
| 271 |
+
|
| 272 |
75% 3/4 [00:02<00:00, 1.34it/s][A
|
| 273 |
+
|
| 274 |
|
| 275 |
+
|
| 276 |
|
| 277 |
+
|
| 278 |
2% 12/549 [01:28<40:33, 4.53s/it]
|
| 279 |
+
|
| 280 |
+
|
| 281 |
[A
|
| 282 |
2% 13/549 [01:31<1:12:45, 8.14s/it]
|
| 283 |
3% 14/549 [01:35<1:00:59, 6.84s/it]
|
| 284 |
3% 15/549 [01:39<52:49, 5.93s/it]
|
| 285 |
3% 16/549 [01:43<47:08, 5.31s/it]
|
| 286 |
3% 17/549 [01:47<43:11, 4.87s/it]
|
| 287 |
3% 18/549 [01:51<40:28, 4.57s/it][2025-10-10 13:13:42,268] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 288 |
+
[2025-10-10 13:13:44,777] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.233976125717163
|
| 289 |
+
[2025-10-10 13:13:45,991] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2140934467315674
|
| 290 |
+
[2025-10-10 13:13:47,230] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2387192249298096
|
| 291 |
+
[2025-10-10 13:13:48,517] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2868869304656982
|
| 292 |
+
[2025-10-10 13:13:48,517] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 293 |
+
|
| 294 |
+
|
| 295 |
0% 0/4 [00:00<?, ?it/s][A
|
| 296 |
+
|
| 297 |
50% 2/4 [00:01<00:01, 1.83it/s][A
|
| 298 |
+
|
| 299 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
| 300 |
+
|
| 301 |
|
| 302 |
+
|
| 303 |
|
| 304 |
+
|
| 305 |
3% 18/549 [02:03<40:28, 4.57s/it]
|
| 306 |
+
|
| 307 |
+
|
| 308 |
[A
|
| 309 |
3% 19/549 [02:07<1:12:15, 8.18s/it]
|
| 310 |
4% 20/549 [02:11<1:00:54, 6.91s/it]
|
| 311 |
4% 21/549 [02:15<52:54, 6.01s/it]
|
| 312 |
4% 22/549 [02:19<47:19, 5.39s/it]
|
| 313 |
4% 23/549 [02:23<43:26, 4.96s/it]
|
| 314 |
4% 24/549 [02:27<40:38, 4.64s/it][2025-10-10 13:14:18,514] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 315 |
+
[2025-10-10 13:14:20,985] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2237637042999268
|
| 316 |
+
[2025-10-10 13:14:22,224] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2387983798980713
|
| 317 |
+
[2025-10-10 13:14:23,447] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2227163314819336
|
| 318 |
+
[2025-10-10 13:14:24,682] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2346465587615967
|
| 319 |
+
[2025-10-10 13:14:24,682] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 320 |
+
|
| 321 |
+
|
| 322 |
0% 0/4 [00:00<?, ?it/s][A
|
| 323 |
+
|
| 324 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 325 |
+
|
| 326 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 327 |
+
|
| 328 |
|
| 329 |
+
|
| 330 |
|
| 331 |
+
|
| 332 |
4% 24/549 [02:39<40:38, 4.64s/it]
|
| 333 |
+
|
| 334 |
+
|
| 335 |
[A
|
| 336 |
5% 25/549 [02:43<1:10:34, 8.08s/it]
|
| 337 |
5% 26/549 [02:47<59:22, 6.81s/it]
|
| 338 |
5% 27/549 [02:51<51:30, 5.92s/it]
|
| 339 |
5% 28/549 [02:54<45:59, 5.30s/it]
|
| 340 |
5% 29/549 [02:58<42:11, 4.87s/it]
|
| 341 |
5% 30/549 [03:02<39:29, 4.57s/it][2025-10-10 13:14:53,875] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 342 |
+
[2025-10-10 13:14:56,364] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.229734182357788
|
| 343 |
+
[2025-10-10 13:14:57,586] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2214250564575195
|
| 344 |
+
[2025-10-10 13:14:58,806] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.219433307647705
|
| 345 |
+
[2025-10-10 13:15:00,136] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.329803705215454
|
| 346 |
+
[2025-10-10 13:15:00,136] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 347 |
+
|
| 348 |
+
|
| 349 |
0% 0/4 [00:00<?, ?it/s][A
|
| 350 |
+
|
| 351 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 352 |
+
|
| 353 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
| 354 |
+
|
| 355 |
|
| 356 |
+
|
| 357 |
|
| 358 |
+
|
| 359 |
5% 30/549 [03:14<39:29, 4.57s/it]
|
| 360 |
+
|
| 361 |
+
|
| 362 |
[A
|
| 363 |
6% 31/549 [03:18<1:08:40, 7.95s/it]
|
| 364 |
6% 32/549 [03:22<57:59, 6.73s/it]
|
| 365 |
6% 33/549 [03:26<50:32, 5.88s/it]
|
| 366 |
6% 34/549 [03:30<45:20, 5.28s/it]
|
| 367 |
6% 35/549 [03:34<41:45, 4.87s/it]
|
| 368 |
7% 36/549 [03:38<39:13, 4.59s/it][2025-10-10 13:15:29,235] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 369 |
+
[2025-10-10 13:15:31,722] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.212110996246338
|
| 370 |
+
[2025-10-10 13:15:32,954] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.231447458267212
|
| 371 |
+
[2025-10-10 13:15:34,205] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2508065700531006
|
| 372 |
+
[2025-10-10 13:15:35,438] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2330925464630127
|
| 373 |
+
[2025-10-10 13:15:35,439] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 374 |
+
|
| 375 |
+
|
| 376 |
0% 0/4 [00:00<?, ?it/s][A
|
| 377 |
+
|
| 378 |
50% 2/4 [00:01<00:01, 1.84it/s][A
|
| 379 |
+
|
| 380 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
| 381 |
+
|
| 382 |
|
| 383 |
+
|
| 384 |
|
| 385 |
+
|
| 386 |
7% 36/549 [03:50<39:13, 4.59s/it]
|
| 387 |
+
|
| 388 |
+
|
| 389 |
[A
|
| 390 |
7% 37/549 [03:53<1:08:00, 7.97s/it]
|
| 391 |
7% 38/549 [03:57<57:23, 6.74s/it]
|
| 392 |
7% 39/549 [04:01<50:02, 5.89s/it]
|
| 393 |
7% 40/549 [04:05<44:51, 5.29s/it]
|
| 394 |
7% 41/549 [04:09<41:10, 4.86s/it]
|
| 395 |
8% 42/549 [04:13<38:40, 4.58s/it][2025-10-10 13:16:04,533] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 396 |
+
[2025-10-10 13:16:07,240] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2179176807403564
|
| 397 |
+
[2025-10-10 13:16:08,479] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.239612340927124
|
| 398 |
+
[2025-10-10 13:16:09,694] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2147700786590576
|
| 399 |
+
[2025-10-10 13:16:10,944] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2497029304504395
|
| 400 |
+
[2025-10-10 13:16:10,945] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 401 |
+
|
| 402 |
+
|
| 403 |
0% 0/4 [00:00<?, ?it/s][A
|
| 404 |
+
|
| 405 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 406 |
+
|
| 407 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 408 |
+
|
| 409 |
|
| 410 |
+
|
| 411 |
|
| 412 |
+
|
| 413 |
8% 42/549 [04:25<38:40, 4.58s/it]
|
| 414 |
+
|
| 415 |
+
|
| 416 |
[A
|
| 417 |
8% 43/549 [04:29<1:07:37, 8.02s/it]
|
| 418 |
8% 44/549 [04:33<56:57, 6.77s/it]
|
| 419 |
8% 45/549 [04:37<49:35, 5.90s/it]
|
| 420 |
8% 46/549 [04:41<44:25, 5.30s/it]
|
| 421 |
9% 47/549 [04:44<40:49, 4.88s/it]
|
| 422 |
9% 48/549 [04:48<38:22, 4.60s/it][2025-10-10 13:16:40,037] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 423 |
+
[2025-10-10 13:16:43,053] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2188963890075684
|
| 424 |
+
[2025-10-10 13:16:44,293] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2395522594451904
|
| 425 |
+
[2025-10-10 13:16:45,513] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.21950364112854
|
| 426 |
+
[2025-10-10 13:16:46,763] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2497367858886719
|
| 427 |
+
[2025-10-10 13:16:46,763] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 428 |
+
|
| 429 |
+
|
| 430 |
0% 0/4 [00:00<?, ?it/s][A
|
| 431 |
+
|
| 432 |
50% 2/4 [00:01<00:01, 1.84it/s][A
|
| 433 |
+
|
| 434 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
| 435 |
+
|
| 436 |
|
| 437 |
+
|
| 438 |
|
| 439 |
+
|
| 440 |
9% 48/549 [05:01<38:22, 4.60s/it]
|
| 441 |
+
|
| 442 |
+
|
| 443 |
[A
|
| 444 |
9% 49/549 [05:05<1:07:36, 8.11s/it]
|
| 445 |
9% 50/549 [05:09<56:51, 6.84s/it]
|
| 446 |
9% 51/549 [05:12<49:28, 5.96s/it]
|
| 447 |
9% 52/549 [05:16<44:15, 5.34s/it]
|
| 448 |
10% 53/549 [05:20<40:31, 4.90s/it]
|
| 449 |
10% 54/549 [05:24<36:22, 4.41s/it][2025-10-10 13:17:15,168] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 450 |
+
[2025-10-10 13:17:18,275] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.8323063850402832
|
| 451 |
+
[2025-10-10 13:17:19,609] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3336091041564941
|
| 452 |
+
[2025-10-10 13:17:20,827] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2181472778320312
|
| 453 |
+
[2025-10-10 13:17:22,034] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2064990997314453
|
| 454 |
+
[2025-10-10 13:17:22,034] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 455 |
+
|
| 456 |
+
|
| 457 |
0% 0/4 [00:00<?, ?it/s][A
|
| 458 |
+
|
| 459 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 460 |
+
|
| 461 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 462 |
+
|
| 463 |
|
| 464 |
+
|
| 465 |
|
| 466 |
+
|
| 467 |
10% 54/549 [05:36<36:22, 4.41s/it]
|
| 468 |
+
|
| 469 |
+
|
| 470 |
[A
|
| 471 |
10% 55/549 [05:40<1:06:03, 8.02s/it]
|
| 472 |
10% 56/549 [05:44<55:34, 6.76s/it]
|
| 473 |
10% 57/549 [05:48<48:18, 5.89s/it]
|
| 474 |
11% 58/549 [05:52<43:15, 5.29s/it]
|
| 475 |
11% 59/549 [05:55<39:46, 4.87s/it]
|
| 476 |
11% 60/549 [05:59<37:18, 4.58s/it][2025-10-10 13:17:50,975] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 477 |
+
[2025-10-10 13:17:53,534] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2939870357513428
|
| 478 |
+
[2025-10-10 13:17:55,286] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7515311241149902
|
| 479 |
+
[2025-10-10 13:17:56,595] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3087666034698486
|
| 480 |
+
[2025-10-10 13:17:58,392] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7966506481170654
|
| 481 |
+
[2025-10-10 13:17:58,392] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 482 |
+
|
| 483 |
+
|
| 484 |
0% 0/4 [00:00<?, ?it/s][A
|
| 485 |
+
|
| 486 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 487 |
+
|
| 488 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 489 |
+
|
| 490 |
|
| 491 |
+
|
| 492 |
|
| 493 |
+
|
| 494 |
11% 60/549 [06:13<37:18, 4.58s/it]
|
| 495 |
+
|
| 496 |
+
|
| 497 |
[A
|
| 498 |
11% 61/549 [06:17<1:09:20, 8.52s/it]
|
| 499 |
11% 62/549 [06:21<57:52, 7.13s/it]
|
| 500 |
11% 63/549 [06:25<49:53, 6.16s/it]
|
| 501 |
12% 64/549 [06:29<44:24, 5.49s/it]
|
| 502 |
12% 65/549 [06:33<40:32, 5.03s/it]
|
| 503 |
12% 66/549 [06:37<37:50, 4.70s/it][2025-10-10 13:18:28,296] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 504 |
+
[2025-10-10 13:18:33,461] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 2.976652145385742
|
| 505 |
+
[2025-10-10 13:18:35,243] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7817871570587158
|
| 506 |
+
[2025-10-10 13:18:36,478] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2347698211669922
|
| 507 |
+
[2025-10-10 13:18:37,729] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2507171630859375
|
| 508 |
+
[2025-10-10 13:18:37,729] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 509 |
+
|
| 510 |
+
|
| 511 |
0% 0/4 [00:00<?, ?it/s][A
|
| 512 |
+
|
| 513 |
50% 2/4 [00:01<00:01, 1.80it/s][A
|
| 514 |
+
|
| 515 |
75% 3/4 [00:02<00:00, 1.31it/s][A
|
| 516 |
+
|
| 517 |
|
| 518 |
+
|
| 519 |
|
| 520 |
+
|
| 521 |
12% 66/549 [06:53<37:50, 4.70s/it]
|
| 522 |
+
|
| 523 |
+
|
| 524 |
[A
|
| 525 |
12% 67/549 [06:57<1:14:42, 9.30s/it]
|
| 526 |
12% 68/549 [07:01<1:01:33, 7.68s/it]
|
| 527 |
13% 69/549 [07:04<52:22, 6.55s/it]
|
| 528 |
13% 70/549 [07:08<45:55, 5.75s/it]
|
| 529 |
13% 71/549 [07:12<41:24, 5.20s/it]
|
| 530 |
13% 72/549 [07:16<38:15, 4.81s/it][2025-10-10 13:19:07,848] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 531 |
+
[2025-10-10 13:19:10,905] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.743417501449585
|
| 532 |
+
[2025-10-10 13:19:12,274] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3681507110595703
|
| 533 |
+
[2025-10-10 13:19:13,507] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.232816457748413
|
| 534 |
+
[2025-10-10 13:19:14,734] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2264103889465332
|
| 535 |
+
[2025-10-10 13:19:14,734] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 536 |
+
|
| 537 |
+
|
| 538 |
0% 0/4 [00:00<?, ?it/s][A
|
| 539 |
+
|
| 540 |
50% 2/4 [00:01<00:01, 1.84it/s][A
|
| 541 |
+
|
| 542 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 543 |
+
|
| 544 |
|
| 545 |
+
|
| 546 |
|
| 547 |
+
|
| 548 |
13% 72/549 [07:29<38:15, 4.81s/it]
|
| 549 |
+
|
| 550 |
+
|
| 551 |
[A
|
| 552 |
13% 73/549 [07:33<1:05:59, 8.32s/it]
|
| 553 |
13% 74/549 [07:36<53:50, 6.80s/it]
|
| 554 |
14% 75/549 [07:40<46:47, 5.92s/it]
|
| 555 |
14% 76/549 [07:44<41:57, 5.32s/it]
|
| 556 |
14% 77/549 [07:48<38:32, 4.90s/it]
|
| 557 |
14% 78/549 [07:52<36:06, 4.60s/it][2025-10-10 13:19:43,212] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 558 |
+
[2025-10-10 13:19:45,690] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2276828289031982
|
| 559 |
+
[2025-10-10 13:19:47,405] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7151846885681152
|
| 560 |
+
[2025-10-10 13:19:48,875] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4696624279022217
|
| 561 |
+
[2025-10-10 13:19:50,118] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.24239182472229
|
| 562 |
+
[2025-10-10 13:19:50,118] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 563 |
+
|
| 564 |
+
|
| 565 |
0% 0/4 [00:00<?, ?it/s][A
|
| 566 |
+
|
| 567 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 568 |
+
|
| 569 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 570 |
+
|
| 571 |
|
| 572 |
+
|
| 573 |
|
| 574 |
+
|
| 575 |
14% 78/549 [08:04<36:06, 4.60s/it]
|
| 576 |
+
|
| 577 |
+
|
| 578 |
[A
|
| 579 |
14% 79/549 [08:08<1:03:59, 8.17s/it]
|
| 580 |
15% 80/549 [08:12<53:44, 6.88s/it]
|
| 581 |
15% 81/549 [08:16<46:34, 5.97s/it]
|
| 582 |
15% 82/549 [08:20<41:37, 5.35s/it]
|
| 583 |
15% 83/549 [08:24<38:10, 4.91s/it]
|
| 584 |
15% 84/549 [08:27<35:43, 4.61s/it][2025-10-10 13:20:19,124] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 585 |
+
[2025-10-10 13:20:21,612] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2227427959442139
|
| 586 |
+
[2025-10-10 13:20:22,917] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3052830696105957
|
| 587 |
+
[2025-10-10 13:20:24,696] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7787699699401855
|
| 588 |
+
[2025-10-10 13:20:25,947] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2503879070281982
|
| 589 |
+
[2025-10-10 13:20:25,947] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 590 |
+
|
| 591 |
+
|
| 592 |
0% 0/4 [00:00<?, ?it/s][A
|
| 593 |
+
|
| 594 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 595 |
+
|
| 596 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 597 |
+
|
| 598 |
|
| 599 |
+
|
| 600 |
|
| 601 |
+
|
| 602 |
15% 84/549 [08:40<35:43, 4.61s/it]
|
| 603 |
+
|
| 604 |
+
|
| 605 |
[A
|
| 606 |
15% 85/549 [08:44<1:03:06, 8.16s/it]
|
| 607 |
16% 86/549 [08:48<53:01, 6.87s/it]
|
| 608 |
16% 87/549 [08:52<45:57, 5.97s/it]
|
| 609 |
16% 88/549 [08:56<41:08, 5.35s/it]
|
| 610 |
16% 89/549 [08:59<37:43, 4.92s/it]
|
| 611 |
16% 90/549 [09:03<35:17, 4.61s/it][2025-10-10 13:20:55,024] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 612 |
+
[2025-10-10 13:20:57,527] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2476911544799805
|
| 613 |
+
[2025-10-10 13:20:58,765] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2383363246917725
|
| 614 |
+
[2025-10-10 13:21:00,176] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4102411270141602
|
| 615 |
+
[2025-10-10 13:21:01,923] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7466182708740234
|
| 616 |
+
[2025-10-10 13:21:01,923] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 617 |
+
|
| 618 |
+
|
| 619 |
0% 0/4 [00:00<?, ?it/s][A
|
| 620 |
+
|
| 621 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 622 |
+
|
| 623 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 624 |
+
|
| 625 |
|
| 626 |
+
|
| 627 |
|
| 628 |
+
|
| 629 |
16% 90/549 [09:16<35:17, 4.61s/it]
|
| 630 |
+
|
| 631 |
+
|
| 632 |
[A
|
| 633 |
17% 91/549 [09:20<1:02:22, 8.17s/it]
|
| 634 |
17% 92/549 [09:24<52:22, 6.88s/it]
|
| 635 |
17% 93/549 [09:28<45:23, 5.97s/it]
|
| 636 |
17% 94/549 [09:31<40:32, 5.35s/it]
|
| 637 |
17% 95/549 [09:35<37:09, 4.91s/it]
|
| 638 |
17% 96/549 [09:39<34:46, 4.61s/it][2025-10-10 13:21:30,890] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 639 |
+
[2025-10-10 13:21:33,421] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2245755195617676
|
| 640 |
+
[2025-10-10 13:21:34,670] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2479896545410156
|
| 641 |
+
[2025-10-10 13:21:35,919] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2494275569915771
|
| 642 |
+
[2025-10-10 13:21:37,555] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6355178356170654
|
| 643 |
+
[2025-10-10 13:21:37,555] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 644 |
+
|
| 645 |
+
|
| 646 |
0% 0/4 [00:00<?, ?it/s][A
|
| 647 |
+
|
| 648 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 649 |
+
|
| 650 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 651 |
+
|
| 652 |
|
| 653 |
+
|
| 654 |
|
| 655 |
+
|
| 656 |
17% 96/549 [09:52<34:46, 4.61s/it]
|
| 657 |
+
|
| 658 |
+
|
| 659 |
[A
|
| 660 |
18% 97/549 [09:56<1:02:02, 8.24s/it]
|
| 661 |
18% 98/549 [10:00<52:01, 6.92s/it]
|
| 662 |
18% 99/549 [10:04<45:02, 6.00s/it]
|
| 663 |
18% 100/549 [10:08<40:10, 5.37s/it]
|
| 664 |
|
| 665 |
+
|
| 666 |
18% 100/549 [10:08<40:10, 5.37s/it]
|
| 667 |
18% 101/549 [10:12<36:58, 4.95s/it]
|
| 668 |
19% 102/549 [10:15<34:32, 4.64s/it][2025-10-10 13:22:07,077] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 669 |
+
[2025-10-10 13:22:09,610] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.245088815689087
|
| 670 |
+
[2025-10-10 13:22:10,855] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2448465824127197
|
| 671 |
+
[2025-10-10 13:22:12,106] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2508001327514648
|
| 672 |
+
[2025-10-10 13:22:13,381] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2749567031860352
|
| 673 |
+
[2025-10-10 13:22:13,382] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 674 |
+
|
| 675 |
+
|
| 676 |
0% 0/4 [00:00<?, ?it/s][A
|
| 677 |
+
|
| 678 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 679 |
+
|
| 680 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 681 |
+
|
| 682 |
|
| 683 |
+
|
| 684 |
|
| 685 |
+
|
| 686 |
19% 102/549 [10:28<34:32, 4.64s/it]
|
| 687 |
+
|
| 688 |
+
|
| 689 |
[A
|
| 690 |
19% 103/549 [10:32<1:00:52, 8.19s/it]
|
| 691 |
19% 104/549 [10:36<51:10, 6.90s/it]
|
| 692 |
19% 105/549 [10:40<44:23, 6.00s/it]
|
| 693 |
19% 106/549 [10:44<39:36, 5.36s/it]
|
| 694 |
19% 107/549 [10:48<36:21, 4.94s/it]
|
| 695 |
20% 108/549 [10:51<34:00, 4.63s/it][2025-10-10 13:22:43,069] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 696 |
+
[2025-10-10 13:22:45,569] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.246150016784668
|
| 697 |
+
[2025-10-10 13:22:46,822] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2529652118682861
|
| 698 |
+
[2025-10-10 13:22:48,105] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2827684879302979
|
| 699 |
+
[2025-10-10 13:22:49,347] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2417457103729248
|
| 700 |
+
[2025-10-10 13:22:49,347] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 701 |
+
|
| 702 |
+
|
| 703 |
0% 0/4 [00:00<?, ?it/s][A
|
| 704 |
+
|
| 705 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 706 |
+
|
| 707 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 708 |
+
|
| 709 |
|
| 710 |
+
|
| 711 |
|
| 712 |
+
|
| 713 |
20% 108/549 [11:04<34:00, 4.63s/it]
|
| 714 |
+
|
| 715 |
+
|
| 716 |
[A
|
| 717 |
20% 109/549 [11:08<59:10, 8.07s/it]
|
| 718 |
20% 110/549 [11:11<49:49, 6.81s/it]
|
| 719 |
20% 111/549 [11:15<43:16, 5.93s/it]
|
| 720 |
20% 112/549 [11:19<38:43, 5.32s/it]
|
| 721 |
21% 113/549 [11:23<35:29, 4.89s/it]
|
| 722 |
21% 114/549 [11:27<33:16, 4.59s/it][2025-10-10 13:23:18,582] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 723 |
+
[2025-10-10 13:23:21,111] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2601120471954346
|
| 724 |
+
[2025-10-10 13:23:22,364] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2526865005493164
|
| 725 |
+
[2025-10-10 13:23:23,588] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.223961591720581
|
| 726 |
+
[2025-10-10 13:23:24,824] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2359259128570557
|
| 727 |
+
[2025-10-10 13:23:24,824] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 728 |
+
|
| 729 |
+
|
| 730 |
0% 0/4 [00:00<?, ?it/s][A
|
| 731 |
+
|
| 732 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 733 |
+
|
| 734 |
75% 3/4 [00:02<00:00, 1.32it/s][A
|
| 735 |
+
|
| 736 |
|
| 737 |
+
|
| 738 |
|
| 739 |
+
|
| 740 |
21% 114/549 [11:39<33:16, 4.59s/it]
|
| 741 |
+
|
| 742 |
+
|
| 743 |
[A
|
| 744 |
21% 115/549 [11:43<57:40, 7.97s/it]
|
| 745 |
21% 116/549 [11:47<48:37, 6.74s/it]
|
| 746 |
21% 117/549 [11:51<42:19, 5.88s/it]
|
| 747 |
21% 118/549 [11:54<37:57, 5.28s/it]
|
| 748 |
22% 119/549 [11:58<34:52, 4.87s/it]
|
| 749 |
22% 120/549 [12:02<32:40, 4.57s/it][2025-10-10 13:23:53,849] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 750 |
+
[2025-10-10 13:23:56,384] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2743051052093506
|
| 751 |
+
[2025-10-10 13:23:57,627] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2425775527954102
|
| 752 |
+
[2025-10-10 13:23:58,872] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2450978755950928
|
| 753 |
+
[2025-10-10 13:24:00,110] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2371137142181396
|
| 754 |
+
[2025-10-10 13:24:00,110] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 755 |
+
|
| 756 |
+
|
| 757 |
0% 0/4 [00:00<?, ?it/s][A
|
| 758 |
+
|
| 759 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 760 |
+
|
| 761 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 762 |
+
|
| 763 |
|
| 764 |
+
|
| 765 |
|
| 766 |
+
|
| 767 |
22% 120/549 [12:14<32:40, 4.57s/it]
|
| 768 |
+
|
| 769 |
+
|
| 770 |
[A
|
| 771 |
22% 121/549 [12:18<56:50, 7.97s/it]
|
| 772 |
22% 122/549 [12:22<47:53, 6.73s/it]
|
| 773 |
22% 123/549 [12:26<41:44, 5.88s/it]
|
| 774 |
23% 124/549 [12:30<37:24, 5.28s/it]
|
| 775 |
23% 125/549 [12:34<34:22, 4.86s/it]
|
| 776 |
23% 126/549 [12:38<32:18, 4.58s/it][2025-10-10 13:24:29,185] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 777 |
+
[2025-10-10 13:24:32,019] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2589247226715088
|
| 778 |
+
[2025-10-10 13:24:33,287] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2684495449066162
|
| 779 |
+
[2025-10-10 13:24:34,521] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2337257862091064
|
| 780 |
+
[2025-10-10 13:24:35,751] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2298574447631836
|
| 781 |
+
[2025-10-10 13:24:35,752] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 782 |
+
|
| 783 |
+
|
| 784 |
0% 0/4 [00:00<?, ?it/s][A
|
| 785 |
+
|
| 786 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 787 |
+
|
| 788 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 789 |
+
|
| 790 |
|
| 791 |
+
|
| 792 |
|
| 793 |
+
|
| 794 |
23% 126/549 [12:50<32:18, 4.58s/it]
|
| 795 |
+
|
| 796 |
+
|
| 797 |
[A
|
| 798 |
23% 127/549 [12:54<56:47, 8.08s/it]
|
| 799 |
23% 128/549 [12:58<47:47, 6.81s/it]
|
| 800 |
23% 129/549 [13:02<41:35, 5.94s/it]
|
| 801 |
24% 130/549 [13:05<37:15, 5.33s/it]
|
| 802 |
24% 131/549 [13:09<34:11, 4.91s/it]
|
| 803 |
24% 132/549 [13:13<32:04, 4.61s/it][2025-10-10 13:25:04,943] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 804 |
+
[2025-10-10 13:25:08,050] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2962446212768555
|
| 805 |
+
[2025-10-10 13:25:09,299] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2489659786224365
|
| 806 |
+
[2025-10-10 13:25:10,529] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2295067310333252
|
| 807 |
+
[2025-10-10 13:25:11,775] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2457163333892822
|
| 808 |
+
[2025-10-10 13:25:11,775] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 809 |
+
|
| 810 |
+
|
| 811 |
0% 0/4 [00:00<?, ?it/s][A
|
| 812 |
+
|
| 813 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 814 |
+
|
| 815 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 816 |
+
|
| 817 |
|
| 818 |
+
|
| 819 |
|
| 820 |
+
|
| 821 |
24% 132/549 [13:26<32:04, 4.61s/it]
|
| 822 |
+
|
| 823 |
+
|
| 824 |
[A
|
| 825 |
24% 133/549 [13:30<56:33, 8.16s/it]
|
| 826 |
24% 134/549 [13:34<47:29, 6.87s/it]
|
| 827 |
25% 135/549 [13:37<41:08, 5.96s/it]
|
| 828 |
25% 136/549 [13:41<36:43, 5.34s/it]
|
| 829 |
25% 137/549 [13:45<33:38, 4.90s/it]
|
| 830 |
25% 138/549 [13:49<31:27, 4.59s/it][2025-10-10 13:25:40,704] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 831 |
+
[2025-10-10 13:25:43,878] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.726452112197876
|
| 832 |
+
[2025-10-10 13:25:45,124] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.245727777481079
|
| 833 |
+
[2025-10-10 13:25:46,372] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2481646537780762
|
| 834 |
+
[2025-10-10 13:25:47,600] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2278366088867188
|
| 835 |
+
[2025-10-10 13:25:47,601] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 836 |
+
|
| 837 |
+
|
| 838 |
0% 0/4 [00:00<?, ?it/s][A
|
| 839 |
+
|
| 840 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 841 |
+
|
| 842 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 843 |
+
|
| 844 |
|
| 845 |
+
|
| 846 |
|
| 847 |
+
|
| 848 |
25% 138/549 [14:02<31:27, 4.59s/it]
|
| 849 |
+
|
| 850 |
+
|
| 851 |
[A
|
| 852 |
25% 139/549 [14:06<55:44, 8.16s/it]
|
| 853 |
26% 140/549 [14:09<46:48, 6.87s/it]
|
| 854 |
26% 141/549 [14:13<40:35, 5.97s/it]
|
| 855 |
26% 142/549 [14:17<36:14, 5.34s/it]
|
| 856 |
26% 143/549 [14:21<33:13, 4.91s/it]
|
| 857 |
26% 144/549 [14:25<31:06, 4.61s/it][2025-10-10 13:26:16,597] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 858 |
+
[2025-10-10 13:26:19,460] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5904531478881836
|
| 859 |
+
[2025-10-10 13:26:21,049] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.5887093544006348
|
| 860 |
+
[2025-10-10 13:26:22,299] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2505967617034912
|
| 861 |
+
[2025-10-10 13:26:23,529] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.229478120803833
|
| 862 |
+
[2025-10-10 13:26:23,529] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 863 |
+
|
| 864 |
+
|
| 865 |
0% 0/4 [00:00<?, ?it/s][A
|
| 866 |
+
|
| 867 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 868 |
+
|
| 869 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 870 |
+
|
| 871 |
|
| 872 |
+
|
| 873 |
|
| 874 |
+
|
| 875 |
26% 144/549 [14:38<31:06, 4.61s/it]
|
| 876 |
+
|
| 877 |
+
|
| 878 |
[A
|
| 879 |
26% 145/549 [14:41<55:08, 8.19s/it]
|
| 880 |
27% 146/549 [14:45<46:16, 6.89s/it]
|
| 881 |
27% 147/549 [14:49<40:05, 5.98s/it]
|
| 882 |
27% 148/549 [14:53<35:50, 5.36s/it]
|
| 883 |
27% 149/549 [14:57<32:50, 4.93s/it]
|
| 884 |
27% 150/549 [15:01<30:40, 4.61s/it][2025-10-10 13:26:52,575] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 885 |
+
[2025-10-10 13:26:55,133] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.290666103363037
|
| 886 |
+
[2025-10-10 13:26:56,896] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7622182369232178
|
| 887 |
+
[2025-10-10 13:26:58,238] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3421812057495117
|
| 888 |
+
[2025-10-10 13:26:59,467] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2286624908447266
|
| 889 |
+
[2025-10-10 13:26:59,467] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 890 |
+
|
| 891 |
+
|
| 892 |
0% 0/4 [00:00<?, ?it/s][A
|
| 893 |
+
|
| 894 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 895 |
+
|
| 896 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 897 |
+
|
| 898 |
|
| 899 |
+
|
| 900 |
|
| 901 |
+
|
| 902 |
27% 150/549 [15:14<30:40, 4.61s/it]
|
| 903 |
+
|
| 904 |
+
|
| 905 |
[A
|
| 906 |
28% 151/549 [15:17<54:17, 8.18s/it]
|
| 907 |
28% 152/549 [15:21<45:33, 6.89s/it]
|
| 908 |
28% 153/549 [15:25<39:27, 5.98s/it]
|
| 909 |
28% 154/549 [15:29<35:14, 5.35s/it]
|
| 910 |
28% 155/549 [15:33<32:17, 4.92s/it]
|
| 911 |
28% 156/549 [15:36<28:59, 4.43s/it][2025-10-10 13:27:27,880] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 912 |
+
[2025-10-10 13:27:30,445] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2810289859771729
|
| 913 |
+
[2025-10-10 13:27:31,672] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2266299724578857
|
| 914 |
+
[2025-10-10 13:27:33,392] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7197673320770264
|
| 915 |
+
[2025-10-10 13:27:34,889] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4970765113830566
|
| 916 |
+
[2025-10-10 13:27:34,889] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 917 |
+
|
| 918 |
+
|
| 919 |
0% 0/4 [00:00<?, ?it/s][A
|
| 920 |
+
|
| 921 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 922 |
+
|
| 923 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 924 |
+
|
| 925 |
|
| 926 |
+
|
| 927 |
|
| 928 |
+
|
| 929 |
28% 156/549 [15:49<28:59, 4.43s/it]
|
| 930 |
+
|
| 931 |
+
|
| 932 |
[A
|
| 933 |
29% 157/549 [15:53<52:50, 8.09s/it]
|
| 934 |
29% 158/549 [15:57<44:26, 6.82s/it]
|
| 935 |
29% 159/549 [16:01<38:37, 5.94s/it]
|
| 936 |
29% 160/549 [16:04<34:31, 5.33s/it]
|
| 937 |
29% 161/549 [16:08<31:41, 4.90s/it]
|
| 938 |
30% 162/549 [16:12<29:40, 4.60s/it][2025-10-10 13:28:03,961] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 939 |
+
[2025-10-10 13:28:06,582] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2377257347106934
|
| 940 |
+
[2025-10-10 13:28:07,822] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2394213676452637
|
| 941 |
+
[2025-10-10 13:28:09,276] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4542319774627686
|
| 942 |
+
[2025-10-10 13:28:10,984] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.7078406810760498
|
| 943 |
+
[2025-10-10 13:28:10,984] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 944 |
+
|
| 945 |
+
|
| 946 |
0% 0/4 [00:00<?, ?it/s][A
|
| 947 |
+
|
| 948 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 949 |
+
|
| 950 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 951 |
+
|
| 952 |
|
| 953 |
+
|
| 954 |
|
| 955 |
+
|
| 956 |
30% 162/549 [16:25<29:40, 4.60s/it]
|
| 957 |
+
|
| 958 |
+
|
| 959 |
[A
|
| 960 |
30% 163/549 [16:29<52:53, 8.22s/it]
|
| 961 |
30% 164/549 [16:33<44:20, 6.91s/it]
|
| 962 |
30% 165/549 [16:37<38:21, 5.99s/it]
|
| 963 |
30% 166/549 [16:41<34:12, 5.36s/it]
|
| 964 |
30% 167/549 [16:44<31:21, 4.92s/it]
|
| 965 |
31% 168/549 [16:48<29:18, 4.62s/it][2025-10-10 13:28:40,029] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 966 |
+
[2025-10-10 13:28:42,579] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2564804553985596
|
| 967 |
+
[2025-10-10 13:28:43,831] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2514879703521729
|
| 968 |
+
[2025-10-10 13:28:45,090] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2588951587677002
|
| 969 |
+
[2025-10-10 13:28:46,762] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.6718604564666748
|
| 970 |
+
[2025-10-10 13:28:46,762] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 971 |
+
|
| 972 |
+
|
| 973 |
0% 0/4 [00:00<?, ?it/s][A
|
| 974 |
+
|
| 975 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 976 |
+
|
| 977 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 978 |
+
|
| 979 |
|
| 980 |
+
|
| 981 |
|
| 982 |
+
|
| 983 |
31% 168/549 [17:01<29:18, 4.62s/it]
|
| 984 |
+
|
| 985 |
+
|
| 986 |
[A
|
| 987 |
31% 169/549 [17:05<52:01, 8.21s/it]
|
| 988 |
31% 170/549 [17:09<43:37, 6.91s/it]
|
| 989 |
31% 171/549 [17:13<37:46, 6.00s/it]
|
| 990 |
31% 172/549 [17:17<33:43, 5.37s/it]
|
| 991 |
32% 173/549 [17:21<30:52, 4.93s/it]
|
| 992 |
32% 174/549 [17:24<28:52, 4.62s/it][2025-10-10 13:29:16,068] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 993 |
+
[2025-10-10 13:29:18,702] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.3219468593597412
|
| 994 |
+
[2025-10-10 13:29:20,003] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.301271915435791
|
| 995 |
+
[2025-10-10 13:29:21,280] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2761008739471436
|
| 996 |
+
[2025-10-10 13:29:22,700] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.4196476936340332
|
| 997 |
+
[2025-10-10 13:29:22,700] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 998 |
+
|
| 999 |
+
|
| 1000 |
0% 0/4 [00:00<?, ?it/s][A
|
| 1001 |
+
|
| 1002 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 1003 |
+
|
| 1004 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 1005 |
+
|
| 1006 |
|
| 1007 |
+
|
| 1008 |
|
| 1009 |
+
|
| 1010 |
32% 174/549 [17:37<28:52, 4.62s/it]
|
| 1011 |
+
|
| 1012 |
+
|
| 1013 |
[A
|
| 1014 |
32% 175/549 [17:41<51:37, 8.28s/it]
|
| 1015 |
32% 176/549 [17:45<43:14, 6.96s/it]
|
| 1016 |
32% 177/549 [17:49<37:23, 6.03s/it]
|
| 1017 |
32% 178/549 [17:53<33:19, 5.39s/it]
|
| 1018 |
33% 179/549 [17:57<30:29, 4.94s/it]
|
| 1019 |
33% 180/549 [18:01<28:28, 4.63s/it][2025-10-10 13:29:52,329] [INFO] [axolotl.core.trainers.base.evaluate:376] [PID:24741] Running evaluation step...
|
| 1020 |
+
[2025-10-10 13:29:54,842] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2502317428588867
|
| 1021 |
+
[2025-10-10 13:29:56,106] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.2638275623321533
|
| 1022 |
+
[2025-10-10 13:29:57,362] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.255711317062378
|
| 1023 |
+
[2025-10-10 13:29:58,622] [DEBUG] [axolotl.utils.samplers.multipack.__len__:462] [PID:24741] generate_batches time: 1.260202169418335
|
| 1024 |
+
[2025-10-10 13:29:58,622] [INFO] [axolotl.utils.samplers.multipack.calc_min_len:438] [PID:24741] gather_len_batches: [4]
|
| 1025 |
+
|
| 1026 |
+
|
| 1027 |
0% 0/4 [00:00<?, ?it/s][A
|
| 1028 |
+
|
| 1029 |
50% 2/4 [00:01<00:01, 1.85it/s][A
|
| 1030 |
+
|
| 1031 |
75% 3/4 [00:02<00:00, 1.33it/s][A
|
| 1032 |
+
|
| 1033 |
|
| 1034 |
+
|
| 1035 |
|
| 1036 |
+
|
| 1037 |
33% 180/549 [18:13<28:28, 4.63s/it]
|
| 1038 |
+
|
| 1039 |
+
|
| 1040 |
[A
|
| 1041 |
33% 181/549 [18:17<49:55, 8.14s/it]
|
| 1042 |
33% 182/549 [18:21<41:59, 6.86s/it]
|
| 1043 |
33% 183/549 [18:25<36:37, 6.00s/it][2025-10-10 13:30:16,541] [INFO] [axolotl.core.trainers.base._save:671] [PID:24741] Saving model checkpoint to ./qlora-out/checkpoint-183
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "<|endoftext|>",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": true,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "<|endoftext|>",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
},
|
| 16 |
+
"pad_token": {
|
| 17 |
+
"content": "<|endoftext|>",
|
| 18 |
+
"lstrip": false,
|
| 19 |
+
"normalized": false,
|
| 20 |
+
"rstrip": false,
|
| 21 |
+
"single_word": false
|
| 22 |
+
},
|
| 23 |
+
"unk_token": {
|
| 24 |
+
"content": "<|endoftext|>",
|
| 25 |
+
"lstrip": false,
|
| 26 |
+
"normalized": true,
|
| 27 |
+
"rstrip": false,
|
| 28 |
+
"single_word": false
|
| 29 |
+
}
|
| 30 |
+
}
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"0": {
|
| 5 |
+
"content": "<|endoftext|>",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
}
|
| 12 |
+
},
|
| 13 |
+
"bos_token": "<|endoftext|>",
|
| 14 |
+
"clean_up_tokenization_spaces": false,
|
| 15 |
+
"eos_token": "<|endoftext|>",
|
| 16 |
+
"extra_special_tokens": {},
|
| 17 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 18 |
+
"pad_token": "<|endoftext|>",
|
| 19 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 20 |
+
"unk_token": "<|endoftext|>"
|
| 21 |
+
}
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:15872f81ba0df2910609edb27c17b1b5641f9f269d4eba7fe013ab826ef0c746
|
| 3 |
+
size 7313
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|