[2026-03-31 02:46:14,052] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:10906] baseline 0.000GB () [2026-03-31 02:46:14,053] [INFO] [axolotl.cli.config.load_cfg:341] [PID:10906] config: { "activation_offloading": false, "adapter": "lora", "axolotl_config_path": "writer.yaml", "base_model": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "base_model_config": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "batch_size": 16, "bf16": true, "capabilities": { "bf16": true, "compute_capability": "sm_90", "fp8": true, "n_gpu": 1, "n_node": 1, "tf32": true }, "chat_template": "jinja", "chat_template_jinja": "{% set bos = \"<|begin_of_text|>\" %}{%- set enable_thinking = false -%}{% set system_start_header = \"<|start_header_id|>\" %}{% set system_end_header = \"<|end_header_id|>\n\n\" %}{% set start_header = \"<|start_header_id|>\" %}{% set end_header = \"<|end_header_id|>\n\n\" %}{% set eot = \"<|eot_id|>\" %}{% set system_token = \"system\" %}{% set user_token = \"user\" %}{% set assistant_token = \"assistant\" %}{% set tool_token = \"tool\" %}{{- bos ~ system_start_header ~ system_token ~ system_end_header -}}{%- if messages[0].role == 'system' and messages[0].content != '' -%}{%- set system_content = messages[0].content -%}{%- if '/no_think' in system_content -%}{%- set system_content = system_content.replace('/no_think', '')|trim -%}{%- set enable_thinking = false -%}{%- elif '/think' in system_content -%}{%- set system_content = system_content.replace('/think', '')|trim -%}{%- set enable_thinking = true -%}{%- endif -%}{{- system_content + '\n\n' -}}{%- endif -%}{%- if tools -%}{{- 'You can use the following tools to assist the user if required:\n[' -}}{%- for tool in tools -%}{{- (tool.function if tool.function is defined else tool) | tojson -}}{{- ', ' if not loop.last else '' -}}{%- endfor -%}{{- ']\n\nIf you decide to call any tool(s), use the following format:\n[{{\"name\": \"tool_name1\", \"arguments\": \"tool_args1\"}}, {{\"name\": \"tool_name2\", \"arguments\": \"tool_args2\"}}]\n\nResponse from tool(s) will be returned in this format:\n[{{\"response\": \"tool_response1\"}}, {{\"response\": \"tool_response2\"}}]\n\nBased on the results returned by the tool(s), you can call additional tools if needed, correct tool calls if any errors are found, or just respond with the answer to the user.' -}}{%- endif -%}{{- eot -}}{%- for message in messages -%}{%- if message.role == user_token -%}{{- start_header ~ user_token ~ end_header -}}{{ message.content -}}{{ eot -}}{%- elif message.role == assistant_token -%}{%- if '' in message.content -%}{%- set content = message.content.split('')[-1].lstrip() -%}{%- else -%}{%- set content = message.content -%}{%- endif -%}{{- start_header ~ assistant_token ~ end_header -}}{{ content -}}{%- if message.tool_calls -%}{{- '[' -}}{%- for call in message.tool_calls -%}{%- set fn = call.function if call.function is defined else call -%}{{- '{\"name\": \"' + fn.name + '\", \"arguments\": ' -}}{%- if fn.arguments is string -%}{{- fn.arguments -}}{%- else -%}{{- fn.arguments | tojson -}}{%- endif -%}{{- '}' + (', ' if not loop.last else '') -}}{%- endfor -%}{{- ']' -}}{%- endif -%}{{- eot -}}{%- elif message.role == tool_token -%}{%- if loop.first or (messages[loop.index0 - 1].role != tool_token) -%}{{- start_header ~ tool_token ~ end_header -}}{{ '[' -}}{%- endif -%}{{- message.content -}}{{- ', ' if not loop.last and (messages[loop.index0 + 1].role == tool_token) else '' -}}{%- if loop.last or (messages[loop.index0 + 1].role != tool_token) -%}{{- ']' -}}{{ eot -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{- start_header ~ assistant_token ~ end_header -}}{%- if not enable_thinking -%}{{- '\n\n\n\n' -}}{%- endif -%}{%- endif -%}", "context_parallel_size": 1, "dataloader_num_workers": 1, "dataloader_pin_memory": true, "dataloader_prefetch_factor": 256, "dataset_num_proc": 8, "datasets": [ { "chat_template": "tokenizer_default", "message_field_training": "train", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/GLiMA_Thinking", "roles_to_train": [], "train_on_eos": "turn", "trust_remote_code": false, "type": "chat_template" }, { "chat_template": "tokenizer_default", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/Gutenberg-SFT", "trust_remote_code": false, "type": "chat_template" }, { "chat_template": "tokenizer_default", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/Condor-SFT-Filtered", "split": "train[:250]", "trust_remote_code": false, "type": "chat_template" }, { "chat_template": "tokenizer_default", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/Ao3_Soft_Refusal", "trust_remote_code": false, "type": "chat_template" }, { "chat_template": "tokenizer_default", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/VSF", "trust_remote_code": false, "type": "chat_template" } ], "ddp": false, "device": "cuda:0", "device_map": "auto", "dion_rank_fraction": 1.0, "dion_rank_multiple_of": 1, "eaft_alpha": 1.0, "eaft_k": 20, "env_capabilities": { "torch_version": "2.9.1" }, "eval_batch_size": 1, "eval_causal_lm_metrics": [ "sacrebleu", "comet", "ter", "chrf" ], "eval_max_new_tokens": 128, "eval_sample_packing": true, "eval_table_size": 0, "experimental_skip_move_to_device": true, "flash_attention": true, "fp16": false, "generate_samples": false, "generation_do_sample": true, "generation_max_new_tokens": 50, "generation_prompt_ratio": 0.5, "generation_temperature": 0.7, "gradient_accumulation_steps": 16, "gradient_checkpointing": true, "gradient_checkpointing_kwargs": { "use_reentrant": true }, "include_tkps": true, "is_llama_derived_model": true, "layer_offloading": false, "learning_rate": 1.25e-05, "lisa_layers_attribute": "model.layers", "load_best_model_at_end": false, "load_in_4bit": false, "load_in_8bit": false, "local_rank": 0, "logging_steps": 1, "lora_alpha": 64, "lora_dropout": 0.0, "lora_mlp_kernel": false, "lora_o_kernel": false, "lora_qkv_kernel": false, "lora_r": 32, "lora_target_linear": true, "loraplus_lr_embedding": 1e-06, "loraplus_lr_ratio": 16.0, "lr_scheduler": "constant_with_warmup", "max_grad_norm": 1.0, "mean_resizing_embeddings": false, "merge_method": "memory_efficient", "micro_batch_size": 1, "model_config_type": "nemotron-nas", "num_epochs": 3.0, "num_generation_samples": 3, "optimizer": "paged_adamw_8bit", "otel_metrics_host": "localhost", "otel_metrics_port": 8000, "output_dir": "./Writer-Stage-1", "pad_to_sequence_len": true, "pretrain_multipack_attn": true, "profiler_steps_start": 0, "qlora_sharded_model_loading": false, "quantize_moe_experts": false, "ray_num_workers": 1, "resources_per_worker": { "GPU": 1 }, "sample_packing": true, "sample_packing_bin_size": 200, "sample_packing_group_size": 100000, "save_only_model": false, "save_safetensors": true, "save_strategy": "no", "seed": 42, "sequence_len": 5120, "shuffle_before_merging_datasets": false, "shuffle_merged_datasets": true, "skip_prepare_dataset": false, "streaming_multipack_buffer_size": 10000, "strict": false, "tensor_parallel_size": 1, "tf32": true, "tiled_mlp_use_original_mlp": true, "tokenizer_config": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "tokenizer_save_jinja_files": true, "torch_dtype": "torch.bfloat16", "train_on_inputs": false, "trl": { "async_prefetch": false, "log_completions": false, "mask_truncated_completions": false, "ref_model_mixup_alpha": 0.9, "ref_model_sync_steps": 64, "replay_buffer_size": 0, "replay_recompute_logps": true, "reroll_max_groups": 1, "reroll_start_fraction": 1.0, "reward_num_workers": 1, "scale_rewards": true, "skip_zero_advantage_batches": true, "sync_ref_model": false, "use_data_producer": false, "use_vllm": false, "vllm_lora_sync": false, "vllm_server_host": "0.0.0.0", "vllm_server_port": 8000 }, "trust_remote_code": true, "use_otel_metrics": false, "use_ray": false, "use_tensorboard": true, "val_set_size": 0.0, "vllm": { "device": "auto", "dtype": "auto", "gpu_memory_utilization": 0.9, "host": "0.0.0.0", "port": 8000 }, "warmup_ratio": 0.05, "weight_decay": 0.0, "world_size": 1 } [2026-03-31 02:46:14,057] [INFO] [axolotl.utils.schemas.validation.check_eval_packing:129] [PID:10906] explicitly setting `eval_sample_packing` to match `sample_packing` [2026-03-31 02:46:14,057] [WARNING] [axolotl.utils.schemas.validation.check_sample_packing_without_attention:190] [PID:10906] sample_packing without flash, sdp, xformers, sage, or flex attention does not handle cross sample decontamination. [2026-03-31 02:46:14,057] [INFO] [axolotl.utils.schemas.validation.hint_sample_packing_padding:239] [PID:10906] Setting `pad_to_sequence_len: true` to prevent memory leaks when sample_packing [2026-03-31 02:46:14,057] [WARNING] [axolotl.utils.schemas.model.hint_trust_remote_code:103] [PID:10906] `trust_remote_code` is set to true. Please make sure that you reviewed the remote code/model. [2026-03-31 02:46:14,759] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:10906] baseline 0.000GB () [2026-03-31 02:46:14,760] [INFO] [axolotl.cli.config.load_cfg:341] [PID:10906] config: { "activation_offloading": false, "adapter": "lora", "axolotl_config_path": "writer.yaml", "base_model": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "base_model_config": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "batch_size": 16, "bf16": true, "capabilities": { "bf16": true, "compute_capability": "sm_90", "fp8": true, "n_gpu": 1, "n_node": 1, "tf32": true }, "chat_template": "jinja", "chat_template_jinja": "{% set bos = \"<|begin_of_text|>\" %}{%- set enable_thinking = false -%}{% set system_start_header = \"<|start_header_id|>\" %}{% set system_end_header = \"<|end_header_id|>\n\n\" %}{% set start_header = \"<|start_header_id|>\" %}{% set end_header = \"<|end_header_id|>\n\n\" %}{% set eot = \"<|eot_id|>\" %}{% set system_token = \"system\" %}{% set user_token = \"user\" %}{% set assistant_token = \"assistant\" %}{% set tool_token = \"tool\" %}{{- bos ~ system_start_header ~ system_token ~ system_end_header -}}{%- if messages[0].role == 'system' and messages[0].content != '' -%}{%- set system_content = messages[0].content -%}{%- if '/no_think' in system_content -%}{%- set system_content = system_content.replace('/no_think', '')|trim -%}{%- set enable_thinking = false -%}{%- elif '/think' in system_content -%}{%- set system_content = system_content.replace('/think', '')|trim -%}{%- set enable_thinking = true -%}{%- endif -%}{{- system_content + '\n\n' -}}{%- endif -%}{%- if tools -%}{{- 'You can use the following tools to assist the user if required:\n[' -}}{%- for tool in tools -%}{{- (tool.function if tool.function is defined else tool) | tojson -}}{{- ', ' if not loop.last else '' -}}{%- endfor -%}{{- ']\n\nIf you decide to call any tool(s), use the following format:\n[{{\"name\": \"tool_name1\", \"arguments\": \"tool_args1\"}}, {{\"name\": \"tool_name2\", \"arguments\": \"tool_args2\"}}]\n\nResponse from tool(s) will be returned in this format:\n[{{\"response\": \"tool_response1\"}}, {{\"response\": \"tool_response2\"}}]\n\nBased on the results returned by the tool(s), you can call additional tools if needed, correct tool calls if any errors are found, or just respond with the answer to the user.' -}}{%- endif -%}{{- eot -}}{%- for message in messages -%}{%- if message.role == user_token -%}{{- start_header ~ user_token ~ end_header -}}{{ message.content -}}{{ eot -}}{%- elif message.role == assistant_token -%}{%- if '' in message.content -%}{%- set content = message.content.split('')[-1].lstrip() -%}{%- else -%}{%- set content = message.content -%}{%- endif -%}{{- start_header ~ assistant_token ~ end_header -}}{{ content -}}{%- if message.tool_calls -%}{{- '[' -}}{%- for call in message.tool_calls -%}{%- set fn = call.function if call.function is defined else call -%}{{- '{\"name\": \"' + fn.name + '\", \"arguments\": ' -}}{%- if fn.arguments is string -%}{{- fn.arguments -}}{%- else -%}{{- fn.arguments | tojson -}}{%- endif -%}{{- '}' + (', ' if not loop.last else '') -}}{%- endfor -%}{{- ']' -}}{%- endif -%}{{- eot -}}{%- elif message.role == tool_token -%}{%- if loop.first or (messages[loop.index0 - 1].role != tool_token) -%}{{- start_header ~ tool_token ~ end_header -}}{{ '[' -}}{%- endif -%}{{- message.content -}}{{- ', ' if not loop.last and (messages[loop.index0 + 1].role == tool_token) else '' -}}{%- if loop.last or (messages[loop.index0 + 1].role != tool_token) -%}{{- ']' -}}{{ eot -}}{%- endif -%}{%- endif -%}{%- endfor -%}{%- if add_generation_prompt -%}{{- start_header ~ assistant_token ~ end_header -}}{%- if not enable_thinking -%}{{- '\n\n\n\n' -}}{%- endif -%}{%- endif -%}", "context_parallel_size": 1, "dataloader_num_workers": 1, "dataloader_pin_memory": true, "dataloader_prefetch_factor": 256, "dataset_num_proc": 8, "datasets": [ { "chat_template": "tokenizer_default", "message_field_training": "train", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/GLiMA_Thinking", "roles_to_train": [], "train_on_eos": "turn", "trust_remote_code": false, "type": "chat_template" }, { "chat_template": "tokenizer_default", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/Gutenberg-SFT", "trust_remote_code": false, "type": "chat_template" }, { "chat_template": "tokenizer_default", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/Condor-SFT-Filtered", "split": "train[:250]", "trust_remote_code": false, "type": "chat_template" }, { "chat_template": "tokenizer_default", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/Ao3_Soft_Refusal", "trust_remote_code": false, "type": "chat_template" }, { "chat_template": "tokenizer_default", "message_property_mappings": { "content": "content", "role": "role" }, "path": "ConicCat/VSF", "trust_remote_code": false, "type": "chat_template" } ], "ddp": false, "device": "cuda:0", "device_map": "auto", "dion_rank_fraction": 1.0, "dion_rank_multiple_of": 1, "eaft_alpha": 1.0, "eaft_k": 20, "env_capabilities": { "torch_version": "2.9.1" }, "eval_batch_size": 1, "eval_causal_lm_metrics": [ "sacrebleu", "comet", "ter", "chrf" ], "eval_max_new_tokens": 128, "eval_sample_packing": true, "eval_table_size": 0, "experimental_skip_move_to_device": true, "flash_attention": false, "fp16": false, "generate_samples": false, "generation_do_sample": true, "generation_max_new_tokens": 50, "generation_prompt_ratio": 0.5, "generation_temperature": 0.7, "gradient_accumulation_steps": 16, "gradient_checkpointing": true, "gradient_checkpointing_kwargs": { "use_reentrant": true }, "include_tkps": true, "is_llama_derived_model": true, "layer_offloading": false, "learning_rate": 1.25e-05, "lisa_layers_attribute": "model.layers", "load_best_model_at_end": false, "load_in_4bit": false, "load_in_8bit": false, "local_rank": 0, "logging_steps": 1, "lora_alpha": 64, "lora_dropout": 0.0, "lora_mlp_kernel": false, "lora_o_kernel": false, "lora_qkv_kernel": false, "lora_r": 32, "lora_target_linear": true, "loraplus_lr_embedding": 1e-06, "loraplus_lr_ratio": 16.0, "lr_scheduler": "constant_with_warmup", "max_grad_norm": 1.0, "mean_resizing_embeddings": false, "merge_lora": true, "merge_method": "memory_efficient", "micro_batch_size": 1, "model_config_type": "nemotron-nas", "num_epochs": 3.0, "num_generation_samples": 3, "optimizer": "paged_adamw_8bit", "otel_metrics_host": "localhost", "otel_metrics_port": 8000, "output_dir": "./Writer-Stage-1", "pad_to_sequence_len": true, "pretrain_multipack_attn": true, "profiler_steps_start": 0, "qlora_sharded_model_loading": false, "quantize_moe_experts": false, "ray_num_workers": 1, "resources_per_worker": { "GPU": 1 }, "sample_packing": true, "sample_packing_bin_size": 200, "sample_packing_group_size": 100000, "save_only_model": false, "save_safetensors": true, "save_strategy": "no", "seed": 42, "sequence_len": 5120, "shuffle_before_merging_datasets": false, "shuffle_merged_datasets": true, "skip_prepare_dataset": false, "streaming_multipack_buffer_size": 10000, "strict": false, "tensor_parallel_size": 1, "tf32": true, "tiled_mlp_use_original_mlp": true, "tokenizer_config": "nvidia/Llama-3_3-Nemotron-Super-49B-v1_5", "tokenizer_save_jinja_files": true, "torch_dtype": "torch.bfloat16", "train_on_inputs": false, "trl": { "async_prefetch": false, "log_completions": false, "mask_truncated_completions": false, "ref_model_mixup_alpha": 0.9, "ref_model_sync_steps": 64, "replay_buffer_size": 0, "replay_recompute_logps": true, "reroll_max_groups": 1, "reroll_start_fraction": 1.0, "reward_num_workers": 1, "scale_rewards": true, "skip_zero_advantage_batches": true, "sync_ref_model": false, "use_data_producer": false, "use_vllm": false, "vllm_lora_sync": false, "vllm_server_host": "0.0.0.0", "vllm_server_port": 8000 }, "trust_remote_code": true, "use_otel_metrics": false, "use_ray": false, "use_tensorboard": true, "val_set_size": 0.0, "vllm": { "device": "auto", "dtype": "auto", "gpu_memory_utilization": 0.9, "host": "0.0.0.0", "port": 8000 }, "warmup_ratio": 0.05, "weight_decay": 0.0, "world_size": 1 } [2026-03-31 02:46:14,760] [DEBUG] [axolotl.cli.merge_lora.do_merge_lora:32] [PID:10906] Using memory-efficient LoRA merging method... [2026-03-31 02:46:14,760] [DEBUG] [axolotl.cli.merge_lora._do_merge_lora_efficient:79] [PID:10906] Using memory-efficient LoRA merging method... Downloading (incomplete total...): 0.00B [00:00, ?B/s] Fetching 47 files: 0%| | 0/47 [00:00 sys.exit(main()) ^^^^^^ File "/workspace/axolotl/src/axolotl/cli/main.py", line 347, in main cli() File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/click/core.py", line 1485, in __call__ return self.main(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/click/core.py", line 1406, in main rv = self.invoke(ctx) ^^^^^^^^^^^^^^^^ File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/click/core.py", line 1873, in invoke return _process_result(sub_ctx.command.invoke(sub_ctx)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/click/core.py", line 1269, in invoke return ctx.invoke(self.callback, **ctx.params) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/click/core.py", line 824, in invoke return callback(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^ File "/workspace/axolotl/src/axolotl/cli/utils/args.py", line 48, in wrapper return func(*args, **filtered_kwargs) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/workspace/axolotl/src/axolotl/cli/main.py", line 293, in merge_lora do_cli(config=config, **kwargs) File "/workspace/axolotl/src/axolotl/cli/merge_lora.py", line 169, in do_cli do_merge_lora(cfg=parsed_cfg) File "/workspace/axolotl/src/axolotl/telemetry/errors.py", line 127, in wrapper return func(*args, **kwargs) ^^^^^^^^^^^^^^^^^^^^^ File "/workspace/axolotl/src/axolotl/cli/merge_lora.py", line 33, in do_merge_lora _do_merge_lora_efficient(cfg=cfg) File "/workspace/axolotl/src/axolotl/cli/merge_lora.py", line 108, in _do_merge_lora_efficient merge_lora_sharded_efficient( File "/workspace/axolotl/src/axolotl/cli/utils/lora_merge.py", line 940, in merge_lora_sharded_efficient safetensors.torch.save_file( File "/root/miniconda3/envs/py3.11/lib/python3.11/site-packages/safetensors/torch.py", line 307, in save_file serialize_file(_flatten(tensors), filename, metadata=metadata) safetensors_rust.SafetensorError: Error while serializing: I/O error: No space left on device (os error 28)