[2026-04-09 03:30:40,680] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:293655] baseline 0.000GB ()
[2026-04-09 03:30:40,681] [INFO] [axolotl.cli.config.load_cfg:259] [PID:293655] config:
{
  "activation_offloading": false,
  "adapter": "lora",
  "axolotl_config_path": "./axolotl_configs/Qwen2.5-coder-7b-instruct/stage_1_2/lora-stage2-secure.yaml",
  "base_model": "Qwen/Qwen2.5-Coder-7B-Instruct",
  "base_model_config": "Qwen/Qwen2.5-Coder-7B-Instruct",
  "batch_size": 64,
  "bf16": true,
  "capabilities": {
    "bf16": true,
    "compute_capability": "sm_90",
    "fp8": true,
    "n_gpu": 4,
    "n_node": 1
  },
  "context_parallel_size": 1,
  "dataloader_num_workers": 4,
  "dataloader_pin_memory": true,
  "dataloader_prefetch_factor": 256,
  "dataset_num_proc": 112,
  "dataset_prepared_path": "/home/tkwang/scratch/SecSteer/axolotl-datasets/lora/Qwen2.5-Coder-7B/stage2-secure",
  "datasets": [
    {
      "chat_template": "tokenizer_default",
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "felixwangg/stage_2_secure",
      "split": "train",
      "trust_remote_code": false,
      "type": "chat_template"
    }
  ],
  "ddp": true,
  "device": "cuda:0",
  "device_map": {
    "": 0
  },
  "dion_rank_fraction": 1.0,
  "dion_rank_multiple_of": 1,
  "early_stopping_patience": 1000,
  "env_capabilities": {
    "torch_version": "2.10.0"
  },
  "eval_batch_size": 4,
  "eval_causal_lm_metrics": [
    "sacrebleu",
    "comet",
    "ter",
    "chrf"
  ],
  "eval_max_new_tokens": 128,
  "eval_sample_packing": false,
  "eval_steps": 15,
  "eval_table_size": 0,
  "experimental_skip_move_to_device": true,
  "flash_attention": true,
  "fp16": false,
  "gradient_accumulation_steps": 4,
  "gradient_checkpointing": true,
  "gradient_checkpointing_kwargs": {
    "use_reentrant": true
  },
  "include_tkps": true,
  "is_falcon_derived_model": false,
  "is_llama_derived_model": false,
  "is_mistral_derived_model": false,
  "learning_rate": 2e-05,
  "lisa_layers_attribute": "model.layers",
  "load_best_model_at_end": true,
  "load_in_4bit": false,
  "load_in_8bit": false,
  "local_rank": 0,
  "logging_steps": 1,
  "lora_alpha": 16,
  "lora_dropout": 0.05,
  "lora_model_dir": "/home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage1-combined/checkpoint-23",
  "lora_r": 16,
  "lora_target_linear": true,
  "loraplus_lr_embedding": 1e-06,
  "lr_scheduler": "cosine",
  "mean_resizing_embeddings": false,
  "merge_lora": true,
  "micro_batch_size": 4,
  "model_config_type": "qwen2",
  "num_epochs": 1.0,
  "optimizer": "adamw_torch",
  "otel_metrics_host": "localhost",
  "otel_metrics_port": 8000,
  "output_dir": "/home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure",
  "pad_to_sequence_len": true,
  "pretrain_multipack_attn": true,
  "profiler_steps_start": 0,
  "qlora_sharded_model_loading": false,
  "ray_num_workers": 1,
  "resources_per_worker": {
    "GPU": 1
  },
  "sample_packing": false,
  "sample_packing_bin_size": 200,
  "sample_packing_group_size": 100000,
  "save_only_model": false,
  "save_safetensors": true,
  "save_steps": 15,
  "save_total_limit": 1000,
  "sequence_len": 4096,
  "shuffle_before_merging_datasets": false,
  "shuffle_merged_datasets": true,
  "skip_prepare_dataset": false,
  "streaming_multipack_buffer_size": 10000,
  "strict": false,
  "tensor_parallel_size": 1,
  "test_datasets": [
    {
      "chat_template": "tokenizer_default",
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "felixwangg/stage_2_secure",
      "split": "validation",
      "trust_remote_code": false,
      "type": "chat_template"
    }
  ],
  "tf32": false,
  "tiled_mlp_use_original_mlp": true,
  "tokenizer_config": "Qwen/Qwen2.5-Coder-7B-Instruct",
  "tokenizer_save_jinja_files": true,
  "tokenizer_type": "AutoTokenizer",
  "torch_dtype": "torch.bfloat16",
  "train_on_inputs": false,
  "trl": {
    "log_completions": false,
    "mask_truncated_completions": false,
    "ref_model_mixup_alpha": 0.9,
    "ref_model_sync_steps": 64,
    "scale_rewards": true,
    "sync_ref_model": false,
    "use_vllm": false,
    "vllm_server_host": "0.0.0.0",
    "vllm_server_port": 8000
  },
  "type_of_model": "Qwen2ForCausalLM",
  "use_otel_metrics": false,
  "use_ray": false,
  "use_wandb": true,
  "val_set_size": 0.0,
  "vllm": {
    "device": "auto",
    "dtype": "auto",
    "gpu_memory_utilization": 0.9,
    "host": "0.0.0.0",
    "port": 8000
  },
  "wandb_entity": "wtkuan",
  "wandb_log_model": "false",
  "wandb_name": "Qwen2.5-Coder-7B-stage2-secure",
  "wandb_project": "sft-primevul-sweep-ctx-0",
  "wandb_watch": "false",
  "warmup_ratio": 0.1,
  "weight_decay": 0.02,
  "world_size": 4
}
[2026-04-09 03:30:41,238] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:293655] EOS: 151645 / <|im_end|>
[2026-04-09 03:30:41,238] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:293655] BOS: None / None
[2026-04-09 03:30:41,238] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:293655] PAD: 151643 / <|endoftext|>
[2026-04-09 03:30:41,238] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:293655] UNK: None / None
[2026-04-09 03:30:41,812] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:475] [PID:293655] Loading prepared dataset from disk at /home/tkwang/scratch/SecSteer/axolotl-datasets/lora/Qwen2.5-Coder-7B/stage2-secure/372a26636afc98b5e76ce25d2b26305b...
[2026-04-09 03:30:41,827] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:475] [PID:293655] Loading prepared dataset from disk at /home/tkwang/scratch/SecSteer/axolotl-datasets/lora/Qwen2.5-Coder-7B/stage2-secure/d6ee772141270dc60aba6ee42648aaa1...
[2026-04-09 03:30:42,214] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:417] [PID:293655] total_num_tokens: 3_095_863
[2026-04-09 03:30:42,684] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:435] [PID:293655] `total_supervised_tokens: 2_473_951`
[2026-04-09 03:30:42,684] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:533] [PID:293655] total_num_steps: 46
[2026-04-09 03:30:42,684] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:293655] Maximum number of steps set at 46
[2026-04-09 03:30:42,702] [DEBUG] [axolotl.train.setup_model_and_tokenizer:70] [PID:293655] loading tokenizer... Qwen/Qwen2.5-Coder-7B-Instruct
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:293655] EOS: 151645 / <|im_end|>
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:293655] BOS: None / None
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:293655] PAD: 151643 / <|endoftext|>
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:293655] UNK: None / None
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.train.setup_model_and_tokenizer:82] [PID:293655] Loading model
[2026-04-09 03:30:43,316] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:293655] Patched Trainer.evaluation_loop with nanmean loss calculation
[2026-04-09 03:30:43,317] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:293655] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
Loading checkpoint shards:   0%|                                                                                    | 0/4 [00:00<?, ?it/s]Loading checkpoint shards:  25%|███████████████████                                                         | 1/4 [00:00<00:02,  1.01it/s]Loading checkpoint shards:  50%|██████████████████████████████████████                                      | 2/4 [00:02<00:02,  1.06s/it]Loading checkpoint shards:  75%|█████████████████████████████████████████████████████████                   | 3/4 [00:03<00:01,  1.11s/it]Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████| 4/4 [00:03<00:00,  1.27it/s]Loading checkpoint shards: 100%|████████████████████████████████████████████████████████████████████████████| 4/4 [00:03<00:00,  1.12it/s]
[2026-04-09 03:30:48,236] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:347] [PID:293655] Converting modules to torch.bfloat16
[2026-04-09 03:30:48,238] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:293655] Memory usage after model load 17.233GB (+17.233GB allocated, +18.252GB reserved)
[2026-04-09 03:30:48,238] [INFO] [axolotl.loaders.adapter.load_lora:81] [PID:293655] found linear modules: ['down_proj', 'gate_proj', 'k_proj', 'o_proj', 'q_proj', 'up_proj', 'v_proj']
[2026-04-09 03:30:48,239] [DEBUG] [axolotl.loaders.adapter.load_lora:150] [PID:293655] Loading pretrained PEFT - LoRA
trainable params: 40,370,176 || all params: 7,655,986,688 || trainable%: 0.5273
[2026-04-09 03:30:48,731] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:293655] after adapters 14.487GB (+14.487GB allocated, +18.365GB reserved)
[2026-04-09 03:30:53,769] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/trl/extras/vllm_client.py:37: UserWarning: TRL currently supports vLLM versions: 0.10.2, 0.11.0, 0.11.1, 0.11.2, 0.12.0. You have version 0.18.0 installed. We recommend installing a supported version to avoid compatibility issues.
  if is_vllm_available():

[2026-04-09 03:30:54,989] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/trl/trainer/grpo_trainer.py:105: UserWarning: TRL currently supports vLLM versions: 0.10.2, 0.11.0, 0.11.1, 0.11.2, 0.12.0. You have version 0.18.0 installed. We recommend installing a supported version to avoid compatibility issues.
  if is_vllm_available():

[2026-04-09 03:31:07,404] [INFO] [axolotl.train.save_initial_configs:413] [PID:293655] Pre-saving adapter config to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure...
[2026-04-09 03:31:07,690] [INFO] [axolotl.train.save_initial_configs:417] [PID:293655] Pre-saving tokenizer to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure...
[2026-04-09 03:31:08,990] [INFO] [axolotl.train.save_initial_configs:422] [PID:293655] Pre-saving model config to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure...
[2026-04-09 03:31:09,015] [INFO] [axolotl.train.execute_training:212] [PID:293655] Starting trainer...
[34m[1mwandb[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /home/tkwang/.netrc.
[34m[1mwandb[0m: Currently logged in as: [33mwtkuan[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: [38;5;178m⢿[0m Waiting for wandb.init()...
[Am[2K[34m[1mwandb[0m: [38;5;178m⣻[0m Waiting for wandb.init()...
[Am[2K[34m[1mwandb[0m: [38;5;178m⣽[0m Waiting for wandb.init()...
[Am[2K[34m[1mwandb[0m: [38;5;178m⣾[0m Waiting for wandb.init()...
[Am[2K[34m[1mwandb[0m: [38;5;178m⣷[0m setting up run ba5u2wmv (0.5s)
[Am[2K[34m[1mwandb[0m: [38;5;178m⣯[0m setting up run ba5u2wmv (0.5s)
[Am[2K[34m[1mwandb[0m: Tracking run with wandb version 0.24.0
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/scratch/tkwang/SecSteer/wandb/run-20260409_033112-ba5u2wmv[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mQwen2.5-Coder-7B-stage2-secure[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/wtkuan/sft-primevul-sweep-ctx-0[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/wtkuan/sft-primevul-sweep-ctx-0/runs/ba5u2wmv[0m
[34m[1mwandb[0m: Detected [huggingface_hub.inference, mcp, openai] in use.
[34m[1mwandb[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
[34m[1mwandb[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/
[34m[1mwandb[0m: [33mWARNING[0m Saving files without folders. If you want to preserve subdirectories pass base_path to wandb.save, i.e. wandb.save("/mnt/folder/file.h5", base_path="/mnt")
[34m[1mwandb[0m: [33mWARNING[0m Symlinked 1 file into the W&B run directory; call wandb.save again to sync new files.
[2026-04-09 03:31:17,858] [INFO] [axolotl.utils.callbacks.on_train_begin:757] [PID:293655] The Axolotl config has been saved to the WandB run under files.
  0%|                                                                                                              | 0/46 [00:00<?, ?it/s][2026-04-09 03:31:17,863] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:293655] Running evaluation step...

  0%|                                                                                                              | 0/20 [00:00<?, ?it/s][A
 10%|██████████▏                                                                                           | 2/20 [00:00<00:05,  3.43it/s][A
 15%|███████████████▎                                                                                      | 3/20 [00:01<00:07,  2.28it/s][A
 20%|████████████████████▍                                                                                 | 4/20 [00:01<00:08,  1.94it/s][A
 25%|█████████████████████████▌                                                                            | 5/20 [00:02<00:08,  1.77it/s][A
 30%|██████████████████████████████▌                                                                       | 6/20 [00:03<00:08,  1.66it/s][A
 35%|███████████████████████████████████▋                                                                  | 7/20 [00:03<00:07,  1.63it/s][A
 40%|████████████████████████████████████████▊                                                             | 8/20 [00:04<00:07,  1.62it/s][A
 45%|█████████████████████████████████████████████▉                                                        | 9/20 [00:05<00:07,  1.55it/s][A
 50%|██████████████████████████████████████████████████▌                                                  | 10/20 [00:05<00:06,  1.54it/s][A
 55%|███████████████████████████████████████████████████████▌                                             | 11/20 [00:06<00:05,  1.54it/s][A
 60%|████████████████████████████████████████████████████████████▌                                        | 12/20 [00:07<00:05,  1.55it/s][A
 65%|█████████████████████████████████████████████████████████████████▋                                   | 13/20 [00:07<00:04,  1.51it/s][A
 70%|██████████████████████████████████████████████████████████████████████▋                              | 14/20 [00:08<00:03,  1.52it/s][A
 75%|███████████████████████████████████████████████████████████████████████████▊                         | 15/20 [00:09<00:03,  1.53it/s][A
 80%|████████████████████████████████████████████████████████████████████████████████▊                    | 16/20 [00:09<00:02,  1.54it/s][A
 85%|█████████████████████████████████████████████████████████████████████████████████████▊               | 17/20 [00:10<00:02,  1.50it/s][A
 90%|██████████████████████████████████████████████████████████████████████████████████████████▉          | 18/20 [00:11<00:01,  1.52it/s][A
 95%|███████████████████████████████████████████████████████████████████████████████████████████████▉     | 19/20 [00:11<00:00,  1.53it/s][ATraceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-du5o9zz7'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-a37y7u4h'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-c71mro43'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-f1vbkqeq'

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.44it/s][A                                                                                                                                          
                                                                                                                                          [A{'eval_loss': 0.7821827530860901, 'eval_runtime': 14.1138, 'eval_samples_per_second': 22.602, 'eval_steps_per_second': 1.417, 'eval_ppl': 2.18624, 'memory/max_active (GiB)': 37.91, 'memory/max_allocated (GiB)': 37.91, 'memory/device_reserved (GiB)': 41.87, 'epoch': 0}
  0%|                                                                                                              | 0/46 [00:14<?, ?it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.44it/s][A
                                                                                                                                          [A  2%|██▏                                                                                                   | 1/46 [00:23<17:42, 23.61s/it]                                                                                                                                          {'loss': 0.8096, 'grad_norm': 0.0860491394996643, 'learning_rate': 0.0, 'ppl': 2.24701, 'memory/max_active (GiB)': 45.83, 'memory/max_allocated (GiB)': 45.83, 'memory/device_reserved (GiB)': 55.35, 'tokens/train_per_sec_per_gpu': 1219.3382568359375, 'tokens/total': 262144, 'tokens/trainable': 47417, 'epoch': 0.02}
  2%|██▏                                                                                                   | 1/46 [00:23<17:42, 23.61s/it]  4%|████▍                                                                                                 | 2/46 [00:32<10:55, 14.90s/it]                                                                                                                                          {'loss': 0.7548, 'grad_norm': 0.07023165374994278, 'learning_rate': 5e-06, 'ppl': 2.12719, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.37, 'tokens/train_per_sec_per_gpu': 883.8273315429688, 'tokens/total': 524288, 'tokens/trainable': 104680, 'epoch': 0.04}
  4%|████▍                                                                                                 | 2/46 [00:32<10:55, 14.90s/it]  7%|██████▋                                                                                               | 3/46 [00:41<08:40, 12.11s/it]                                                                                                                                          {'loss': 0.7674, 'grad_norm': 0.07232996821403503, 'learning_rate': 1e-05, 'ppl': 2.15416, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.37, 'tokens/train_per_sec_per_gpu': 1246.1279296875, 'tokens/total': 786432, 'tokens/trainable': 154792, 'epoch': 0.07}
  7%|██████▋                                                                                               | 3/46 [00:41<08:40, 12.11s/it]  9%|████████▊                                                                                             | 4/46 [00:50<07:35, 10.85s/it]                                                                                                                                          {'loss': 0.8402, 'grad_norm': 0.0671195313334465, 'learning_rate': 1.5000000000000002e-05, 'ppl': 2.31683, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1250.1392822265625, 'tokens/total': 1048576, 'tokens/trainable': 217458, 'epoch': 0.09}
  9%|████████▊                                                                                             | 4/46 [00:50<07:35, 10.85s/it] 11%|███████████                                                                                           | 5/46 [00:58<06:54, 10.11s/it]                                                                                                                                          {'loss': 0.8154, 'grad_norm': 0.06423480808734894, 'learning_rate': 2e-05, 'ppl': 2.26008, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1093.50244140625, 'tokens/total': 1310720, 'tokens/trainable': 279817, 'epoch': 0.11}
 11%|███████████                                                                                           | 5/46 [00:58<06:54, 10.11s/it] 13%|█████████████▎                                                                                        | 6/46 [01:07<06:27,  9.68s/it]                                                                                                                                          {'loss': 0.7097, 'grad_norm': 0.06509919464588165, 'learning_rate': 1.9972037971811802e-05, 'ppl': 2.03338, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1743.6982421875, 'tokens/total': 1572864, 'tokens/trainable': 337333, 'epoch': 0.13}
 13%|█████████████▎                                                                                        | 6/46 [01:07<06:27,  9.68s/it] 15%|███████████████▌                                                                                      | 7/46 [01:16<06:05,  9.36s/it]                                                                                                                                          {'loss': 0.7733, 'grad_norm': 0.077994205057621, 'learning_rate': 1.9888308262251286e-05, 'ppl': 2.16691, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1229.959228515625, 'tokens/total': 1835008, 'tokens/trainable': 377704, 'epoch': 0.15}
 15%|███████████████▌                                                                                      | 7/46 [01:16<06:05,  9.36s/it] 17%|█████████████████▋                                                                                    | 8/46 [01:25<05:48,  9.18s/it]                                                                                                                                          {'loss': 0.7392, 'grad_norm': 0.06839064508676529, 'learning_rate': 1.9749279121818235e-05, 'ppl': 2.09426, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 2193.31396484375, 'tokens/total': 2097152, 'tokens/trainable': 426032, 'epoch': 0.18}
 17%|█████████████████▋                                                                                    | 8/46 [01:25<05:48,  9.18s/it] 20%|███████████████████▉                                                                                  | 9/46 [01:34<05:35,  9.08s/it]                                                                                                                                          {'loss': 0.7935, 'grad_norm': 0.058815669268369675, 'learning_rate': 1.955572805786141e-05, 'ppl': 2.21112, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 2324.8984375, 'tokens/total': 2359296, 'tokens/trainable': 488622, 'epoch': 0.2}
 20%|███████████████████▉                                                                                  | 9/46 [01:34<05:35,  9.08s/it] 22%|█████████████████████▉                                                                               | 10/46 [01:42<05:24,  9.02s/it]                                                                                                                                          {'loss': 0.6788, 'grad_norm': 0.05653702840209007, 'learning_rate': 1.9308737486442045e-05, 'ppl': 1.97151, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1683.135986328125, 'tokens/total': 2621440, 'tokens/trainable': 549635, 'epoch': 0.22}
 22%|█████████████████████▉                                                                               | 10/46 [01:42<05:24,  9.02s/it] 24%|████████████████████████▏                                                                            | 11/46 [01:51<05:12,  8.94s/it]                                                                                                                                          {'loss': 0.7701, 'grad_norm': 0.06375502794981003, 'learning_rate': 1.900968867902419e-05, 'ppl': 2.15998, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 967.6227416992188, 'tokens/total': 2883584, 'tokens/trainable': 599165, 'epoch': 0.24}
 24%|████████████████████████▏                                                                            | 11/46 [01:51<05:12,  8.94s/it] 26%|██████████████████████████▎                                                                          | 12/46 [02:00<05:03,  8.92s/it]                                                                                                                                          {'loss': 0.7255, 'grad_norm': 0.05587285757064819, 'learning_rate': 1.866025403784439e-05, 'ppl': 2.06576, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1434.523681640625, 'tokens/total': 3145728, 'tokens/trainable': 660574, 'epoch': 0.27}
 26%|██████████████████████████▎                                                                          | 12/46 [02:00<05:03,  8.92s/it] 28%|████████████████████████████▌                                                                        | 13/46 [02:09<04:53,  8.90s/it]                                                                                                                                          {'loss': 0.725, 'grad_norm': 0.05221521481871605, 'learning_rate': 1.826238774315995e-05, 'ppl': 2.06473, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 2159.218994140625, 'tokens/total': 3407872, 'tokens/trainable': 720484, 'epoch': 0.29}
 28%|████████████████████████████▌                                                                        | 13/46 [02:09<04:53,  8.90s/it] 30%|██████████████████████████████▋                                                                      | 14/46 [02:18<04:43,  8.86s/it]                                                                                                                                          {'loss': 0.7317, 'grad_norm': 0.05614304915070534, 'learning_rate': 1.78183148246803e-05, 'ppl': 2.07861, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 2078.3359375, 'tokens/total': 3670016, 'tokens/trainable': 777518, 'epoch': 0.31}
 30%|██████████████████████████████▋                                                                      | 14/46 [02:18<04:43,  8.86s/it] 33%|████████████████████████████████▉                                                                    | 15/46 [02:27<04:33,  8.83s/it]                                                                                                                                          {'loss': 0.7657, 'grad_norm': 0.05838664621114731, 'learning_rate': 1.7330518718298263e-05, 'ppl': 2.1505, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1735.1202392578125, 'tokens/total': 3932160, 'tokens/trainable': 827785, 'epoch': 0.33}
 33%|████████████████████████████████▉                                                                    | 15/46 [02:27<04:33,  8.83s/it][2026-04-09 03:33:44,880] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:293655] Running evaluation step...

  0%|                                                                                                              | 0/20 [00:00<?, ?it/s][A
 10%|██████████▏                                                                                           | 2/20 [00:00<00:05,  3.11it/s][A
 15%|███████████████▎                                                                                      | 3/20 [00:01<00:07,  2.19it/s][A
 20%|████████████████████▍                                                                                 | 4/20 [00:01<00:08,  1.89it/s][A
 25%|█████████████████████████▌                                                                            | 5/20 [00:02<00:08,  1.68it/s][A
 30%|██████████████████████████████▌                                                                       | 6/20 [00:03<00:08,  1.63it/s][A
 35%|███████████████████████████████████▋                                                                  | 7/20 [00:03<00:08,  1.61it/s][A
 40%|████████████████████████████████████████▊                                                             | 8/20 [00:04<00:07,  1.60it/s][A
 45%|█████████████████████████████████████████████▉                                                        | 9/20 [00:05<00:07,  1.55it/s][A
 50%|██████████████████████████████████████████████████▌                                                  | 10/20 [00:05<00:06,  1.55it/s][A
 55%|███████████████████████████████████████████████████████▌                                             | 11/20 [00:06<00:05,  1.55it/s][A
 60%|████████████████████████████████████████████████████████████▌                                        | 12/20 [00:07<00:05,  1.55it/s][A
 65%|█████████████████████████████████████████████████████████████████▋                                   | 13/20 [00:07<00:04,  1.51it/s][A
 70%|██████████████████████████████████████████████████████████████████████▋                              | 14/20 [00:08<00:03,  1.53it/s][A
 75%|███████████████████████████████████████████████████████████████████████████▊                         | 15/20 [00:09<00:03,  1.53it/s][A
 80%|████████████████████████████████████████████████████████████████████████████████▊                    | 16/20 [00:09<00:02,  1.54it/s][A
 85%|█████████████████████████████████████████████████████████████████████████████████████▊               | 17/20 [00:10<00:01,  1.50it/s][A
 90%|██████████████████████████████████████████████████████████████████████████████████████████▉          | 18/20 [00:11<00:01,  1.52it/s][A
 95%|███████████████████████████████████████████████████████████████████████████████████████████████▉     | 19/20 [00:11<00:00,  1.53it/s][ATraceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-drksda8g'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-y7if98gw'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-ua1jgn56'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-8mt89jhn'

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.48it/s][A                                                                                                                                          
                                                                                                                                          [A{'eval_loss': 0.7501720190048218, 'eval_runtime': 13.4864, 'eval_samples_per_second': 23.653, 'eval_steps_per_second': 1.483, 'eval_ppl': 2.11736, 'memory/max_active (GiB)': 38.25, 'memory/max_allocated (GiB)': 38.25, 'memory/device_reserved (GiB)': 55.38, 'epoch': 0.33, 'tokens/train_per_sec_per_gpu': 0.0}
 33%|████████████████████████████████▉                                                                    | 15/46 [02:40<04:33,  8.83s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.48it/s][A
                                                                                                                                          [A[2026-04-09 03:33:58,491] [INFO] [axolotl.core.trainers.base._save:721] [PID:293655] Saving model checkpoint to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure/checkpoint-15
[2026-04-09 03:33:59,958] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.
  return func(*args, **kwargs)

 35%|███████████████████████████████████▏                                                                 | 16/46 [02:51<06:50, 13.67s/it]                                                                                                                                          {'loss': 0.804, 'grad_norm': 0.05892722308635712, 'learning_rate': 1.6801727377709195e-05, 'ppl': 2.23446, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 819.9111328125, 'tokens/total': 4194304, 'tokens/trainable': 869822, 'epoch': 0.35}
 35%|███████████████████████████████████▏                                                                 | 16/46 [02:51<06:50, 13.67s/it] 37%|█████████████████████████████████████▎                                                               | 17/46 [03:00<05:53, 12.20s/it]                                                                                                                                          {'loss': 0.7176, 'grad_norm': 0.05681360885500908, 'learning_rate': 1.6234898018587336e-05, 'ppl': 2.04951, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1584.865966796875, 'tokens/total': 4456448, 'tokens/trainable': 921366, 'epoch': 0.38}
 37%|█████████████████████████████████████▎                                                               | 17/46 [03:00<05:53, 12.20s/it] 39%|███████████████████████████████████████▌                                                             | 18/46 [03:09<05:12, 11.16s/it]                                                                                                                                          {'loss': 0.7473, 'grad_norm': 0.05141684040427208, 'learning_rate': 1.563320058063622e-05, 'ppl': 2.11129, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1832.5343017578125, 'tokens/total': 4718592, 'tokens/trainable': 972442, 'epoch': 0.4}
 39%|███████████████████████████████████████▌                                                             | 18/46 [03:09<05:12, 11.16s/it] 41%|█████████████████████████████████████████▋                                                           | 19/46 [03:18<04:41, 10.44s/it]                                                                                                                                          {'loss': 0.7396, 'grad_norm': 0.05582691356539726, 'learning_rate': 1.5000000000000002e-05, 'ppl': 2.0951, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1077.2296142578125, 'tokens/total': 4980736, 'tokens/trainable': 1025341, 'epoch': 0.42}
 41%|█████████████████████████████████████████▋                                                           | 19/46 [03:18<04:41, 10.44s/it] 43%|███████████████████████████████████████████▉                                                         | 20/46 [03:27<04:18,  9.96s/it]                                                                                                                                          {'loss': 0.7589, 'grad_norm': 0.048064205795526505, 'learning_rate': 1.4338837391175582e-05, 'ppl': 2.13593, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1663.6461181640625, 'tokens/total': 5242880, 'tokens/trainable': 1077984, 'epoch': 0.44}
 43%|███████████████████████████████████████████▉                                                         | 20/46 [03:27<04:18,  9.96s/it] 46%|██████████████████████████████████████████████                                                       | 21/46 [03:35<04:00,  9.62s/it]                                                                                                                                          {'loss': 0.7195, 'grad_norm': 0.0480501726269722, 'learning_rate': 1.3653410243663953e-05, 'ppl': 2.05341, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1762.2572021484375, 'tokens/total': 5505024, 'tokens/trainable': 1129840, 'epoch': 0.46}
 46%|██████████████████████████████████████████████                                                       | 21/46 [03:35<04:00,  9.62s/it] 48%|████████████████████████████████████████████████▎                                                    | 22/46 [03:44<03:45,  9.39s/it]                                                                                                                                          {'loss': 0.6898, 'grad_norm': 0.045008592307567596, 'learning_rate': 1.2947551744109044e-05, 'ppl': 1.99332, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1673.069580078125, 'tokens/total': 5767168, 'tokens/trainable': 1189110, 'epoch': 0.49}
 48%|████████████████████████████████████████████████▎                                                    | 22/46 [03:44<03:45,  9.39s/it] 50%|██████████████████████████████████████████████████▌                                                  | 23/46 [03:53<03:31,  9.20s/it]                                                                                                                                          {'loss': 0.776, 'grad_norm': 0.05052850767970085, 'learning_rate': 1.2225209339563144e-05, 'ppl': 2.17276, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1745.7901611328125, 'tokens/total': 6029312, 'tokens/trainable': 1245078, 'epoch': 0.51}
 50%|██████████████████████████████████████████████████▌                                                  | 23/46 [03:53<03:31,  9.20s/it] 52%|████████████████████████████████████████████████████▋                                                | 24/46 [04:02<03:20,  9.12s/it]                                                                                                                                          {'loss': 0.7013, 'grad_norm': 0.04177302494645119, 'learning_rate': 1.1490422661761744e-05, 'ppl': 2.01637, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1498.635986328125, 'tokens/total': 6291456, 'tokens/trainable': 1308912, 'epoch': 0.53}
 52%|████████████████████████████████████████████████████▋                                                | 24/46 [04:02<03:20,  9.12s/it] 54%|██████████████████████████████████████████████████████▉                                              | 25/46 [04:11<03:09,  9.03s/it]                                                                                                                                          {'loss': 0.7452, 'grad_norm': 0.04492776095867157, 'learning_rate': 1.0747300935864245e-05, 'ppl': 2.10686, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1319.31494140625, 'tokens/total': 6553600, 'tokens/trainable': 1364122, 'epoch': 0.55}
 54%|██████████████████████████████████████████████████████▉                                              | 25/46 [04:11<03:09,  9.03s/it] 57%|█████████████████████████████████████████████████████████                                            | 26/46 [04:20<02:59,  8.96s/it]                                                                                                                                          {'loss': 0.7507, 'grad_norm': 0.04114656522870064, 'learning_rate': 1e-05, 'ppl': 2.11848, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1585.4486083984375, 'tokens/total': 6815744, 'tokens/trainable': 1418567, 'epoch': 0.57}
 57%|█████████████████████████████████████████████████████████                                            | 26/46 [04:20<02:59,  8.96s/it] 59%|███████████████████████████████████████████████████████████▎                                         | 27/46 [04:28<02:49,  8.90s/it]                                                                                                                                          {'loss': 0.7872, 'grad_norm': 0.04480674862861633, 'learning_rate': 9.252699064135759e-06, 'ppl': 2.19724, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1023.1653442382812, 'tokens/total': 7077888, 'tokens/trainable': 1472790, 'epoch': 0.6}
 59%|███████████████████████████████████████████████████████████▎                                         | 27/46 [04:28<02:49,  8.90s/it] 61%|█████████████████████████████████████████████████████████████▍                                       | 28/46 [04:37<02:39,  8.84s/it]                                                                                                                                          {'loss': 0.7238, 'grad_norm': 0.04875003173947334, 'learning_rate': 8.509577338238255e-06, 'ppl': 2.06225, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1241.5045166015625, 'tokens/total': 7340032, 'tokens/trainable': 1514813, 'epoch': 0.62}
 61%|█████████████████████████████████████████████████████████████▍                                       | 28/46 [04:37<02:39,  8.84s/it] 63%|███████████████████████████████████████████████████████████████▋                                     | 29/46 [04:46<02:30,  8.82s/it]                                                                                                                                          {'loss': 0.7143, 'grad_norm': 0.044282350689172745, 'learning_rate': 7.774790660436857e-06, 'ppl': 2.04276, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1276.04638671875, 'tokens/total': 7602176, 'tokens/trainable': 1560152, 'epoch': 0.64}
 63%|███████████████████████████████████████████████████████████████▋                                     | 29/46 [04:46<02:30,  8.82s/it] 65%|█████████████████████████████████████████████████████████████████▊                                   | 30/46 [04:55<02:21,  8.82s/it]                                                                                                                                          {'loss': 0.7404, 'grad_norm': 0.04024430364370346, 'learning_rate': 7.052448255890958e-06, 'ppl': 2.09677, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1432.028564453125, 'tokens/total': 7864320, 'tokens/trainable': 1615087, 'epoch': 0.66}
 65%|█████████████████████████████████████████████████████████████████▊                                   | 30/46 [04:55<02:21,  8.82s/it][2026-04-09 03:36:12,977] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:293655] Running evaluation step...

  0%|                                                                                                              | 0/20 [00:00<?, ?it/s][A
 10%|██████████▏                                                                                           | 2/20 [00:00<00:05,  3.12it/s][A
 15%|███████████████▎                                                                                      | 3/20 [00:01<00:07,  2.19it/s][A
 20%|████████████████████▍                                                                                 | 4/20 [00:01<00:08,  1.89it/s][A
 25%|█████████████████████████▌                                                                            | 5/20 [00:02<00:08,  1.72it/s][A
 30%|██████████████████████████████▌                                                                       | 6/20 [00:03<00:08,  1.62it/s][A
 35%|███████████████████████████████████▋                                                                  | 7/20 [00:03<00:08,  1.60it/s][A
 40%|████████████████████████████████████████▊                                                             | 8/20 [00:04<00:07,  1.59it/s][A
 45%|█████████████████████████████████████████████▉                                                        | 9/20 [00:05<00:07,  1.56it/s][A
 50%|██████████████████████████████████████████████████▌                                                  | 10/20 [00:05<00:06,  1.54it/s][A
 55%|███████████████████████████████████████████████████████▌                                             | 11/20 [00:06<00:05,  1.54it/s][A
 60%|████████████████████████████████████████████████████████████▌                                        | 12/20 [00:07<00:05,  1.54it/s][A
 65%|█████████████████████████████████████████████████████████████████▋                                   | 13/20 [00:07<00:04,  1.51it/s][A
 70%|██████████████████████████████████████████████████████████████████████▋                              | 14/20 [00:08<00:03,  1.52it/s][A
 75%|███████████████████████████████████████████████████████████████████████████▊                         | 15/20 [00:09<00:03,  1.52it/s][A
 80%|████████████████████████████████████████████████████████████████████████████████▊                    | 16/20 [00:09<00:02,  1.54it/s][A
 85%|█████████████████████████████████████████████████████████████████████████████████████▊               | 17/20 [00:10<00:01,  1.50it/s][A
 90%|██████████████████████████████████████████████████████████████████████████████████████████▉          | 18/20 [00:11<00:01,  1.52it/s][A
 95%|███████████████████████████████████████████████████████████████████████████████████████████████▉     | 19/20 [00:11<00:00,  1.53it/s][ATraceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-io0kbzgt'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-d5dg0bpp'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-k3iy6laz'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-4b0lhhv3'

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.50it/s][A                                                                                                                                          
                                                                                                                                          [A{'eval_loss': 0.7381538152694702, 'eval_runtime': 13.7476, 'eval_samples_per_second': 23.204, 'eval_steps_per_second': 1.455, 'eval_ppl': 2.09207, 'memory/max_active (GiB)': 38.25, 'memory/max_allocated (GiB)': 38.25, 'memory/device_reserved (GiB)': 56.53, 'epoch': 0.66, 'tokens/train_per_sec_per_gpu': 0.0}
 65%|█████████████████████████████████████████████████████████████████▊                                   | 30/46 [05:08<02:21,  8.82s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.50it/s][A
                                                                                                                                          [A[2026-04-09 03:36:26,806] [INFO] [axolotl.core.trainers.base._save:721] [PID:293655] Saving model checkpoint to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure/checkpoint-30
[2026-04-09 03:36:27,759] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.
  return func(*args, **kwargs)

 67%|████████████████████████████████████████████████████████████████████                                 | 31/46 [05:20<03:27, 13.82s/it]                                                                                                                                          {'loss': 0.6766, 'grad_norm': 0.04066118597984314, 'learning_rate': 6.34658975633605e-06, 'ppl': 1.96718, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1761.022216796875, 'tokens/total': 8126464, 'tokens/trainable': 1665278, 'epoch': 0.69}
 67%|████████████████████████████████████████████████████████████████████                                 | 31/46 [05:20<03:27, 13.82s/it] 70%|██████████████████████████████████████████████████████████████████████▎                              | 32/46 [05:29<02:52, 12.29s/it]                                                                                                                                          {'loss': 0.6863, 'grad_norm': 0.046845149248838425, 'learning_rate': 5.66116260882442e-06, 'ppl': 1.98635, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1473.9476318359375, 'tokens/total': 8388608, 'tokens/trainable': 1715159, 'epoch': 0.71}
 70%|██████████████████████████████████████████████████████████████████████▎                              | 32/46 [05:29<02:52, 12.29s/it] 72%|████████████████████████████████████████████████████████████████████████▍                            | 33/46 [05:38<02:26, 11.25s/it]                                                                                                                                          {'loss': 0.72, 'grad_norm': 0.038377199321985245, 'learning_rate': 5.000000000000003e-06, 'ppl': 2.05443, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 2030.8389892578125, 'tokens/total': 8650752, 'tokens/trainable': 1778054, 'epoch': 0.73}
 72%|████████████████████████████████████████████████████████████████████████▍                            | 33/46 [05:38<02:26, 11.25s/it] 74%|██████████████████████████████████████████████████████████████████████████▋                          | 34/46 [05:46<02:06, 10.53s/it]                                                                                                                                          {'loss': 0.706, 'grad_norm': 0.040965937077999115, 'learning_rate': 4.3667994193637794e-06, 'ppl': 2.02587, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 2132.74609375, 'tokens/total': 8912896, 'tokens/trainable': 1836458, 'epoch': 0.75}
 74%|██████████████████████████████████████████████████████████████████████████▋                          | 34/46 [05:46<02:06, 10.53s/it] 76%|████████████████████████████████████████████████████████████████████████████▊                        | 35/46 [05:55<01:50, 10.00s/it]                                                                                                                                          {'loss': 0.7886, 'grad_norm': 0.038428883999586105, 'learning_rate': 3.7651019814126656e-06, 'ppl': 2.20031, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1427.942138671875, 'tokens/total': 9175040, 'tokens/trainable': 1889305, 'epoch': 0.77}
 76%|████████████████████████████████████████████████████████████████████████████▊                        | 35/46 [05:55<01:50, 10.00s/it] 78%|███████████████████████████████████████████████████████████████████████████████                      | 36/46 [06:04<01:36,  9.67s/it]                                                                                                                                          {'loss': 0.7555, 'grad_norm': 0.04247698187828064, 'learning_rate': 3.1982726222908046e-06, 'ppl': 2.12868, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1808.15478515625, 'tokens/total': 9437184, 'tokens/trainable': 1948552, 'epoch': 0.8}
 78%|███████████████████████████████████████████████████████████████████████████████                      | 36/46 [06:04<01:36,  9.67s/it] 80%|█████████████████████████████████████████████████████████████████████████████████▏                   | 37/46 [06:13<01:24,  9.40s/it]                                                                                                                                          {'loss': 0.6227, 'grad_norm': 0.0429508201777935, 'learning_rate': 2.669481281701739e-06, 'ppl': 1.86395, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1936.651123046875, 'tokens/total': 9699328, 'tokens/trainable': 2010307, 'epoch': 0.82}
 80%|█████████████████████████████████████████████████████████████████████████████████▏                   | 37/46 [06:13<01:24,  9.40s/it] 83%|███████████████████████████████████████████████████████████████████████████████████▍                 | 38/46 [06:22<01:13,  9.22s/it]                                                                                                                                          {'loss': 0.6908, 'grad_norm': 0.041411444544792175, 'learning_rate': 2.1816851753197023e-06, 'ppl': 1.99531, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 2212.40478515625, 'tokens/total': 9961472, 'tokens/trainable': 2068108, 'epoch': 0.84}
 83%|███████████████████████████████████████████████████████████████████████████████████▍                 | 38/46 [06:22<01:13,  9.22s/it] 85%|█████████████████████████████████████████████████████████████████████████████████████▋               | 39/46 [06:31<01:03,  9.12s/it]                                                                                                                                          {'loss': 0.7247, 'grad_norm': 0.04243037849664688, 'learning_rate': 1.7376122568400533e-06, 'ppl': 2.06411, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1549.3170166015625, 'tokens/total': 10223616, 'tokens/trainable': 2133908, 'epoch': 0.86}
 85%|█████████████████████████████████████████████████████████████████████████████████████▋               | 39/46 [06:31<01:03,  9.12s/it] 87%|███████████████████████████████████████████████████████████████████████████████████████▊             | 40/46 [06:39<00:54,  9.03s/it]                                                                                                                                          {'loss': 0.7514, 'grad_norm': 0.0426081083714962, 'learning_rate': 1.339745962155613e-06, 'ppl': 2.11997, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1715.3697509765625, 'tokens/total': 10485760, 'tokens/trainable': 2194277, 'epoch': 0.88}
 87%|███████████████████████████████████████████████████████████████████████████████████████▊             | 40/46 [06:39<00:54,  9.03s/it] 89%|██████████████████████████████████████████████████████████████████████████████████████████           | 41/46 [06:48<00:44,  8.95s/it]                                                                                                                                          {'loss': 0.725, 'grad_norm': 0.04286476597189903, 'learning_rate': 9.903113209758098e-07, 'ppl': 2.06473, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1690.9439697265625, 'tokens/total': 10747904, 'tokens/trainable': 2243987, 'epoch': 0.91}
 89%|██████████████████████████████████████████████████████████████████████████████████████████           | 41/46 [06:48<00:44,  8.95s/it] 91%|████████████████████████████████████████████████████████████████████████████████████████████▏        | 42/46 [06:57<00:35,  8.94s/it]                                                                                                                                          {'loss': 0.6866, 'grad_norm': 0.0390406996011734, 'learning_rate': 6.912625135579587e-07, 'ppl': 1.98695, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 2454.6279296875, 'tokens/total': 11010048, 'tokens/trainable': 2313512, 'epoch': 0.93}
 91%|████████████████████████████████████████████████████████████████████████████████████████████▏        | 42/46 [06:57<00:35,  8.94s/it] 93%|██████████████████████████████████████████████████████████████████████████████████████████████▍      | 43/46 [07:06<00:26,  8.91s/it]                                                                                                                                          {'loss': 0.6431, 'grad_norm': 0.03946515917778015, 'learning_rate': 4.4427194213859216e-07, 'ppl': 1.90237, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1843.193115234375, 'tokens/total': 11272192, 'tokens/trainable': 2373378, 'epoch': 0.95}
 93%|██████████████████████████████████████████████████████████████████████████████████████████████▍      | 43/46 [07:06<00:26,  8.91s/it] 96%|████████████████████████████████████████████████████████████████████████████████████████████████▌    | 44/46 [07:15<00:17,  8.85s/it]                                                                                                                                          {'loss': 0.7175, 'grad_norm': 0.042728617787361145, 'learning_rate': 2.507208781817638e-07, 'ppl': 2.0493, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1632.2830810546875, 'tokens/total': 11534336, 'tokens/trainable': 2416789, 'epoch': 0.97}
 96%|████████████████████████████████████████████████████████████████████████████████████████████████▌    | 44/46 [07:15<00:17,  8.85s/it] 98%|██████████████████████████████████████████████████████████████████████████████████████████████████▊  | 45/46 [07:23<00:08,  8.83s/it]                                                                                                                                          {'loss': 0.769, 'grad_norm': 0.04248933494091034, 'learning_rate': 1.1169173774871478e-07, 'ppl': 2.15761, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1073.7342529296875, 'tokens/total': 11796480, 'tokens/trainable': 2467212, 'epoch': 0.99}
 98%|██████████████████████████████████████████████████████████████████████████████████████████████████▊  | 45/46 [07:23<00:08,  8.83s/it][2026-04-09 03:38:41,816] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:293655] Running evaluation step...

  0%|                                                                                                              | 0/20 [00:00<?, ?it/s][A
 10%|██████████▏                                                                                           | 2/20 [00:00<00:07,  2.47it/s][A
 15%|███████████████▎                                                                                      | 3/20 [00:01<00:08,  1.98it/s][A
 20%|████████████████████▍                                                                                 | 4/20 [00:02<00:08,  1.79it/s][A
 25%|█████████████████████████▌                                                                            | 5/20 [00:02<00:09,  1.63it/s][A
 30%|██████████████████████████████▌                                                                       | 6/20 [00:03<00:08,  1.59it/s][A
 35%|███████████████████████████████████▋                                                                  | 7/20 [00:04<00:08,  1.59it/s][A
 40%|████████████████████████████████████████▊                                                             | 8/20 [00:04<00:07,  1.58it/s][A
 45%|█████████████████████████████████████████████▉                                                        | 9/20 [00:05<00:07,  1.53it/s][A
 50%|██████████████████████████████████████████████████▌                                                  | 10/20 [00:06<00:06,  1.53it/s][A
 55%|███████████████████████████████████████████████████████▌                                             | 11/20 [00:06<00:05,  1.53it/s][A
 60%|████████████████████████████████████████████████████████████▌                                        | 12/20 [00:07<00:05,  1.54it/s][A
 65%|█████████████████████████████████████████████████████████████████▋                                   | 13/20 [00:08<00:04,  1.52it/s][A
 70%|██████████████████████████████████████████████████████████████████████▋                              | 14/20 [00:08<00:03,  1.52it/s][A
 75%|███████████████████████████████████████████████████████████████████████████▊                         | 15/20 [00:09<00:03,  1.52it/s][A
 80%|████████████████████████████████████████████████████████████████████████████████▊                    | 16/20 [00:10<00:02,  1.54it/s][A
 85%|█████████████████████████████████████████████████████████████████████████████████████▊               | 17/20 [00:10<00:02,  1.50it/s][A
 90%|██████████████████████████████████████████████████████████████████████████████████████████▉          | 18/20 [00:11<00:01,  1.51it/s][A
 95%|███████████████████████████████████████████████████████████████████████████████████████████████▉     | 19/20 [00:12<00:00,  1.52it/s][ATraceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-r7b1eufv'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-dt7h9v18'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-56rdqy1n'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-eu7acacd'

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.46it/s][A                                                                                                                                          
                                                                                                                                          [A{'eval_loss': 0.7366353869438171, 'eval_runtime': 13.6924, 'eval_samples_per_second': 23.298, 'eval_steps_per_second': 1.461, 'eval_ppl': 2.0889, 'memory/max_active (GiB)': 38.25, 'memory/max_allocated (GiB)': 38.25, 'memory/device_reserved (GiB)': 56.53, 'epoch': 0.99, 'tokens/train_per_sec_per_gpu': 0.0}
 98%|██████████████████████████████████████████████████████████████████████████████████████████████████▊  | 45/46 [07:37<00:08,  8.83s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:12<00:00,  1.46it/s][A
                                                                                                                                          [A[2026-04-09 03:38:55,597] [INFO] [axolotl.core.trainers.base._save:721] [PID:293655] Saving model checkpoint to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure/checkpoint-45
[2026-04-09 03:38:56,600] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.
  return func(*args, **kwargs)

Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-ql8p5j0x'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-6rn3wsm8'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-yyyosw1q'
Traceback (most recent call last):
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
    finalizer()
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
    rmtree(tempdir, onerror=onerror)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
    _rmtree_safe_fd(stack, onexc)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
    onexc(func, path, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
    onexc(os.unlink, fullname, err)
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
    return onerror(func, path, exc_info)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-zvda8j_e'
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 46/46 [07:42<00:00, 11.71s/it]                                                                                                                                          {'loss': 0.6996, 'grad_norm': 0.08553393185138702, 'learning_rate': 2.796202818819871e-08, 'ppl': 2.01295, 'memory/max_active (GiB)': 45.98, 'memory/max_allocated (GiB)': 45.98, 'memory/device_reserved (GiB)': 56.51, 'tokens/train_per_sec_per_gpu': 3303.278564453125, 'tokens/total': 11862016, 'tokens/trainable': 2474470, 'epoch': 1.0}
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 46/46 [07:42<00:00, 11.71s/it][2026-04-09 03:39:00,327] [INFO] [axolotl.core.trainers.base._save:721] [PID:293655] Saving model checkpoint to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure/checkpoint-46
[2026-04-09 03:39:01,510] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.
  return func(*args, **kwargs)

                                                                                                                                          {'train_runtime': 471.4066, 'train_samples_per_second': 6.245, 'train_steps_per_second': 0.098, 'train_loss': 0.7364971559980641, 'memory/max_active (GiB)': 15.01, 'memory/max_allocated (GiB)': 15.01, 'memory/device_reserved (GiB)': 56.51, 'epoch': 1.0, 'tokens/train_per_sec_per_gpu': 0.0}
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 46/46 [07:45<00:00, 11.71s/it]100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 46/46 [07:45<00:00, 10.11s/it]
[2026-04-09 03:39:03,193] [INFO] [axolotl.train.save_trained_model:233] [PID:293655] Training completed! Saving trained model to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure.
[2026-04-09 03:39:03,985] [INFO] [axolotl.train.save_trained_model:351] [PID:293655] Model successfully saved to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure