felixwangg's picture
Upload folder using huggingface_hub
0f0a349 verified
[2026-04-09 03:30:40,680] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:293655] baseline 0.000GB ()
[2026-04-09 03:30:40,681] [INFO] [axolotl.cli.config.load_cfg:259] [PID:293655] config:
{
"activation_offloading": false,
"adapter": "lora",
"axolotl_config_path": "./axolotl_configs/Qwen2.5-coder-7b-instruct/stage_1_2/lora-stage2-secure.yaml",
"base_model": "Qwen/Qwen2.5-Coder-7B-Instruct",
"base_model_config": "Qwen/Qwen2.5-Coder-7B-Instruct",
"batch_size": 64,
"bf16": true,
"capabilities": {
"bf16": true,
"compute_capability": "sm_90",
"fp8": true,
"n_gpu": 4,
"n_node": 1
},
"context_parallel_size": 1,
"dataloader_num_workers": 4,
"dataloader_pin_memory": true,
"dataloader_prefetch_factor": 256,
"dataset_num_proc": 112,
"dataset_prepared_path": "/home/tkwang/scratch/SecSteer/axolotl-datasets/lora/Qwen2.5-Coder-7B/stage2-secure",
"datasets": [
{
"chat_template": "tokenizer_default",
"message_property_mappings": {
"content": "content",
"role": "role"
},
"path": "felixwangg/stage_2_secure",
"split": "train",
"trust_remote_code": false,
"type": "chat_template"
}
],
"ddp": true,
"device": "cuda:0",
"device_map": {
"": 0
},
"dion_rank_fraction": 1.0,
"dion_rank_multiple_of": 1,
"early_stopping_patience": 1000,
"env_capabilities": {
"torch_version": "2.10.0"
},
"eval_batch_size": 4,
"eval_causal_lm_metrics": [
"sacrebleu",
"comet",
"ter",
"chrf"
],
"eval_max_new_tokens": 128,
"eval_sample_packing": false,
"eval_steps": 15,
"eval_table_size": 0,
"experimental_skip_move_to_device": true,
"flash_attention": true,
"fp16": false,
"gradient_accumulation_steps": 4,
"gradient_checkpointing": true,
"gradient_checkpointing_kwargs": {
"use_reentrant": true
},
"include_tkps": true,
"is_falcon_derived_model": false,
"is_llama_derived_model": false,
"is_mistral_derived_model": false,
"learning_rate": 2e-05,
"lisa_layers_attribute": "model.layers",
"load_best_model_at_end": true,
"load_in_4bit": false,
"load_in_8bit": false,
"local_rank": 0,
"logging_steps": 1,
"lora_alpha": 16,
"lora_dropout": 0.05,
"lora_model_dir": "/home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage1-combined/checkpoint-23",
"lora_r": 16,
"lora_target_linear": true,
"loraplus_lr_embedding": 1e-06,
"lr_scheduler": "cosine",
"mean_resizing_embeddings": false,
"merge_lora": true,
"micro_batch_size": 4,
"model_config_type": "qwen2",
"num_epochs": 1.0,
"optimizer": "adamw_torch",
"otel_metrics_host": "localhost",
"otel_metrics_port": 8000,
"output_dir": "/home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure",
"pad_to_sequence_len": true,
"pretrain_multipack_attn": true,
"profiler_steps_start": 0,
"qlora_sharded_model_loading": false,
"ray_num_workers": 1,
"resources_per_worker": {
"GPU": 1
},
"sample_packing": false,
"sample_packing_bin_size": 200,
"sample_packing_group_size": 100000,
"save_only_model": false,
"save_safetensors": true,
"save_steps": 15,
"save_total_limit": 1000,
"sequence_len": 4096,
"shuffle_before_merging_datasets": false,
"shuffle_merged_datasets": true,
"skip_prepare_dataset": false,
"streaming_multipack_buffer_size": 10000,
"strict": false,
"tensor_parallel_size": 1,
"test_datasets": [
{
"chat_template": "tokenizer_default",
"message_property_mappings": {
"content": "content",
"role": "role"
},
"path": "felixwangg/stage_2_secure",
"split": "validation",
"trust_remote_code": false,
"type": "chat_template"
}
],
"tf32": false,
"tiled_mlp_use_original_mlp": true,
"tokenizer_config": "Qwen/Qwen2.5-Coder-7B-Instruct",
"tokenizer_save_jinja_files": true,
"tokenizer_type": "AutoTokenizer",
"torch_dtype": "torch.bfloat16",
"train_on_inputs": false,
"trl": {
"log_completions": false,
"mask_truncated_completions": false,
"ref_model_mixup_alpha": 0.9,
"ref_model_sync_steps": 64,
"scale_rewards": true,
"sync_ref_model": false,
"use_vllm": false,
"vllm_server_host": "0.0.0.0",
"vllm_server_port": 8000
},
"type_of_model": "Qwen2ForCausalLM",
"use_otel_metrics": false,
"use_ray": false,
"use_wandb": true,
"val_set_size": 0.0,
"vllm": {
"device": "auto",
"dtype": "auto",
"gpu_memory_utilization": 0.9,
"host": "0.0.0.0",
"port": 8000
},
"wandb_entity": "wtkuan",
"wandb_log_model": "false",
"wandb_name": "Qwen2.5-Coder-7B-stage2-secure",
"wandb_project": "sft-primevul-sweep-ctx-0",
"wandb_watch": "false",
"warmup_ratio": 0.1,
"weight_decay": 0.02,
"world_size": 4
}
[2026-04-09 03:30:41,238] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:293655] EOS: 151645 / <|im_end|>
[2026-04-09 03:30:41,238] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:293655] BOS: None / None
[2026-04-09 03:30:41,238] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:293655] PAD: 151643 / <|endoftext|>
[2026-04-09 03:30:41,238] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:293655] UNK: None / None
[2026-04-09 03:30:41,812] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:475] [PID:293655] Loading prepared dataset from disk at /home/tkwang/scratch/SecSteer/axolotl-datasets/lora/Qwen2.5-Coder-7B/stage2-secure/372a26636afc98b5e76ce25d2b26305b...
[2026-04-09 03:30:41,827] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:475] [PID:293655] Loading prepared dataset from disk at /home/tkwang/scratch/SecSteer/axolotl-datasets/lora/Qwen2.5-Coder-7B/stage2-secure/d6ee772141270dc60aba6ee42648aaa1...
[2026-04-09 03:30:42,214] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:417] [PID:293655] total_num_tokens: 3_095_863
[2026-04-09 03:30:42,684] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:435] [PID:293655] `total_supervised_tokens: 2_473_951`
[2026-04-09 03:30:42,684] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:533] [PID:293655] total_num_steps: 46
[2026-04-09 03:30:42,684] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:293655] Maximum number of steps set at 46
[2026-04-09 03:30:42,702] [DEBUG] [axolotl.train.setup_model_and_tokenizer:70] [PID:293655] loading tokenizer... Qwen/Qwen2.5-Coder-7B-Instruct
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:293655] EOS: 151645 / <|im_end|>
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:293655] BOS: None / None
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:293655] PAD: 151643 / <|endoftext|>
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:293655] UNK: None / None
[2026-04-09 03:30:43,188] [DEBUG] [axolotl.train.setup_model_and_tokenizer:82] [PID:293655] Loading model
[2026-04-09 03:30:43,316] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:293655] Patched Trainer.evaluation_loop with nanmean loss calculation
[2026-04-09 03:30:43,317] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:293655] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
Loading checkpoint shards: 0%| | 0/4 [00:00<?, ?it/s] Loading checkpoint shards: 25%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 1/4 [00:00<00:02, 1.01it/s] Loading checkpoint shards: 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 2/4 [00:02<00:02, 1.06s/it] Loading checkpoint shards: 75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 3/4 [00:03<00:01, 1.11s/it] Loading checkpoint shards: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:03<00:00, 1.27it/s] Loading checkpoint shards: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 4/4 [00:03<00:00, 1.12it/s]
[2026-04-09 03:30:48,236] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:347] [PID:293655] Converting modules to torch.bfloat16
[2026-04-09 03:30:48,238] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:293655] Memory usage after model load 17.233GB (+17.233GB allocated, +18.252GB reserved)
[2026-04-09 03:30:48,238] [INFO] [axolotl.loaders.adapter.load_lora:81] [PID:293655] found linear modules: ['down_proj', 'gate_proj', 'k_proj', 'o_proj', 'q_proj', 'up_proj', 'v_proj']
[2026-04-09 03:30:48,239] [DEBUG] [axolotl.loaders.adapter.load_lora:150] [PID:293655] Loading pretrained PEFT - LoRA
trainable params: 40,370,176 || all params: 7,655,986,688 || trainable%: 0.5273
[2026-04-09 03:30:48,731] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:293655] after adapters 14.487GB (+14.487GB allocated, +18.365GB reserved)
[2026-04-09 03:30:53,769] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/trl/extras/vllm_client.py:37: UserWarning: TRL currently supports vLLM versions: 0.10.2, 0.11.0, 0.11.1, 0.11.2, 0.12.0. You have version 0.18.0 installed. We recommend installing a supported version to avoid compatibility issues.
if is_vllm_available():
[2026-04-09 03:30:54,989] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/trl/trainer/grpo_trainer.py:105: UserWarning: TRL currently supports vLLM versions: 0.10.2, 0.11.0, 0.11.1, 0.11.2, 0.12.0. You have version 0.18.0 installed. We recommend installing a supported version to avoid compatibility issues.
if is_vllm_available():
[2026-04-09 03:31:07,404] [INFO] [axolotl.train.save_initial_configs:413] [PID:293655] Pre-saving adapter config to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure...
[2026-04-09 03:31:07,690] [INFO] [axolotl.train.save_initial_configs:417] [PID:293655] Pre-saving tokenizer to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure...
[2026-04-09 03:31:08,990] [INFO] [axolotl.train.save_initial_configs:422] [PID:293655] Pre-saving model config to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure...
[2026-04-09 03:31:09,015] [INFO] [axolotl.train.execute_training:212] [PID:293655] Starting trainer...
wandb: [wandb.login()] Loaded credentials for https://api.wandb.ai from /home/tkwang/.netrc.
wandb: Currently logged in as: wtkuan to https://api.wandb.ai. Use `wandb login --relogin` to force relogin
wandb: β’Ώ Waiting for wandb.init()...
m wandb: β£» Waiting for wandb.init()...
m wandb: β£½ Waiting for wandb.init()...
m wandb: β£Ύ Waiting for wandb.init()...
m wandb: β£· setting up run ba5u2wmv (0.5s)
m wandb: β£― setting up run ba5u2wmv (0.5s)
m wandb: Tracking run with wandb version 0.24.0
wandb: Run data is saved locally in /scratch/tkwang/SecSteer/wandb/run-20260409_033112-ba5u2wmv
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run Qwen2.5-Coder-7B-stage2-secure
wandb: ⭐️ View project at https://wandb.ai/wtkuan/sft-primevul-sweep-ctx-0
wandb: πŸš€ View run at https://wandb.ai/wtkuan/sft-primevul-sweep-ctx-0/runs/ba5u2wmv
wandb: Detected [huggingface_hub.inference, mcp, openai] in use.
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
wandb: WARNING Saving files without folders. If you want to preserve subdirectories pass base_path to wandb.save, i.e. wandb.save("/mnt/folder/file.h5", base_path="/mnt")
wandb: WARNING Symlinked 1 file into the W&B run directory; call wandb.save again to sync new files.
[2026-04-09 03:31:17,858] [INFO] [axolotl.utils.callbacks.on_train_begin:757] [PID:293655] The Axolotl config has been saved to the WandB run under files.
0%| | 0/46 [00:00<?, ?it/s][2026-04-09 03:31:17,863] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:293655] Running evaluation step...
0%| | 0/20 [00:00<?, ?it/s]
10%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 2/20 [00:00<00:05, 3.43it/s]
15%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/20 [00:01<00:07, 2.28it/s]
20%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 4/20 [00:01<00:08, 1.94it/s]
25%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/20 [00:02<00:08, 1.77it/s]
30%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/20 [00:03<00:08, 1.66it/s]
35%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 7/20 [00:03<00:07, 1.63it/s]
40%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 8/20 [00:04<00:07, 1.62it/s]
45%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/20 [00:05<00:07, 1.55it/s]
50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 10/20 [00:05<00:06, 1.54it/s]
55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 11/20 [00:06<00:05, 1.54it/s]
60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 12/20 [00:07<00:05, 1.55it/s]
65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/20 [00:07<00:04, 1.51it/s]
70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 14/20 [00:08<00:03, 1.52it/s]
75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 15/20 [00:09<00:03, 1.53it/s]
80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 16/20 [00:09<00:02, 1.54it/s]
85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 17/20 [00:10<00:02, 1.50it/s]
90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 18/20 [00:11<00:01, 1.52it/s]
95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 19/20 [00:11<00:00, 1.53it/s]Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-du5o9zz7'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-a37y7u4h'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-c71mro43'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-f1vbkqeq'
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:12<00:00, 1.44it/s]
{'eval_loss': 0.7821827530860901, 'eval_runtime': 14.1138, 'eval_samples_per_second': 22.602, 'eval_steps_per_second': 1.417, 'eval_ppl': 2.18624, 'memory/max_active (GiB)': 37.91, 'memory/max_allocated (GiB)': 37.91, 'memory/device_reserved (GiB)': 41.87, 'epoch': 0}
0%| | 0/46 [00:14<?, ?it/s]
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:12<00:00, 1.44it/s]
 2%|β–ˆβ–ˆβ– | 1/46 [00:23<17:42, 23.61s/it] {'loss': 0.8096, 'grad_norm': 0.0860491394996643, 'learning_rate': 0.0, 'ppl': 2.24701, 'memory/max_active (GiB)': 45.83, 'memory/max_allocated (GiB)': 45.83, 'memory/device_reserved (GiB)': 55.35, 'tokens/train_per_sec_per_gpu': 1219.3382568359375, 'tokens/total': 262144, 'tokens/trainable': 47417, 'epoch': 0.02}
2%|β–ˆβ–ˆβ– | 1/46 [00:23<17:42, 23.61s/it] 4%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 2/46 [00:32<10:55, 14.90s/it] {'loss': 0.7548, 'grad_norm': 0.07023165374994278, 'learning_rate': 5e-06, 'ppl': 2.12719, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.37, 'tokens/train_per_sec_per_gpu': 883.8273315429688, 'tokens/total': 524288, 'tokens/trainable': 104680, 'epoch': 0.04}
4%|β–ˆβ–ˆβ–ˆβ–ˆβ– | 2/46 [00:32<10:55, 14.90s/it] 7%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 3/46 [00:41<08:40, 12.11s/it] {'loss': 0.7674, 'grad_norm': 0.07232996821403503, 'learning_rate': 1e-05, 'ppl': 2.15416, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.37, 'tokens/train_per_sec_per_gpu': 1246.1279296875, 'tokens/total': 786432, 'tokens/trainable': 154792, 'epoch': 0.07}
7%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 3/46 [00:41<08:40, 12.11s/it] 9%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 4/46 [00:50<07:35, 10.85s/it] {'loss': 0.8402, 'grad_norm': 0.0671195313334465, 'learning_rate': 1.5000000000000002e-05, 'ppl': 2.31683, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1250.1392822265625, 'tokens/total': 1048576, 'tokens/trainable': 217458, 'epoch': 0.09}
9%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 4/46 [00:50<07:35, 10.85s/it] 11%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/46 [00:58<06:54, 10.11s/it] {'loss': 0.8154, 'grad_norm': 0.06423480808734894, 'learning_rate': 2e-05, 'ppl': 2.26008, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1093.50244140625, 'tokens/total': 1310720, 'tokens/trainable': 279817, 'epoch': 0.11}
11%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 5/46 [00:58<06:54, 10.11s/it] 13%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 6/46 [01:07<06:27, 9.68s/it] {'loss': 0.7097, 'grad_norm': 0.06509919464588165, 'learning_rate': 1.9972037971811802e-05, 'ppl': 2.03338, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1743.6982421875, 'tokens/total': 1572864, 'tokens/trainable': 337333, 'epoch': 0.13}
13%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 6/46 [01:07<06:27, 9.68s/it] 15%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 7/46 [01:16<06:05, 9.36s/it] {'loss': 0.7733, 'grad_norm': 0.077994205057621, 'learning_rate': 1.9888308262251286e-05, 'ppl': 2.16691, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1229.959228515625, 'tokens/total': 1835008, 'tokens/trainable': 377704, 'epoch': 0.15}
15%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 7/46 [01:16<06:05, 9.36s/it] 17%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/46 [01:25<05:48, 9.18s/it] {'loss': 0.7392, 'grad_norm': 0.06839064508676529, 'learning_rate': 1.9749279121818235e-05, 'ppl': 2.09426, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 2193.31396484375, 'tokens/total': 2097152, 'tokens/trainable': 426032, 'epoch': 0.18}
17%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 8/46 [01:25<05:48, 9.18s/it] 20%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/46 [01:34<05:35, 9.08s/it] {'loss': 0.7935, 'grad_norm': 0.058815669268369675, 'learning_rate': 1.955572805786141e-05, 'ppl': 2.21112, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 2324.8984375, 'tokens/total': 2359296, 'tokens/trainable': 488622, 'epoch': 0.2}
20%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/46 [01:34<05:35, 9.08s/it] 22%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 10/46 [01:42<05:24, 9.02s/it] {'loss': 0.6788, 'grad_norm': 0.05653702840209007, 'learning_rate': 1.9308737486442045e-05, 'ppl': 1.97151, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1683.135986328125, 'tokens/total': 2621440, 'tokens/trainable': 549635, 'epoch': 0.22}
22%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 10/46 [01:42<05:24, 9.02s/it] 24%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/46 [01:51<05:12, 8.94s/it] {'loss': 0.7701, 'grad_norm': 0.06375502794981003, 'learning_rate': 1.900968867902419e-05, 'ppl': 2.15998, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 967.6227416992188, 'tokens/total': 2883584, 'tokens/trainable': 599165, 'epoch': 0.24}
24%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 11/46 [01:51<05:12, 8.94s/it] 26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 12/46 [02:00<05:03, 8.92s/it] {'loss': 0.7255, 'grad_norm': 0.05587285757064819, 'learning_rate': 1.866025403784439e-05, 'ppl': 2.06576, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1434.523681640625, 'tokens/total': 3145728, 'tokens/trainable': 660574, 'epoch': 0.27}
26%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 12/46 [02:00<05:03, 8.92s/it] 28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 13/46 [02:09<04:53, 8.90s/it] {'loss': 0.725, 'grad_norm': 0.05221521481871605, 'learning_rate': 1.826238774315995e-05, 'ppl': 2.06473, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 2159.218994140625, 'tokens/total': 3407872, 'tokens/trainable': 720484, 'epoch': 0.29}
28%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 13/46 [02:09<04:53, 8.90s/it] 30%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 14/46 [02:18<04:43, 8.86s/it] {'loss': 0.7317, 'grad_norm': 0.05614304915070534, 'learning_rate': 1.78183148246803e-05, 'ppl': 2.07861, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 2078.3359375, 'tokens/total': 3670016, 'tokens/trainable': 777518, 'epoch': 0.31}
30%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 14/46 [02:18<04:43, 8.86s/it] 33%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 15/46 [02:27<04:33, 8.83s/it] {'loss': 0.7657, 'grad_norm': 0.05838664621114731, 'learning_rate': 1.7330518718298263e-05, 'ppl': 2.1505, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 55.38, 'tokens/train_per_sec_per_gpu': 1735.1202392578125, 'tokens/total': 3932160, 'tokens/trainable': 827785, 'epoch': 0.33}
33%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 15/46 [02:27<04:33, 8.83s/it][2026-04-09 03:33:44,880] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:293655] Running evaluation step...
0%| | 0/20 [00:00<?, ?it/s]
10%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 2/20 [00:00<00:05, 3.11it/s]
15%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/20 [00:01<00:07, 2.19it/s]
20%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 4/20 [00:01<00:08, 1.89it/s]
25%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/20 [00:02<00:08, 1.68it/s]
30%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/20 [00:03<00:08, 1.63it/s]
35%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 7/20 [00:03<00:08, 1.61it/s]
40%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 8/20 [00:04<00:07, 1.60it/s]
45%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/20 [00:05<00:07, 1.55it/s]
50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 10/20 [00:05<00:06, 1.55it/s]
55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 11/20 [00:06<00:05, 1.55it/s]
60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 12/20 [00:07<00:05, 1.55it/s]
65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/20 [00:07<00:04, 1.51it/s]
70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 14/20 [00:08<00:03, 1.53it/s]
75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 15/20 [00:09<00:03, 1.53it/s]
80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 16/20 [00:09<00:02, 1.54it/s]
85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 17/20 [00:10<00:01, 1.50it/s]
90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 18/20 [00:11<00:01, 1.52it/s]
95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 19/20 [00:11<00:00, 1.53it/s]Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-drksda8g'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-y7if98gw'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-ua1jgn56'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-8mt89jhn'
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:12<00:00, 1.48it/s]
{'eval_loss': 0.7501720190048218, 'eval_runtime': 13.4864, 'eval_samples_per_second': 23.653, 'eval_steps_per_second': 1.483, 'eval_ppl': 2.11736, 'memory/max_active (GiB)': 38.25, 'memory/max_allocated (GiB)': 38.25, 'memory/device_reserved (GiB)': 55.38, 'epoch': 0.33, 'tokens/train_per_sec_per_gpu': 0.0}
33%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 15/46 [02:40<04:33, 8.83s/it]
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:12<00:00, 1.48it/s]
[2026-04-09 03:33:58,491] [INFO] [axolotl.core.trainers.base._save:721] [PID:293655] Saving model checkpoint to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure/checkpoint-15
[2026-04-09 03:33:59,958] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.
return func(*args, **kwargs)
35%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 16/46 [02:51<06:50, 13.67s/it] {'loss': 0.804, 'grad_norm': 0.05892722308635712, 'learning_rate': 1.6801727377709195e-05, 'ppl': 2.23446, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 819.9111328125, 'tokens/total': 4194304, 'tokens/trainable': 869822, 'epoch': 0.35}
35%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 16/46 [02:51<06:50, 13.67s/it] 37%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 17/46 [03:00<05:53, 12.20s/it] {'loss': 0.7176, 'grad_norm': 0.05681360885500908, 'learning_rate': 1.6234898018587336e-05, 'ppl': 2.04951, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1584.865966796875, 'tokens/total': 4456448, 'tokens/trainable': 921366, 'epoch': 0.38}
37%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 17/46 [03:00<05:53, 12.20s/it] 39%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 18/46 [03:09<05:12, 11.16s/it] {'loss': 0.7473, 'grad_norm': 0.05141684040427208, 'learning_rate': 1.563320058063622e-05, 'ppl': 2.11129, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1832.5343017578125, 'tokens/total': 4718592, 'tokens/trainable': 972442, 'epoch': 0.4}
39%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 18/46 [03:09<05:12, 11.16s/it] 41%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 19/46 [03:18<04:41, 10.44s/it] {'loss': 0.7396, 'grad_norm': 0.05582691356539726, 'learning_rate': 1.5000000000000002e-05, 'ppl': 2.0951, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1077.2296142578125, 'tokens/total': 4980736, 'tokens/trainable': 1025341, 'epoch': 0.42}
41%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 19/46 [03:18<04:41, 10.44s/it] 43%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 20/46 [03:27<04:18, 9.96s/it] {'loss': 0.7589, 'grad_norm': 0.048064205795526505, 'learning_rate': 1.4338837391175582e-05, 'ppl': 2.13593, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1663.6461181640625, 'tokens/total': 5242880, 'tokens/trainable': 1077984, 'epoch': 0.44}
43%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 20/46 [03:27<04:18, 9.96s/it] 46%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 21/46 [03:35<04:00, 9.62s/it] {'loss': 0.7195, 'grad_norm': 0.0480501726269722, 'learning_rate': 1.3653410243663953e-05, 'ppl': 2.05341, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1762.2572021484375, 'tokens/total': 5505024, 'tokens/trainable': 1129840, 'epoch': 0.46}
46%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 21/46 [03:35<04:00, 9.62s/it] 48%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 22/46 [03:44<03:45, 9.39s/it] {'loss': 0.6898, 'grad_norm': 0.045008592307567596, 'learning_rate': 1.2947551744109044e-05, 'ppl': 1.99332, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1673.069580078125, 'tokens/total': 5767168, 'tokens/trainable': 1189110, 'epoch': 0.49}
48%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 22/46 [03:44<03:45, 9.39s/it] 50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/46 [03:53<03:31, 9.20s/it] {'loss': 0.776, 'grad_norm': 0.05052850767970085, 'learning_rate': 1.2225209339563144e-05, 'ppl': 2.17276, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1745.7901611328125, 'tokens/total': 6029312, 'tokens/trainable': 1245078, 'epoch': 0.51}
50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 23/46 [03:53<03:31, 9.20s/it] 52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 24/46 [04:02<03:20, 9.12s/it] {'loss': 0.7013, 'grad_norm': 0.04177302494645119, 'learning_rate': 1.1490422661761744e-05, 'ppl': 2.01637, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1498.635986328125, 'tokens/total': 6291456, 'tokens/trainable': 1308912, 'epoch': 0.53}
52%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 24/46 [04:02<03:20, 9.12s/it] 54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 25/46 [04:11<03:09, 9.03s/it] {'loss': 0.7452, 'grad_norm': 0.04492776095867157, 'learning_rate': 1.0747300935864245e-05, 'ppl': 2.10686, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1319.31494140625, 'tokens/total': 6553600, 'tokens/trainable': 1364122, 'epoch': 0.55}
54%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 25/46 [04:11<03:09, 9.03s/it] 57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 26/46 [04:20<02:59, 8.96s/it] {'loss': 0.7507, 'grad_norm': 0.04114656522870064, 'learning_rate': 1e-05, 'ppl': 2.11848, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1585.4486083984375, 'tokens/total': 6815744, 'tokens/trainable': 1418567, 'epoch': 0.57}
57%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 26/46 [04:20<02:59, 8.96s/it] 59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 27/46 [04:28<02:49, 8.90s/it] {'loss': 0.7872, 'grad_norm': 0.04480674862861633, 'learning_rate': 9.252699064135759e-06, 'ppl': 2.19724, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1023.1653442382812, 'tokens/total': 7077888, 'tokens/trainable': 1472790, 'epoch': 0.6}
59%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 27/46 [04:28<02:49, 8.90s/it] 61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 28/46 [04:37<02:39, 8.84s/it] {'loss': 0.7238, 'grad_norm': 0.04875003173947334, 'learning_rate': 8.509577338238255e-06, 'ppl': 2.06225, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1241.5045166015625, 'tokens/total': 7340032, 'tokens/trainable': 1514813, 'epoch': 0.62}
61%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 28/46 [04:37<02:39, 8.84s/it] 63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 29/46 [04:46<02:30, 8.82s/it] {'loss': 0.7143, 'grad_norm': 0.044282350689172745, 'learning_rate': 7.774790660436857e-06, 'ppl': 2.04276, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1276.04638671875, 'tokens/total': 7602176, 'tokens/trainable': 1560152, 'epoch': 0.64}
63%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 29/46 [04:46<02:30, 8.82s/it] 65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 30/46 [04:55<02:21, 8.82s/it] {'loss': 0.7404, 'grad_norm': 0.04024430364370346, 'learning_rate': 7.052448255890958e-06, 'ppl': 2.09677, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1432.028564453125, 'tokens/total': 7864320, 'tokens/trainable': 1615087, 'epoch': 0.66}
65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 30/46 [04:55<02:21, 8.82s/it][2026-04-09 03:36:12,977] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:293655] Running evaluation step...
0%| | 0/20 [00:00<?, ?it/s]
10%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 2/20 [00:00<00:05, 3.12it/s]
15%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/20 [00:01<00:07, 2.19it/s]
20%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 4/20 [00:01<00:08, 1.89it/s]
25%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/20 [00:02<00:08, 1.72it/s]
30%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/20 [00:03<00:08, 1.62it/s]
35%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 7/20 [00:03<00:08, 1.60it/s]
40%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 8/20 [00:04<00:07, 1.59it/s]
45%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/20 [00:05<00:07, 1.56it/s]
50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 10/20 [00:05<00:06, 1.54it/s]
55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 11/20 [00:06<00:05, 1.54it/s]
60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 12/20 [00:07<00:05, 1.54it/s]
65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/20 [00:07<00:04, 1.51it/s]
70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 14/20 [00:08<00:03, 1.52it/s]
75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 15/20 [00:09<00:03, 1.52it/s]
80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 16/20 [00:09<00:02, 1.54it/s]
85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 17/20 [00:10<00:01, 1.50it/s]
90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 18/20 [00:11<00:01, 1.52it/s]
95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 19/20 [00:11<00:00, 1.53it/s]Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-io0kbzgt'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-d5dg0bpp'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-k3iy6laz'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-4b0lhhv3'
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:12<00:00, 1.50it/s]
{'eval_loss': 0.7381538152694702, 'eval_runtime': 13.7476, 'eval_samples_per_second': 23.204, 'eval_steps_per_second': 1.455, 'eval_ppl': 2.09207, 'memory/max_active (GiB)': 38.25, 'memory/max_allocated (GiB)': 38.25, 'memory/device_reserved (GiB)': 56.53, 'epoch': 0.66, 'tokens/train_per_sec_per_gpu': 0.0}
65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 30/46 [05:08<02:21, 8.82s/it]
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:12<00:00, 1.50it/s]
[2026-04-09 03:36:26,806] [INFO] [axolotl.core.trainers.base._save:721] [PID:293655] Saving model checkpoint to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure/checkpoint-30
[2026-04-09 03:36:27,759] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.
return func(*args, **kwargs)
67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 31/46 [05:20<03:27, 13.82s/it] {'loss': 0.6766, 'grad_norm': 0.04066118597984314, 'learning_rate': 6.34658975633605e-06, 'ppl': 1.96718, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1761.022216796875, 'tokens/total': 8126464, 'tokens/trainable': 1665278, 'epoch': 0.69}
67%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 31/46 [05:20<03:27, 13.82s/it] 70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 32/46 [05:29<02:52, 12.29s/it] {'loss': 0.6863, 'grad_norm': 0.046845149248838425, 'learning_rate': 5.66116260882442e-06, 'ppl': 1.98635, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1473.9476318359375, 'tokens/total': 8388608, 'tokens/trainable': 1715159, 'epoch': 0.71}
70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 32/46 [05:29<02:52, 12.29s/it] 72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 33/46 [05:38<02:26, 11.25s/it] {'loss': 0.72, 'grad_norm': 0.038377199321985245, 'learning_rate': 5.000000000000003e-06, 'ppl': 2.05443, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 2030.8389892578125, 'tokens/total': 8650752, 'tokens/trainable': 1778054, 'epoch': 0.73}
72%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 33/46 [05:38<02:26, 11.25s/it] 74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 34/46 [05:46<02:06, 10.53s/it] {'loss': 0.706, 'grad_norm': 0.040965937077999115, 'learning_rate': 4.3667994193637794e-06, 'ppl': 2.02587, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 2132.74609375, 'tokens/total': 8912896, 'tokens/trainable': 1836458, 'epoch': 0.75}
74%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 34/46 [05:46<02:06, 10.53s/it] 76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 35/46 [05:55<01:50, 10.00s/it] {'loss': 0.7886, 'grad_norm': 0.038428883999586105, 'learning_rate': 3.7651019814126656e-06, 'ppl': 2.20031, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1427.942138671875, 'tokens/total': 9175040, 'tokens/trainable': 1889305, 'epoch': 0.77}
76%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 35/46 [05:55<01:50, 10.00s/it] 78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 36/46 [06:04<01:36, 9.67s/it] {'loss': 0.7555, 'grad_norm': 0.04247698187828064, 'learning_rate': 3.1982726222908046e-06, 'ppl': 2.12868, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1808.15478515625, 'tokens/total': 9437184, 'tokens/trainable': 1948552, 'epoch': 0.8}
78%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 36/46 [06:04<01:36, 9.67s/it] 80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/46 [06:13<01:24, 9.40s/it] {'loss': 0.6227, 'grad_norm': 0.0429508201777935, 'learning_rate': 2.669481281701739e-06, 'ppl': 1.86395, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1936.651123046875, 'tokens/total': 9699328, 'tokens/trainable': 2010307, 'epoch': 0.82}
80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 37/46 [06:13<01:24, 9.40s/it] 83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 38/46 [06:22<01:13, 9.22s/it] {'loss': 0.6908, 'grad_norm': 0.041411444544792175, 'learning_rate': 2.1816851753197023e-06, 'ppl': 1.99531, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 2212.40478515625, 'tokens/total': 9961472, 'tokens/trainable': 2068108, 'epoch': 0.84}
83%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 38/46 [06:22<01:13, 9.22s/it] 85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 39/46 [06:31<01:03, 9.12s/it] {'loss': 0.7247, 'grad_norm': 0.04243037849664688, 'learning_rate': 1.7376122568400533e-06, 'ppl': 2.06411, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1549.3170166015625, 'tokens/total': 10223616, 'tokens/trainable': 2133908, 'epoch': 0.86}
85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 39/46 [06:31<01:03, 9.12s/it] 87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 40/46 [06:39<00:54, 9.03s/it] {'loss': 0.7514, 'grad_norm': 0.0426081083714962, 'learning_rate': 1.339745962155613e-06, 'ppl': 2.11997, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1715.3697509765625, 'tokens/total': 10485760, 'tokens/trainable': 2194277, 'epoch': 0.88}
87%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 40/46 [06:39<00:54, 9.03s/it] 89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 41/46 [06:48<00:44, 8.95s/it] {'loss': 0.725, 'grad_norm': 0.04286476597189903, 'learning_rate': 9.903113209758098e-07, 'ppl': 2.06473, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1690.9439697265625, 'tokens/total': 10747904, 'tokens/trainable': 2243987, 'epoch': 0.91}
89%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ | 41/46 [06:48<00:44, 8.95s/it] 91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/46 [06:57<00:35, 8.94s/it] {'loss': 0.6866, 'grad_norm': 0.0390406996011734, 'learning_rate': 6.912625135579587e-07, 'ppl': 1.98695, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 2454.6279296875, 'tokens/total': 11010048, 'tokens/trainable': 2313512, 'epoch': 0.93}
91%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 42/46 [06:57<00:35, 8.94s/it] 93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 43/46 [07:06<00:26, 8.91s/it] {'loss': 0.6431, 'grad_norm': 0.03946515917778015, 'learning_rate': 4.4427194213859216e-07, 'ppl': 1.90237, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1843.193115234375, 'tokens/total': 11272192, 'tokens/trainable': 2373378, 'epoch': 0.95}
93%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 43/46 [07:06<00:26, 8.91s/it] 96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 44/46 [07:15<00:17, 8.85s/it] {'loss': 0.7175, 'grad_norm': 0.042728617787361145, 'learning_rate': 2.507208781817638e-07, 'ppl': 2.0493, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1632.2830810546875, 'tokens/total': 11534336, 'tokens/trainable': 2416789, 'epoch': 0.97}
96%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 44/46 [07:15<00:17, 8.85s/it] 98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 45/46 [07:23<00:08, 8.83s/it] {'loss': 0.769, 'grad_norm': 0.04248933494091034, 'learning_rate': 1.1169173774871478e-07, 'ppl': 2.15761, 'memory/max_active (GiB)': 46.14, 'memory/max_allocated (GiB)': 46.14, 'memory/device_reserved (GiB)': 56.53, 'tokens/train_per_sec_per_gpu': 1073.7342529296875, 'tokens/total': 11796480, 'tokens/trainable': 2467212, 'epoch': 0.99}
98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 45/46 [07:23<00:08, 8.83s/it][2026-04-09 03:38:41,816] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:293655] Running evaluation step...
0%| | 0/20 [00:00<?, ?it/s]
10%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 2/20 [00:00<00:07, 2.47it/s]
15%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Ž | 3/20 [00:01<00:08, 1.98it/s]
20%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ– | 4/20 [00:02<00:08, 1.79it/s]
25%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 5/20 [00:02<00:09, 1.63it/s]
30%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 6/20 [00:03<00:08, 1.59it/s]
35%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 7/20 [00:04<00:08, 1.59it/s]
40%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 8/20 [00:04<00:07, 1.58it/s]
45%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 9/20 [00:05<00:07, 1.53it/s]
50%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 10/20 [00:06<00:06, 1.53it/s]
55%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 11/20 [00:06<00:05, 1.53it/s]
60%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Œ | 12/20 [00:07<00:05, 1.54it/s]
65%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 13/20 [00:08<00:04, 1.52it/s]
70%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‹ | 14/20 [00:08<00:03, 1.52it/s]
75%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 15/20 [00:09<00:03, 1.52it/s]
80%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 16/20 [00:10<00:02, 1.54it/s]
85%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 17/20 [00:10<00:02, 1.50it/s]
90%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 18/20 [00:11<00:01, 1.51it/s]
95%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–‰ | 19/20 [00:12<00:00, 1.52it/s]Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-r7b1eufv'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-dt7h9v18'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-56rdqy1n'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-eu7acacd'
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:12<00:00, 1.46it/s]
{'eval_loss': 0.7366353869438171, 'eval_runtime': 13.6924, 'eval_samples_per_second': 23.298, 'eval_steps_per_second': 1.461, 'eval_ppl': 2.0889, 'memory/max_active (GiB)': 38.25, 'memory/max_allocated (GiB)': 38.25, 'memory/device_reserved (GiB)': 56.53, 'epoch': 0.99, 'tokens/train_per_sec_per_gpu': 0.0}
98%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–Š | 45/46 [07:37<00:08, 8.83s/it]
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 20/20 [00:12<00:00, 1.46it/s]
[2026-04-09 03:38:55,597] [INFO] [axolotl.core.trainers.base._save:721] [PID:293655] Saving model checkpoint to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure/checkpoint-45
[2026-04-09 03:38:56,600] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.
return func(*args, **kwargs)
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-ql8p5j0x'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-6rn3wsm8'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-yyyosw1q'
Traceback (most recent call last):
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 303, in _run_finalizers
finalizer()
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 227, in __call__
res = self._callback(*self._args, **self._kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/multiprocessing/util.py", line 136, in _remove_temp_dir
rmtree(tempdir, onerror=onerror)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 759, in rmtree
_rmtree_safe_fd(stack, onexc)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 703, in _rmtree_safe_fd
onexc(func, path, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 700, in _rmtree_safe_fd
onexc(os.unlink, fullname, err)
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 750, in onexc
return onerror(func, path, exc_info)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/tkwang/.local/share/uv/python/cpython-3.12.12-linux-x86_64-gnu/lib/python3.12/shutil.py", line 698, in _rmtree_safe_fd
os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '/home/tkwang/.cache/tmp/pymp-zvda8j_e'
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 46/46 [07:42<00:00, 11.71s/it] {'loss': 0.6996, 'grad_norm': 0.08553393185138702, 'learning_rate': 2.796202818819871e-08, 'ppl': 2.01295, 'memory/max_active (GiB)': 45.98, 'memory/max_allocated (GiB)': 45.98, 'memory/device_reserved (GiB)': 56.51, 'tokens/train_per_sec_per_gpu': 3303.278564453125, 'tokens/total': 11862016, 'tokens/trainable': 2474470, 'epoch': 1.0}
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 46/46 [07:42<00:00, 11.71s/it][2026-04-09 03:39:00,327] [INFO] [axolotl.core.trainers.base._save:721] [PID:293655] Saving model checkpoint to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure/checkpoint-46
[2026-04-09 03:39:01,510] [WARNING] [py.warnings._showwarnmsg:112] [PID:293655] /scratch/tkwang/SecSteer/venv/lib/python3.12/site-packages/torch/distributed/c10d_logger.py:83: UserWarning: barrier(): using the device under current context. You can specify `device_id` in `init_process_group` to mute this warning.
return func(*args, **kwargs)
{'train_runtime': 471.4066, 'train_samples_per_second': 6.245, 'train_steps_per_second': 0.098, 'train_loss': 0.7364971559980641, 'memory/max_active (GiB)': 15.01, 'memory/max_allocated (GiB)': 15.01, 'memory/device_reserved (GiB)': 56.51, 'epoch': 1.0, 'tokens/train_per_sec_per_gpu': 0.0}
100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 46/46 [07:45<00:00, 11.71s/it] 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 46/46 [07:45<00:00, 10.11s/it]
[2026-04-09 03:39:03,193] [INFO] [axolotl.train.save_trained_model:233] [PID:293655] Training completed! Saving trained model to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure.
[2026-04-09 03:39:03,985] [INFO] [axolotl.train.save_trained_model:351] [PID:293655] Model successfully saved to /home/tkwang/scratch/SecSteer/axolotl-outputs/lora/Qwen2.5-Coder-7B-stage2-secure