[2026-05-25 12:21:25,543] [DEBUG] [axolotl.utils.config.log_gpu_memory_usage:127] [PID:2038090] baseline 0.000GB ()
[2026-05-25 12:21:25,543] [INFO] [axolotl.cli.config.load_cfg:259] [PID:2038090] config:
{
  "activation_offloading": false,
  "adapter": "lora",
  "axolotl_config_path": "out/qwen3-8b-persistent-navigation-20260525_121743/axolotl_config.yaml",
  "base_model": "Qwen/Qwen3-8B",
  "base_model_config": "Qwen/Qwen3-8B",
  "batch_size": 64,
  "bf16": true,
  "capabilities": {
    "bf16": true,
    "compute_capability": "sm_90",
    "fp8": true,
    "n_gpu": 4,
    "n_node": 1
  },
  "context_parallel_size": 1,
  "dataloader_num_workers": 4,
  "dataloader_pin_memory": true,
  "dataloader_prefetch_factor": 256,
  "dataset_num_proc": 288,
  "dataset_prepared_path": "out/prepared_dataset_persistent",
  "datasets": [
    {
      "chat_template": "tokenizer_default",
      "field_messages": "messages",
      "message_property_mappings": {
        "content": "content",
        "role": "role"
      },
      "path": "/e/project1/reformo/salgarkar1/agents_learn/pythonformer-workshop/paired/train/out/paired_data/persistent/navigation/traces.jsonl",
      "roles_to_train": [
        "assistant"
      ],
      "trust_remote_code": false,
      "type": "chat_template"
    }
  ],
  "ddp": true,
  "device": "cuda:0",
  "device_map": {
    "": 0
  },
  "dion_rank_fraction": 1.0,
  "dion_rank_multiple_of": 1,
  "env_capabilities": {
    "torch_version": "2.10.0"
  },
  "eval_batch_size": 1,
  "eval_causal_lm_metrics": [
    "sacrebleu",
    "comet",
    "ter",
    "chrf"
  ],
  "eval_max_new_tokens": 128,
  "eval_steps": 5,
  "eval_table_size": 0,
  "experimental_skip_move_to_device": true,
  "flash_attention": true,
  "fp16": false,
  "gradient_accumulation_steps": 16,
  "gradient_checkpointing": true,
  "gradient_checkpointing_kwargs": {
    "use_reentrant": false
  },
  "include_tkps": true,
  "is_falcon_derived_model": false,
  "is_llama_derived_model": false,
  "is_mistral_derived_model": false,
  "learning_rate": 0.0001,
  "lisa_layers_attribute": "model.layers",
  "load_best_model_at_end": false,
  "load_in_4bit": true,
  "load_in_8bit": false,
  "local_rank": 0,
  "logging_steps": 1,
  "lora_alpha": 128,
  "lora_dropout": 0.05,
  "lora_r": 64,
  "lora_target_linear": false,
  "lora_target_modules": [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj"
  ],
  "loraplus_lr_embedding": 1e-06,
  "lr_scheduler": "cosine",
  "mean_resizing_embeddings": false,
  "micro_batch_size": 1,
  "model_config_type": "qwen3",
  "num_epochs": 3.0,
  "optimizer": "adamw_torch",
  "otel_metrics_host": "localhost",
  "otel_metrics_port": 8000,
  "output_dir": "out/qwen3-8b-persistent-navigation-20260525_121743",
  "pad_to_sequence_len": true,
  "pretrain_multipack_attn": true,
  "profiler_steps_start": 0,
  "qlora_sharded_model_loading": false,
  "ray_num_workers": 1,
  "resources_per_worker": {
    "GPU": 1
  },
  "sample_packing": false,
  "sample_packing_bin_size": 200,
  "sample_packing_group_size": 100000,
  "save_only_model": false,
  "save_safetensors": true,
  "save_strategy": "epoch",
  "save_total_limit": 3,
  "seed": 3407,
  "sequence_len": 16384,
  "shuffle_before_merging_datasets": false,
  "shuffle_merged_datasets": true,
  "skip_prepare_dataset": false,
  "streaming_multipack_buffer_size": 10000,
  "strict": false,
  "tensor_parallel_size": 1,
  "tf32": true,
  "tiled_mlp_use_original_mlp": true,
  "tokenizer_config": "Qwen/Qwen3-8B",
  "tokenizer_save_jinja_files": true,
  "tokenizer_type": "AutoTokenizer",
  "torch_dtype": "torch.bfloat16",
  "train_on_inputs": false,
  "trl": {
    "log_completions": false,
    "mask_truncated_completions": false,
    "ref_model_mixup_alpha": 0.9,
    "ref_model_sync_steps": 64,
    "scale_rewards": true,
    "sync_ref_model": false,
    "use_vllm": false,
    "vllm_server_host": "0.0.0.0",
    "vllm_server_port": 8000
  },
  "trust_remote_code": true,
  "type_of_model": "AutoModelForCausalLM",
  "use_otel_metrics": false,
  "use_ray": false,
  "use_wandb": true,
  "val_set_size": 0.04,
  "vllm": {
    "device": "auto",
    "dtype": "auto",
    "gpu_memory_utilization": 0.9,
    "host": "0.0.0.0",
    "port": 8000
  },
  "wandb_project": "pythonformer",
  "warmup_ratio": 0.03,
  "weight_decay": 0.01,
  "world_size": 4
}
[2026-05-25 12:21:25,545] [INFO] [axolotl.cli.checks.check_user_token:35] [PID:2038090] Skipping HuggingFace token verification because HF_HUB_OFFLINE is set to True. Only local files will be used.
[2026-05-25 12:21:25,915] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:2038090] EOS: 151645 / <|im_end|>
[2026-05-25 12:21:25,915] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:2038090] BOS: None / None
[2026-05-25 12:21:25,915] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:2038090] PAD: 151643 / <|endoftext|>
[2026-05-25 12:21:25,915] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:2038090] UNK: None / None
[2026-05-25 12:22:09,947] [INFO] [axolotl.utils.data.shared.load_preprocessed_dataset:475] [PID:2038090] Loading prepared dataset from disk at out/prepared_dataset_persistent/bbbe19e5e41f9f5546df4716dbd4dec8...
[2026-05-25 12:22:09,959] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:417] [PID:2038090] total_num_tokens: 5_029_951
[2026-05-25 12:22:09,974] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:435] [PID:2038090] `total_supervised_tokens: 1_275_757`
[2026-05-25 12:22:09,974] [DEBUG] [axolotl.utils.trainer.calculate_total_num_steps:533] [PID:2038090] total_num_steps: 45
[2026-05-25 12:22:09,974] [INFO] [axolotl.utils.data.sft._prepare_standard_dataset:121] [PID:2038090] Maximum number of steps set at 45
[2026-05-25 12:22:10,020] [DEBUG] [axolotl.train.setup_model_and_tokenizer:70] [PID:2038090] loading tokenizer... Qwen/Qwen3-8B
[2026-05-25 12:22:10,251] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:285] [PID:2038090] EOS: 151645 / <|im_end|>
[2026-05-25 12:22:10,251] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:286] [PID:2038090] BOS: None / None
[2026-05-25 12:22:10,251] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:287] [PID:2038090] PAD: 151643 / <|endoftext|>
[2026-05-25 12:22:10,251] [DEBUG] [axolotl.loaders.tokenizer.load_tokenizer:288] [PID:2038090] UNK: None / None
[2026-05-25 12:22:10,251] [DEBUG] [axolotl.train.setup_model_and_tokenizer:82] [PID:2038090] Loading model
[2026-05-25 12:22:10,258] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_evaluation_loop:87] [PID:2038090] Patched Trainer.evaluation_loop with nanmean loss calculation
[2026-05-25 12:22:10,258] [DEBUG] [axolotl.monkeypatch.transformers.trainer_loss_calc.patch_maybe_log_save_evaluate:138] [PID:2038090] Patched Trainer._maybe_log_save_evaluate with nanmean loss calculation
Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]Loading checkpoint shards:  20%|██        | 1/5 [00:03<00:14,  3.70s/it]Loading checkpoint shards:  40%|████      | 2/5 [00:05<00:08,  2.86s/it]Loading checkpoint shards:  60%|██████    | 3/5 [00:08<00:05,  2.79s/it]Loading checkpoint shards:  80%|████████  | 4/5 [00:10<00:02,  2.37s/it]Loading checkpoint shards: 100%|██████████| 5/5 [00:11<00:00,  1.88s/it]Loading checkpoint shards: 100%|██████████| 5/5 [00:11<00:00,  2.29s/it]
[2026-05-25 12:22:22,459] [INFO] [axolotl.loaders.model._prepare_model_for_quantization:853] [PID:2038090] converting PEFT model w/ prepare_model_for_kbit_training
[2026-05-25 12:22:22,611] [INFO] [axolotl.loaders.model._configure_embedding_dtypes:347] [PID:2038090] Converting modules to torch.bfloat16
[2026-05-25 12:22:22,624] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:2038090] Memory usage after model load 31.673GB (+31.673GB allocated, +33.244GB reserved)
trainable params: 174,587,904 || all params: 8,365,323,264 || trainable%: 2.0870
[2026-05-25 12:22:23,414] [DEBUG] [axolotl.loaders.model.log_gpu_memory_usage:127] [PID:2038090] after adapters 28.849GB (+28.849GB allocated, +33.578GB reserved)
[2026-05-25 12:23:15,091] [INFO] [axolotl.train.save_initial_configs:413] [PID:2038090] Pre-saving adapter config to out/qwen3-8b-persistent-navigation-20260525_121743...
[2026-05-25 12:23:15,093] [INFO] [axolotl.train.save_initial_configs:417] [PID:2038090] Pre-saving tokenizer to out/qwen3-8b-persistent-navigation-20260525_121743...
[2026-05-25 12:23:15,233] [INFO] [axolotl.train.save_initial_configs:422] [PID:2038090] Pre-saving model config to out/qwen3-8b-persistent-navigation-20260525_121743...
[2026-05-25 12:23:15,238] [INFO] [axolotl.train.execute_training:212] [PID:2038090] Starting trainer...
wandb: Tracking run with wandb version 0.24.2
wandb: W&B syncing is set to `offline` in this directory. Run `wandb online` or set WANDB_MODE=online to enable cloud syncing.
wandb: Run data is saved locally in /e/project1/reformo/salgarkar1/agents_learn/pythonformer-workshop/wandb/offline-run-20260525_122320-3g43yn11
wandb: Detected [huggingface_hub.inference] in use.
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
wandb: WARNING Saving files without folders. If you want to preserve subdirectories pass base_path to wandb.save, i.e. wandb.save("/mnt/folder/file.h5", base_path="/mnt")
wandb: WARNING Symlinked 1 file into the W&B run directory; call wandb.save again to sync new files.
[2026-05-25 12:23:24,701] [INFO] [axolotl.utils.callbacks.on_train_begin:757] [PID:2038090] The Axolotl config has been saved to the WandB run under files.
  0%|          | 0/45 [00:00<?, ?it/s][2026-05-25 12:23:24,704] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.50it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.65it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.41it/s][A
 50%|█████     | 5/10 [00:03<00:03,  1.28it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.22it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.17it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.15it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.14it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.12it/s][A                                      
                                               [A{'eval_loss': 0.9681045413017273, 'eval_runtime': 9.5712, 'eval_samples_per_second': 4.179, 'eval_steps_per_second': 1.045, 'eval_ppl': 2.63295, 'memory/max_active (GiB)': 53.19, 'memory/max_allocated (GiB)': 53.19, 'memory/device_reserved (GiB)': 56.52, 'epoch': 0}
  0%|          | 0/45 [00:09<?, ?it/s]
100%|██████████| 10/10 [00:08<00:00,  1.12it/s][A
                                               [A  2%|▏         | 1/45 [00:59<43:25, 59.23s/it]                                              {'loss': 0.9844, 'grad_norm': 3.6055057048797607, 'learning_rate': 0.0, 'ppl': 2.67621, 'memory/max_active (GiB)': 62.8, 'memory/max_allocated (GiB)': 62.8, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 99.8906478881836, 'tokens/total': 1048576, 'tokens/trainable': 77925, 'epoch': 0.07}
  2%|▏         | 1/45 [00:59<43:25, 59.23s/it]  4%|▍         | 2/45 [01:47<37:55, 52.91s/it]                                              {'loss': 0.9794, 'grad_norm': 3.3668525218963623, 'learning_rate': 5e-05, 'ppl': 2.66286, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 93.9311294555664, 'tokens/total': 2097152, 'tokens/trainable': 163858, 'epoch': 0.13}
  4%|▍         | 2/45 [01:47<37:55, 52.91s/it]  7%|▋         | 3/45 [02:36<35:52, 51.24s/it]                                              {'loss': 0.7875, 'grad_norm': 1.4857083559036255, 'learning_rate': 0.0001, 'ppl': 2.19789, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 90.68009185791016, 'tokens/total': 3145728, 'tokens/trainable': 243851, 'epoch': 0.2}
  7%|▋         | 3/45 [02:36<35:52, 51.24s/it]  9%|▉         | 4/45 [03:26<34:34, 50.60s/it]                                              {'loss': 0.6776, 'grad_norm': 0.5946469902992249, 'learning_rate': 9.986661418317759e-05, 'ppl': 1.96915, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 112.9667739868164, 'tokens/total': 4194304, 'tokens/trainable': 331508, 'epoch': 0.27}
  9%|▉         | 4/45 [03:26<34:34, 50.60s/it] 11%|█         | 5/45 [04:16<33:32, 50.32s/it]                                              {'loss': 0.6197, 'grad_norm': 0.35739865899086, 'learning_rate': 9.946716840375551e-05, 'ppl': 1.85837, 'memory/max_active (GiB)': 64.13, 'memory/max_allocated (GiB)': 64.13, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 111.46574401855469, 'tokens/total': 5242880, 'tokens/trainable': 420940, 'epoch': 0.33}
 11%|█         | 5/45 [04:16<33:32, 50.32s/it][2026-05-25 12:27:41,118] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.85it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.44it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.29it/s][A
 50%|█████     | 5/10 [00:03<00:04,  1.22it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.17it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.14it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.12it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.12it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A                                              
                                               [A{'eval_loss': 0.5761364102363586, 'eval_runtime': 9.4945, 'eval_samples_per_second': 4.213, 'eval_steps_per_second': 1.053, 'eval_ppl': 1.77915, 'memory/max_active (GiB)': 54.54, 'memory/max_allocated (GiB)': 54.54, 'memory/device_reserved (GiB)': 66.97, 'epoch': 0.33, 'tokens/train_per_sec_per_gpu': 0.0}
 11%|█         | 5/45 [04:25<33:32, 50.32s/it]
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A
                                               [A 13%|█▎        | 6/45 [05:15<34:37, 53.27s/it]                                              {'loss': 0.5976, 'grad_norm': 0.30389270186424255, 'learning_rate': 9.880379387779637e-05, 'ppl': 1.81775, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 102.36698150634766, 'tokens/total': 6291456, 'tokens/trainable': 504211, 'epoch': 0.4}
 13%|█▎        | 6/45 [05:15<34:37, 53.27s/it] 16%|█▌        | 7/45 [06:04<32:51, 51.88s/it]                                              {'loss': 0.5493, 'grad_norm': 0.29107972979545593, 'learning_rate': 9.78800299954203e-05, 'ppl': 1.73204, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 120.0954360961914, 'tokens/total': 7340032, 'tokens/trainable': 586769, 'epoch': 0.47}
 16%|█▌        | 7/45 [06:04<32:51, 51.88s/it] 18%|█▊        | 8/45 [06:53<31:28, 51.05s/it]                                              {'loss': 0.5306, 'grad_norm': 0.2634803354740143, 'learning_rate': 9.67008054366274e-05, 'ppl': 1.69995, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 114.32392883300781, 'tokens/total': 8388608, 'tokens/trainable': 674991, 'epoch': 0.53}
 18%|█▊        | 8/45 [06:53<31:28, 51.05s/it] 20%|██        | 9/45 [07:42<30:17, 50.50s/it]                                              {'loss': 0.4969, 'grad_norm': 0.2511760890483856, 'learning_rate': 9.527241187465734e-05, 'ppl': 1.64362, 'memory/max_active (GiB)': 64.13, 'memory/max_allocated (GiB)': 64.13, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 116.690185546875, 'tokens/total': 9437184, 'tokens/trainable': 752848, 'epoch': 0.6}
 20%|██        | 9/45 [07:42<30:17, 50.50s/it] 22%|██▏       | 10/45 [08:32<29:17, 50.20s/it]                                               {'loss': 0.4796, 'grad_norm': 0.21181228756904602, 'learning_rate': 9.360247040719039e-05, 'ppl': 1.61543, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 100.84420013427734, 'tokens/total': 10485760, 'tokens/trainable': 839780, 'epoch': 0.67}
 22%|██▏       | 10/45 [08:32<29:17, 50.20s/it][2026-05-25 12:31:57,237] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.93it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.47it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.31it/s][A
 50%|█████     | 5/10 [00:03<00:04,  1.23it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.17it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.14it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.12it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.12it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A                                               
                                               [A{'eval_loss': 0.44040805101394653, 'eval_runtime': 9.4408, 'eval_samples_per_second': 4.237, 'eval_steps_per_second': 1.059, 'eval_ppl': 1.55334, 'memory/max_active (GiB)': 54.54, 'memory/max_allocated (GiB)': 54.54, 'memory/device_reserved (GiB)': 66.97, 'epoch': 0.67, 'tokens/train_per_sec_per_gpu': 0.0}
 22%|██▏       | 10/45 [08:41<29:17, 50.20s/it]
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A
                                               [A 24%|██▍       | 11/45 [09:31<29:56, 52.85s/it]                                               {'loss': 0.4565, 'grad_norm': 0.16248875856399536, 'learning_rate': 9.16998908944939e-05, 'ppl': 1.57854, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 125.30274963378906, 'tokens/total': 11534336, 'tokens/trainable': 929267, 'epoch': 0.73}
 24%|██▍       | 11/45 [09:31<29:56, 52.85s/it] 27%|██▋       | 12/45 [10:20<28:26, 51.70s/it]                                               {'loss': 0.44, 'grad_norm': 0.15751507878303528, 'learning_rate': 8.957482442146272e-05, 'ppl': 1.55271, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 147.85765075683594, 'tokens/total': 12582912, 'tokens/trainable': 1012131, 'epoch': 0.8}
 27%|██▋       | 12/45 [10:20<28:26, 51.70s/it] 29%|██▉       | 13/45 [11:10<27:15, 51.10s/it]                                               {'loss': 0.4461, 'grad_norm': 0.16444867849349976, 'learning_rate': 8.72386091371891e-05, 'ppl': 1.56221, 'memory/max_active (GiB)': 64.13, 'memory/max_allocated (GiB)': 64.13, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 103.92417907714844, 'tokens/total': 13631488, 'tokens/trainable': 1103069, 'epoch': 0.87}
 29%|██▉       | 13/45 [11:10<27:15, 51.10s/it] 31%|███       | 14/45 [12:00<26:13, 50.75s/it]                                               {'loss': 0.4204, 'grad_norm': 0.17406685650348663, 'learning_rate': 8.47037097610317e-05, 'ppl': 1.52257, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 113.34588623046875, 'tokens/total': 14680064, 'tokens/trainable': 1189578, 'epoch': 0.93}
 31%|███       | 14/45 [12:00<26:13, 50.75s/it] 33%|███▎      | 15/45 [12:49<25:13, 50.46s/it]                                               {'loss': 0.4179, 'grad_norm': 0.18400150537490845, 'learning_rate': 8.198365107794457e-05, 'ppl': 1.51877, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 87.22222137451172, 'tokens/total': 15728640, 'tokens/trainable': 1275757, 'epoch': 1.0}
 33%|███▎      | 15/45 [12:49<25:13, 50.46s/it][2026-05-25 12:36:14,602] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.17it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.55it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.35it/s][A
 50%|█████     | 5/10 [00:03<00:04,  1.25it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.19it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.15it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.13it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.12it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A                                               
                                               [A{'eval_loss': 0.3768181800842285, 'eval_runtime': 9.4968, 'eval_samples_per_second': 4.212, 'eval_steps_per_second': 1.053, 'eval_ppl': 1.45764, 'memory/max_active (GiB)': 54.54, 'memory/max_allocated (GiB)': 54.54, 'memory/device_reserved (GiB)': 66.97, 'epoch': 1.0, 'tokens/train_per_sec_per_gpu': 0.0}
 33%|███▎      | 15/45 [12:59<25:13, 50.46s/it]
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A
                                               [A[2026-05-25 12:36:24,111] [INFO] [axolotl.core.trainers.base._save:721] [PID:2038090] Saving model checkpoint to out/qwen3-8b-persistent-navigation-20260525_121743/checkpoint-15
[2026-05-25 12:36:24,166] [WARNING] [py.warnings._showwarnmsg:112] [PID:2038090] /e/project1/reformo/salgarkar1/agents_learn/pythonformer-workshop/.venv/lib/python3.12/site-packages/peft/utils/save_and_load.py:295: UserWarning: Could not find a config file in Qwen/Qwen3-8B - will assume that the vocabulary was not modified.
  warnings.warn(

 36%|███▌      | 16/45 [13:51<25:56, 53.69s/it]                                               {'loss': 0.3876, 'grad_norm': 0.17585863173007965, 'learning_rate': 7.909294577789766e-05, 'ppl': 1.47344, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 109.04915618896484, 'tokens/total': 16777216, 'tokens/trainable': 1359118, 'epoch': 1.07}
 36%|███▌      | 16/45 [13:51<25:56, 53.69s/it] 38%|███▊      | 17/45 [14:40<24:24, 52.29s/it]                                               {'loss': 0.3814, 'grad_norm': 0.1488562822341919, 'learning_rate': 7.604701702439651e-05, 'ppl': 1.46433, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 106.73249053955078, 'tokens/total': 17825792, 'tokens/trainable': 1443638, 'epoch': 1.13}
 38%|███▊      | 17/45 [14:40<24:24, 52.29s/it] 40%|████      | 18/45 [15:29<23:11, 51.53s/it]                                               {'loss': 0.3648, 'grad_norm': 0.13896878063678741, 'learning_rate': 7.286211616523193e-05, 'ppl': 1.44023, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 93.24516296386719, 'tokens/total': 18874368, 'tokens/trainable': 1527473, 'epoch': 1.2}
 40%|████      | 18/45 [15:29<23:11, 51.53s/it] 42%|████▏     | 19/45 [16:19<22:01, 50.84s/it]                                               {'loss': 0.3512, 'grad_norm': 0.11720948666334152, 'learning_rate': 6.95552360245078e-05, 'ppl': 1.42077, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 87.10844421386719, 'tokens/total': 19922944, 'tokens/trainable': 1615421, 'epoch': 1.27}
 42%|████▏     | 19/45 [16:19<22:01, 50.84s/it] 44%|████▍     | 20/45 [17:08<20:56, 50.28s/it]                                               {'loss': 0.3473, 'grad_norm': 0.12244201451539993, 'learning_rate': 6.614402023857232e-05, 'ppl': 1.41524, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 108.69977569580078, 'tokens/total': 20971520, 'tokens/trainable': 1697031, 'epoch': 1.33}
 44%|████▍     | 20/45 [17:08<20:56, 50.28s/it][2026-05-25 12:40:32,782] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.04it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.50it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.33it/s][A
 50%|█████     | 5/10 [00:03<00:04,  1.24it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.18it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.15it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.13it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.12it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.11it/s][A                                               
                                               [A{'eval_loss': 0.3374583125114441, 'eval_runtime': 9.3994, 'eval_samples_per_second': 4.256, 'eval_steps_per_second': 1.064, 'eval_ppl': 1.40138, 'memory/max_active (GiB)': 54.54, 'memory/max_allocated (GiB)': 54.54, 'memory/device_reserved (GiB)': 66.97, 'epoch': 1.33, 'tokens/train_per_sec_per_gpu': 0.0}
 44%|████▍     | 20/45 [17:17<20:56, 50.28s/it]
100%|██████████| 10/10 [00:08<00:00,  1.11it/s][A
                                               [A 47%|████▋     | 21/45 [18:07<21:11, 52.98s/it]                                               {'loss': 0.355, 'grad_norm': 0.12614424526691437, 'learning_rate': 6.264666911958404e-05, 'ppl': 1.42618, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 89.88668060302734, 'tokens/total': 22020096, 'tokens/trainable': 1778284, 'epoch': 1.4}
 47%|████▋     | 21/45 [18:07<21:11, 52.98s/it] 49%|████▉     | 22/45 [18:56<19:52, 51.85s/it]                                               {'loss': 0.3504, 'grad_norm': 0.12772953510284424, 'learning_rate': 5.908184254897182e-05, 'ppl': 1.41964, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 133.73704528808594, 'tokens/total': 23068672, 'tokens/trainable': 1863358, 'epoch': 1.47}
 49%|████▉     | 22/45 [18:56<19:52, 51.85s/it] 51%|█████     | 23/45 [19:46<18:45, 51.15s/it]                                               {'loss': 0.3644, 'grad_norm': 0.11631076782941818, 'learning_rate': 5.546856041889373e-05, 'ppl': 1.43965, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 97.81122589111328, 'tokens/total': 24117248, 'tokens/trainable': 1956710, 'epoch': 1.53}
 51%|█████     | 23/45 [19:46<18:45, 51.15s/it] 53%|█████▎    | 24/45 [20:35<17:45, 50.73s/it]                                               {'loss': 0.3211, 'grad_norm': 0.12257759273052216, 'learning_rate': 5.182610115288295e-05, 'ppl': 1.37864, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 102.95890045166016, 'tokens/total': 25165824, 'tokens/trainable': 2036417, 'epoch': 1.6}
 53%|█████▎    | 24/45 [20:35<17:45, 50.73s/it] 56%|█████▌    | 25/45 [21:25<16:45, 50.26s/it]                                               {'loss': 0.3125, 'grad_norm': 0.10963135957717896, 'learning_rate': 4.817389884711705e-05, 'ppl': 1.36684, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 93.00939178466797, 'tokens/total': 26214400, 'tokens/trainable': 2115620, 'epoch': 1.67}
 56%|█████▌    | 25/45 [21:25<16:45, 50.26s/it][2026-05-25 12:44:49,755] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:01<00:04,  1.98it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.49it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.32it/s][A
 50%|█████     | 5/10 [00:03<00:04,  1.23it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.18it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.14it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.13it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.12it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.11it/s][A                                               
                                               [A{'eval_loss': 0.31419986486434937, 'eval_runtime': 9.4055, 'eval_samples_per_second': 4.253, 'eval_steps_per_second': 1.063, 'eval_ppl': 1.36916, 'memory/max_active (GiB)': 54.54, 'memory/max_allocated (GiB)': 54.54, 'memory/device_reserved (GiB)': 66.97, 'epoch': 1.67, 'tokens/train_per_sec_per_gpu': 0.0}
 56%|█████▌    | 25/45 [21:34<16:45, 50.26s/it]
100%|██████████| 10/10 [00:08<00:00,  1.11it/s][A
                                               [A 58%|█████▊    | 26/45 [22:24<16:46, 52.99s/it]                                               {'loss': 0.3463, 'grad_norm': 0.10058556497097015, 'learning_rate': 4.4531439581106295e-05, 'ppl': 1.41383, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 113.03112030029297, 'tokens/total': 27262976, 'tokens/trainable': 2206816, 'epoch': 1.73}
 58%|█████▊    | 26/45 [22:24<16:46, 52.99s/it] 60%|██████    | 27/45 [23:13<15:33, 51.87s/it]                                               {'loss': 0.3385, 'grad_norm': 0.10328979045152664, 'learning_rate': 4.0918157451028185e-05, 'ppl': 1.40284, 'memory/max_active (GiB)': 64.13, 'memory/max_allocated (GiB)': 64.13, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 94.68313598632812, 'tokens/total': 28311552, 'tokens/trainable': 2296593, 'epoch': 1.8}
 60%|██████    | 27/45 [23:13<15:33, 51.87s/it] 62%|██████▏   | 28/45 [24:03<14:31, 51.27s/it]                                               {'loss': 0.3226, 'grad_norm': 0.0950852483510971, 'learning_rate': 3.735333088041596e-05, 'ppl': 1.38071, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 90.9925537109375, 'tokens/total': 29360128, 'tokens/trainable': 2384752, 'epoch': 1.87}
 62%|██████▏   | 28/45 [24:03<14:31, 51.27s/it] 64%|██████▍   | 29/45 [24:53<13:31, 50.74s/it]                                               {'loss': 0.3298, 'grad_norm': 0.10383325070142746, 'learning_rate': 3.38559797614277e-05, 'ppl': 1.39069, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 101.099853515625, 'tokens/total': 30408704, 'tokens/trainable': 2468836, 'epoch': 1.93}
 64%|██████▍   | 29/45 [24:53<13:31, 50.74s/it] 67%|██████▋   | 30/45 [25:42<12:35, 50.35s/it]                                               {'loss': 0.3015, 'grad_norm': 0.0998990386724472, 'learning_rate': 3.0444763975492208e-05, 'ppl': 1.35189, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 109.46707153320312, 'tokens/total': 31457280, 'tokens/trainable': 2551514, 'epoch': 2.0}
 67%|██████▋   | 30/45 [25:42<12:35, 50.35s/it][2026-05-25 12:49:07,158] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.07it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.52it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.34it/s][A
 50%|█████     | 5/10 [00:03<00:04,  1.24it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.18it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.15it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.13it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.12it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A                                               
                                               [A{'eval_loss': 0.299793541431427, 'eval_runtime': 9.494, 'eval_samples_per_second': 4.213, 'eval_steps_per_second': 1.053, 'eval_ppl': 1.34958, 'memory/max_active (GiB)': 54.54, 'memory/max_allocated (GiB)': 54.54, 'memory/device_reserved (GiB)': 66.97, 'epoch': 2.0, 'tokens/train_per_sec_per_gpu': 0.0}
 67%|██████▋   | 30/45 [25:51<12:35, 50.35s/it]
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A
                                               [A[2026-05-25 12:49:16,697] [INFO] [axolotl.core.trainers.base._save:721] [PID:2038090] Saving model checkpoint to out/qwen3-8b-persistent-navigation-20260525_121743/checkpoint-30
[2026-05-25 12:49:16,763] [WARNING] [py.warnings._showwarnmsg:112] [PID:2038090] /e/project1/reformo/salgarkar1/agents_learn/pythonformer-workshop/.venv/lib/python3.12/site-packages/peft/utils/save_and_load.py:295: UserWarning: Could not find a config file in Qwen/Qwen3-8B - will assume that the vocabulary was not modified.
  warnings.warn(

 69%|██████▉   | 31/45 [26:44<12:32, 53.72s/it]                                               {'loss': 0.3205, 'grad_norm': 0.10007507354021072, 'learning_rate': 2.7137883834768073e-05, 'ppl': 1.37782, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 78.93754577636719, 'tokens/total': 32505856, 'tokens/trainable': 2637465, 'epoch': 2.07}
 69%|██████▉   | 31/45 [26:44<12:32, 53.72s/it] 71%|███████   | 32/45 [27:33<11:23, 52.58s/it]                                               {'loss': 0.3047, 'grad_norm': 0.09925687313079834, 'learning_rate': 2.3952982975603496e-05, 'ppl': 1.35622, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 89.30579376220703, 'tokens/total': 33554432, 'tokens/trainable': 2720303, 'epoch': 2.13}
 71%|███████   | 32/45 [27:33<11:23, 52.58s/it] 73%|███████▎  | 33/45 [28:23<10:18, 51.55s/it]                                               {'loss': 0.3074, 'grad_norm': 0.10130885988473892, 'learning_rate': 2.090705422210237e-05, 'ppl': 1.35988, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 95.62286376953125, 'tokens/total': 34603008, 'tokens/trainable': 2800842, 'epoch': 2.2}
 73%|███████▎  | 33/45 [28:23<10:18, 51.55s/it] 76%|███████▌  | 34/45 [29:12<09:19, 50.84s/it]                                               {'loss': 0.3049, 'grad_norm': 0.09997802972793579, 'learning_rate': 1.801634892205545e-05, 'ppl': 1.35649, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 103.69766235351562, 'tokens/total': 35651584, 'tokens/trainable': 2881601, 'epoch': 2.27}
 76%|███████▌  | 34/45 [29:12<09:19, 50.84s/it] 78%|███████▊  | 35/45 [30:02<08:26, 50.63s/it]                                               {'loss': 0.3033, 'grad_norm': 0.09192982316017151, 'learning_rate': 1.5296290238968303e-05, 'ppl': 1.35432, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 123.18505096435547, 'tokens/total': 36700160, 'tokens/trainable': 2968243, 'epoch': 2.33}
 78%|███████▊  | 35/45 [30:02<08:26, 50.63s/it][2026-05-25 12:53:27,635] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.00it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.49it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.32it/s][A
 50%|█████     | 5/10 [00:03<00:04,  1.23it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.18it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.14it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.13it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.12it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A                                               
                                               [A{'eval_loss': 0.2914044260978699, 'eval_runtime': 9.4106, 'eval_samples_per_second': 4.251, 'eval_steps_per_second': 1.063, 'eval_ppl': 1.33831, 'memory/max_active (GiB)': 54.54, 'memory/max_allocated (GiB)': 54.54, 'memory/device_reserved (GiB)': 66.97, 'epoch': 2.33, 'tokens/train_per_sec_per_gpu': 0.0}
 78%|███████▊  | 35/45 [30:12<08:26, 50.63s/it]
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A
                                               [A 80%|████████  | 36/45 [31:01<07:57, 53.04s/it]                                               {'loss': 0.2719, 'grad_norm': 0.08897636085748672, 'learning_rate': 1.2761390862810907e-05, 'ppl': 1.31246, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 101.69590759277344, 'tokens/total': 37748736, 'tokens/trainable': 3046922, 'epoch': 2.4}
 80%|████████  | 36/45 [31:01<07:57, 53.04s/it] 82%|████████▏ | 37/45 [31:50<06:56, 52.01s/it]                                               {'loss': 0.297, 'grad_norm': 0.09379743784666061, 'learning_rate': 1.0425175578537299e-05, 'ppl': 1.34582, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 127.07369232177734, 'tokens/total': 38797312, 'tokens/trainable': 3132586, 'epoch': 2.47}
 82%|████████▏ | 37/45 [31:50<06:56, 52.01s/it] 84%|████████▍ | 38/45 [32:40<05:59, 51.37s/it]                                               {'loss': 0.2927, 'grad_norm': 0.09285531938076019, 'learning_rate': 8.30010910550611e-06, 'ppl': 1.34004, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 94.80797576904297, 'tokens/total': 39845888, 'tokens/trainable': 3214711, 'epoch': 2.53}
 84%|████████▍ | 38/45 [32:40<05:59, 51.37s/it] 87%|████████▋ | 39/45 [33:30<05:05, 50.93s/it]                                               {'loss': 0.3127, 'grad_norm': 0.09318273514509201, 'learning_rate': 6.397529592809614e-06, 'ppl': 1.36711, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 118.26634979248047, 'tokens/total': 40894464, 'tokens/trainable': 3302910, 'epoch': 2.6}
 87%|████████▋ | 39/45 [33:30<05:05, 50.93s/it] 89%|████████▉ | 40/45 [34:19<04:12, 50.46s/it]                                               {'loss': 0.2925, 'grad_norm': 0.09144666790962219, 'learning_rate': 4.727588125342669e-06, 'ppl': 1.33977, 'memory/max_active (GiB)': 64.13, 'memory/max_allocated (GiB)': 64.13, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 76.71216583251953, 'tokens/total': 41943040, 'tokens/trainable': 3386817, 'epoch': 2.67}
 89%|████████▉ | 40/45 [34:19<04:12, 50.46s/it][2026-05-25 12:57:44,544] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.05it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.50it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.33it/s][A
 50%|█████     | 5/10 [00:03<00:04,  1.24it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.18it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.14it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.13it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.12it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A                                               
                                               [A{'eval_loss': 0.28813081979751587, 'eval_runtime': 9.4108, 'eval_samples_per_second': 4.25, 'eval_steps_per_second': 1.063, 'eval_ppl': 1.33393, 'memory/max_active (GiB)': 54.54, 'memory/max_allocated (GiB)': 54.54, 'memory/device_reserved (GiB)': 66.97, 'epoch': 2.67, 'tokens/train_per_sec_per_gpu': 0.0}
 89%|████████▉ | 40/45 [34:29<04:12, 50.46s/it]
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A
                                               [A 91%|█████████ | 41/45 [35:19<03:32, 53.17s/it]                                               {'loss': 0.2942, 'grad_norm': 0.0968620628118515, 'learning_rate': 3.299194563372604e-06, 'ppl': 1.34205, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 94.30477142333984, 'tokens/total': 42991616, 'tokens/trainable': 3470404, 'epoch': 2.73}
 91%|█████████ | 41/45 [35:19<03:32, 53.17s/it] 93%|█████████▎| 42/45 [36:09<02:36, 52.13s/it]                                               {'loss': 0.3013, 'grad_norm': 0.08996280282735825, 'learning_rate': 2.1199700045797077e-06, 'ppl': 1.35161, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 103.78019714355469, 'tokens/total': 44040192, 'tokens/trainable': 3557431, 'epoch': 2.8}
 93%|█████████▎| 42/45 [36:09<02:36, 52.13s/it] 96%|█████████▌| 43/45 [36:58<01:42, 51.25s/it]                                               {'loss': 0.3084, 'grad_norm': 0.09097972512245178, 'learning_rate': 1.196206122203647e-06, 'ppl': 1.36125, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 105.37864685058594, 'tokens/total': 45088768, 'tokens/trainable': 3644837, 'epoch': 2.87}
 96%|█████████▌| 43/45 [36:58<01:42, 51.25s/it] 98%|█████████▊| 44/45 [37:48<00:50, 50.90s/it]                                               {'loss': 0.3283, 'grad_norm': 0.09021352976560593, 'learning_rate': 5.328315962444874e-07, 'ppl': 1.38861, 'memory/max_active (GiB)': 64.13, 'memory/max_allocated (GiB)': 64.13, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 165.2163848876953, 'tokens/total': 46137344, 'tokens/trainable': 3741057, 'epoch': 2.93}
 98%|█████████▊| 44/45 [37:48<00:50, 50.90s/it]100%|██████████| 45/45 [38:38<00:00, 50.59s/it]                                               {'loss': 0.2815, 'grad_norm': 0.09305893629789352, 'learning_rate': 1.333858168224178e-07, 'ppl': 1.32512, 'memory/max_active (GiB)': 64.12, 'memory/max_allocated (GiB)': 64.12, 'memory/device_reserved (GiB)': 66.97, 'tokens/train_per_sec_per_gpu': 118.26636505126953, 'tokens/total': 47185920, 'tokens/trainable': 3827271, 'epoch': 3.0}
100%|██████████| 45/45 [38:38<00:00, 50.59s/it][2026-05-25 13:02:02,930] [INFO] [axolotl.core.trainers.base.evaluate:400] [PID:2038090] Running evaluation step...

  0%|          | 0/10 [00:00<?, ?it/s][A
 20%|██        | 2/10 [00:00<00:03,  2.31it/s][A
 30%|███       | 3/10 [00:01<00:04,  1.59it/s][A
 40%|████      | 4/10 [00:02<00:04,  1.37it/s][A
 50%|█████     | 5/10 [00:03<00:03,  1.26it/s][A
 60%|██████    | 6/10 [00:04<00:03,  1.20it/s][A
 70%|███████   | 7/10 [00:05<00:02,  1.16it/s][A
 80%|████████  | 8/10 [00:06<00:01,  1.14it/s][A
 90%|█████████ | 9/10 [00:07<00:00,  1.13it/s][A
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A                                               
                                               [A{'eval_loss': 0.2875402569770813, 'eval_runtime': 9.4661, 'eval_samples_per_second': 4.226, 'eval_steps_per_second': 1.056, 'eval_ppl': 1.33314, 'memory/max_active (GiB)': 54.54, 'memory/max_allocated (GiB)': 54.54, 'memory/device_reserved (GiB)': 66.97, 'epoch': 3.0, 'tokens/train_per_sec_per_gpu': 0.0}
100%|██████████| 45/45 [38:47<00:00, 50.59s/it]
100%|██████████| 10/10 [00:08<00:00,  1.10it/s][A
                                               [A[2026-05-25 13:02:12,406] [INFO] [axolotl.core.trainers.base._save:721] [PID:2038090] Saving model checkpoint to out/qwen3-8b-persistent-navigation-20260525_121743/checkpoint-45
[2026-05-25 13:02:12,431] [WARNING] [py.warnings._showwarnmsg:112] [PID:2038090] /e/project1/reformo/salgarkar1/agents_learn/pythonformer-workshop/.venv/lib/python3.12/site-packages/peft/utils/save_and_load.py:295: UserWarning: Could not find a config file in Qwen/Qwen3-8B - will assume that the vocabulary was not modified.
  warnings.warn(

                                               {'train_runtime': 2334.9646, 'train_samples_per_second': 1.233, 'train_steps_per_second': 0.019, 'train_loss': 0.41287357343567743, 'memory/max_active (GiB)': 30.89, 'memory/max_allocated (GiB)': 30.89, 'memory/device_reserved (GiB)': 66.97, 'epoch': 3.0, 'tokens/train_per_sec_per_gpu': 0.0}
100%|██████████| 45/45 [38:48<00:00, 50.59s/it]100%|██████████| 45/45 [38:48<00:00, 51.75s/it]
[2026-05-25 13:02:13,909] [INFO] [axolotl.train.save_trained_model:233] [PID:2038090] Training completed! Saving trained model to out/qwen3-8b-persistent-navigation-20260525_121743.
[2026-05-25 13:02:13,933] [WARNING] [py.warnings._showwarnmsg:112] [PID:2038090] /e/project1/reformo/salgarkar1/agents_learn/pythonformer-workshop/.venv/lib/python3.12/site-packages/peft/utils/save_and_load.py:295: UserWarning: Could not find a config file in Qwen/Qwen3-8B - will assume that the vocabulary was not modified.
  warnings.warn(

[2026-05-25 13:02:14,236] [INFO] [axolotl.train.save_trained_model:351] [PID:2038090] Model successfully saved to out/qwen3-8b-persistent-navigation-20260525_121743