{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "444cce3d",
   "metadata": {},
   "source": [
    "## To find the best hyperparameters we use optuna and wandb."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a380209d-f30f-4d6d-b50c-f41d877e6c8a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting optuna\n",
      "  Downloading optuna-4.6.0-py3-none-any.whl.metadata (17 kB)\n",
      "Collecting alembic>=1.5.0 (from optuna)\n",
      "  Downloading alembic-1.17.1-py3-none-any.whl.metadata (7.2 kB)\n",
      "Collecting colorlog (from optuna)\n",
      "  Downloading colorlog-6.10.1-py3-none-any.whl.metadata (11 kB)\n",
      "Requirement already satisfied: numpy in /usr/local/lib/python3.12/dist-packages (from optuna) (2.3.4)\n",
      "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from optuna) (25.0)\n",
      "Collecting sqlalchemy>=1.4.2 (from optuna)\n",
      "  Downloading sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.5 kB)\n",
      "Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from optuna) (4.67.1)\n",
      "Requirement already satisfied: PyYAML in /usr/local/lib/python3.12/dist-packages (from optuna) (6.0.3)\n",
      "Collecting Mako (from alembic>=1.5.0->optuna)\n",
      "  Downloading mako-1.3.10-py3-none-any.whl.metadata (2.9 kB)\n",
      "Requirement already satisfied: typing-extensions>=4.12 in /usr/local/lib/python3.12/dist-packages (from alembic>=1.5.0->optuna) (4.15.0)\n",
      "Collecting greenlet>=1 (from sqlalchemy>=1.4.2->optuna)\n",
      "  Downloading greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (4.1 kB)\n",
      "Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.12/dist-packages (from Mako->alembic>=1.5.0->optuna) (3.0.3)\n",
      "Downloading optuna-4.6.0-py3-none-any.whl (404 kB)\n",
      "Downloading alembic-1.17.1-py3-none-any.whl (247 kB)\n",
      "Downloading sqlalchemy-2.0.44-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.3/3.3 MB\u001b[0m \u001b[31m21.6 MB/s\u001b[0m  \u001b[33m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (607 kB)\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m607.6/607.6 kB\u001b[0m \u001b[31m91.8 MB/s\u001b[0m  \u001b[33m0:00:00\u001b[0m\n",
      "\u001b[?25hDownloading colorlog-6.10.1-py3-none-any.whl (11 kB)\n",
      "Downloading mako-1.3.10-py3-none-any.whl (78 kB)\n",
      "Installing collected packages: Mako, greenlet, colorlog, sqlalchemy, alembic, optuna\n",
      "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m6/6\u001b[0m [optuna]2m5/6\u001b[0m [optuna]]my]\n",
      "\u001b[1A\u001b[2KSuccessfully installed Mako-1.3.10 alembic-1.17.1 colorlog-6.10.1 greenlet-3.2.4 optuna-4.6.0 sqlalchemy-2.0.44\n",
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager, possibly rendering your system unusable. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv. Use the --root-user-action option if you know what you are doing and want to suppress this warning.\u001b[0m\u001b[33m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m25.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m25.3\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3.12 -m pip install --upgrade pip\u001b[0m\n",
      "Note: you may need to restart the kernel to use updated packages.\n"
     ]
    }
   ],
   "source": [
    "pip install optuna"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "fe66cb2c-72d2-4d31-a5af-58c48867c879",
   "metadata": {},
   "outputs": [],
   "source": [
    "%%capture\n",
    "import os, importlib.util\n",
    "!pip install --upgrade -qqq uv\n",
    "if importlib.util.find_spec(\"torch\") is None or \"COLAB_\" in \"\".join(os.environ.keys()):\n",
    "    try: import numpy; get_numpy = f\"numpy=={numpy.__version__}\"\n",
    "    except: get_numpy = \"numpy\"\n",
    "    !uv pip install -qqq \\\n",
    "        \"torch>=2.8.0\" \"triton>=3.4.0\" {get_numpy} torchvision bitsandbytes \"transformers==4.56.2\" trackio \\\n",
    "        \"unsloth_zoo[base] @ git+https://github.com/unslothai/unsloth-zoo\" \\\n",
    "        \"unsloth[base] @ git+https://github.com/unslothai/unsloth\" \\\n",
    "        git+https://github.com/triton-lang/triton.git@05b2c186c1b6c9a08375389d5efe9cb4c401c075#subdirectory=python/triton_kernels\n",
    "elif importlib.util.find_spec(\"unsloth\") is None:\n",
    "    !uv pip install -qqq unsloth trackio\n",
    "!uv pip install --upgrade --no-deps transformers==4.56.2 tokenizers trl==0.22.2 unsloth unsloth_zoo wandb"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c0709848-bba6-4daf-b1be-6c93a5990f91",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mjdmasciano2\u001b[0m (\u001b[33mjdmasciano2-university-of-lagos\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n"
     ]
    }
   ],
   "source": [
    "!wandb login"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "29bf54f0-46a9-45c3-9431-d364f11e153b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Overwriting fine_tune.py\n"
     ]
    }
   ],
   "source": [
    "%%writefile fine_tune.py\n",
    "import os\n",
    "import json\n",
    "import torch\n",
    "import re\n",
    "from datasets import Dataset, DatasetDict\n",
    "from unsloth import FastLanguageModel\n",
    "from trl import SFTTrainer, SFTConfig\n",
    "import optuna # Import Optuna\n",
    "from unsloth.chat_templates import train_on_responses_only\n",
    "\n",
    "# --- 1. Define the Objective Function for Optuna ---\n",
    "def objective(trial):\n",
    "    \"\"\"\n",
    "    This function will be called by Optuna for each trial.\n",
    "    It defines the hyperparameters to search, trains the model,\n",
    "    and returns the evaluation loss which Optuna will aim to minimize.\n",
    "    \"\"\"\n",
    "    # --- A. Define the search space for hyperparameters based on the Unsloth guide ---\n",
    "    learning_rate = trial.suggest_float(\"learning_rate\", 5e-6, 2e-4, log=True)\n",
    "    lora_rank = trial.suggest_categorical(\"lora_rank\", [32, 64, 128])\n",
    "    # The guide recommends lora_alpha = 2 * lora_rank. We derive it directly.\n",
    "    lora_alpha = lora_rank * 2\n",
    "    weight_decay = trial.suggest_float(\"weight_decay\", 0.0, 0.1)\n",
    "\n",
    "    print(f\"\\n--- Starting Trial {trial.number} with parameters: ---\")\n",
    "    print(f\"  - learning_rate: {learning_rate:.2e}\")\n",
    "    print(f\"  - lora_rank: {lora_rank}\")\n",
    "    print(f\"  - lora_alpha: {lora_alpha}\")\n",
    "    print(f\"  - weight_decay: {weight_decay:.3f}\")\n",
    "\n",
    "    # --- B. Model and Tokenizer Loading ---\n",
    "    max_seq_length = 4096\n",
    "    model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "        model_name=\"unsloth/gpt-oss-20b-BF16\",\n",
    "        load_in_4bit=False, # Set to True if you need to save VRAM (QLoRA)\n",
    "        max_seq_length=max_seq_length,\n",
    "    )\n",
    "\n",
    "    model = FastLanguageModel.get_peft_model(\n",
    "        model,\n",
    "        r=lora_rank,  # From Optuna\n",
    "        target_modules=[\n",
    "            \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "            \"gate_proj\", \"up_proj\", \"down_proj\",\n",
    "        ],\n",
    "        lora_alpha=lora_alpha, # From Optuna\n",
    "        use_gradient_checkpointing=\"unsloth\",\n",
    "        random_state=3407,\n",
    "    )\n",
    "\n",
    "    # --- C. Dataset Loading and Preprocessing (remains the same) ---\n",
    "    ROOT_DIR = \"/workspace/AIAC\"\n",
    "    DATASET_FILE_PATH = os.path.join(ROOT_DIR, \"dipg_sft_.jsonl\")\n",
    "    with open(DATASET_FILE_PATH, \"r\") as f:\n",
    "        raw_data = [json.loads(line) for line in f if line.strip()]\n",
    "    dataset = Dataset.from_list(raw_data)\n",
    "    split_dataset = dataset.train_test_split(test_size=0.1, seed=42)\n",
    "    dataset = DatasetDict({\"train\": split_dataset[\"train\"], \"test\": split_dataset[\"test\"]})\n",
    "\n",
    "    def normalize_messages(messages):\n",
    "        normalized = []\n",
    "        for msg in messages:\n",
    "            if msg[\"role\"] != \"assistant\":\n",
    "                normalized.append(msg)\n",
    "                continue\n",
    "            content = msg[\"content\"]\n",
    "            channels = re.findall(r\"<\\|channel\\|>(.*?)<\\|message\\|>(.*?)<\\|end\\|>\", content, re.DOTALL)\n",
    "            if channels:\n",
    "                thinking, final = \"\", \"\"\n",
    "                for ch, text in channels:\n",
    "                    ch, text = ch.strip(), text.strip()\n",
    "                    if ch == \"analysis\": thinking += text + \"\\n\"\n",
    "                    elif ch == \"proof\": thinking += f\"\\n[Proof Section]\\n{text}\\n\"\n",
    "                    elif ch == \"final\": final += text\n",
    "                normalized.append({\"role\": \"assistant\", \"thinking\": thinking.strip(), \"content\": final.strip()})\n",
    "            else:\n",
    "                normalized.append(msg)\n",
    "        return normalized\n",
    "\n",
    "    def formatting_prompts_func(examples):\n",
    "        convos = [normalize_messages(convo) for convo in examples[\"messages\"]]\n",
    "        return {\"text\": [tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False) for convo in convos]}\n",
    "\n",
    "    dataset = dataset.map(formatting_prompts_func, batched=True)\n",
    "\n",
    "    # --- D. SFTTrainer with Dynamic Hyperparameters ---\n",
    "    trainer = SFTTrainer(\n",
    "        model=model,\n",
    "        tokenizer=tokenizer,\n",
    "        train_dataset=dataset['train'],\n",
    "        eval_dataset=dataset['test'],\n",
    "        args=SFTConfig(\n",
    "            dataset_text_field=\"text\",\n",
    "            per_device_train_batch_size=2,      # Fixed based on your script\n",
    "            gradient_accumulation_steps=4,   # Fixed based on your script\n",
    "            warmup_steps=10,\n",
    "            max_seq_length=4096,\n",
    "            max_steps=11, # Keep this low for a quick hyperparameter search\n",
    "            learning_rate=learning_rate,       # From Optuna\n",
    "            logging_steps=5,\n",
    "            optim=\"adamw_8bit\",\n",
    "            weight_decay=weight_decay,       # From Optuna\n",
    "            lr_scheduler_type=\"linear\",\n",
    "            seed=3407,\n",
    "            eval_strategy=\"steps\",\n",
    "            eval_steps=10,\n",
    "            output_dir=f\"sft_outputs_trial_{trial.number}\", # Unique output dir\n",
    "            report_to=\"wandb\",\n",
    "            run_name=f\"trial-{trial.number}-lr-{learning_rate:.2e}-r-{lora_rank}\" # Descriptive W&B run name\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # This part for training on responses only remains unchanged\n",
    "    gpt_oss_kwargs = dict(instruction_part=\"<|start|>user<|message|>\", response_part=\"<|start|>assistant\")\n",
    "    trainer = train_on_responses_only(trainer, **gpt_oss_kwargs)\n",
    "\n",
    "    # --- E. Train and Evaluate ---\n",
    "    print(f\"--- Starting SFT Training for Trial {trial.number} ---\")\n",
    "    trainer.train()\n",
    "    print(\"--- SFT Training Complete ---\")\n",
    "\n",
    "    eval_results = trainer.evaluate()\n",
    "    eval_loss = eval_results[\"eval_loss\"]\n",
    "    print(f\"--- Trial {trial.number} finished with Eval Loss: {eval_loss} ---\")\n",
    "    \n",
    "    # Clean up to free VRAM for the next trial\n",
    "    del model\n",
    "    del trainer\n",
    "    torch.cuda.empty_cache()\n",
    "\n",
    "    return eval_loss\n",
    "\n",
    "# --- 2. Run the Hyperparameter Search ---\n",
    "if __name__ == \"__main__\":\n",
    "    # Create a study object and specify the direction to optimize.\n",
    "    study = optuna.create_study(direction=\"minimize\", study_name=\"unsloth_finetuning\")\n",
    "    \n",
    "    # Start the optimization. Optuna will call the 'objective' function 'n_trials' times.\n",
    "    # Increase n_trials for a more thorough search (e.g., 20-50).\n",
    "    study.optimize(objective, n_trials=10)\n",
    "\n",
    "    print(\"\\n\\n--- Hyperparameter Search Complete ---\")\n",
    "    print(\"Best trial:\")\n",
    "    best_trial = study.best_trial\n",
    "    \n",
    "    print(f\"  Value (min eval_loss): {best_trial.value}\")\n",
    "    \n",
    "    print(\"  Best Parameters: \")\n",
    "    for key, value in best_trial.params.items():\n",
    "        print(f\"    {key}: {value}\")\n",
    "    \n",
    "    # You can also get a dataframe with all trial results\n",
    "    df = study.trials_dataframe()\n",
    "    print(\"\\n--- All Trials ---\")\n",
    "    print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "26a0654b-2768-40d4-a9e0-c6001f163b8f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
      "#### Unsloth: `hf_xet==1.1.10` and `ipykernel>6.30.1` breaks progress bars. Disabling for now in XET.\n",
      "#### Unsloth: To re-enable progress bars, please downgrade to `ipykernel==6.30.1` or wait for a fix to\n",
      "https://github.com/huggingface/xet-core/issues/526\n",
      "INFO 11-13 15:12:29 [__init__.py:225] Automatically detected platform rocm.\n",
      "🦥 Unsloth Zoo will now patch everything to make training faster!\n",
      "\u001b[32m[I 2025-11-13 15:12:33,207]\u001b[0m A new study created in memory with name: unsloth_finetuning\u001b[0m\n",
      "\n",
      "--- Starting Trial 0 with parameters: ---\n",
      "  - learning_rate: 2.09e-05\n",
      "  - lora_rank: 128\n",
      "  - lora_alpha: 256\n",
      "  - weight_decay: 0.039\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:19<00:00,  2.19s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 6437.47 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 6146.85 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:10<00:00, 83.14 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:08<00:00, 11.47 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:01<00:00, 849.22 examples/s]\n",
      "Map (num_proc=24): 100%|██████████████| 100/100 [00:00<00:00, 108.45 examples/s]\n",
      "--- Starting SFT Training for Trial 0 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 63,700,992 of 20,978,458,176 (0.30% trained)\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mjdmasciano2\u001b[0m (\u001b[33mjdmasciano2-university-of-lagos\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⢿\u001b[0m Waiting for wandb.init()...\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m Waiting for wandb.init()...\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.23.0\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/workspace/AIAC/OpenEnv/wandb/run-20251113_151330-z9cgpnww\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mtrial-0-lr-2.09e-05-r-128\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/huggingface\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/huggingface/runs/z9cgpnww\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Detected [huggingface_hub.inference, openai] in use.\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/\n",
      "  0%|                                                    | 0/11 [00:00<?, ?it/s]Unsloth: Will smartly offload gradients to save VRAM!\n",
      "{'loss': 2.7869, 'grad_norm': 77.70904541015625, 'learning_rate': 8.371268304374135e-06, 'epoch': 0.04}\n",
      "{'loss': 1.3593, 'grad_norm': 9.689935684204102, 'learning_rate': 1.8835353684841802e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:49<00:06,  6.45s/it]Unsloth: Not an error, but GptOssForCausalLM does not accept `num_items_in_batch`.\n",
      "Using gradient accumulation will be very slightly less accurate.\n",
      "Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient\n",
      "\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:01<00:07,  2.78it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:02<00:12,  1.68it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:03<00:14,  1.34it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:04<00:16,  1.17it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:05<00:16,  1.11it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:06<00:15,  1.07it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:07<00:15,  1.03it/s]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:08<00:15,  1.00s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:09<00:14,  1.01s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:10<00:13,  1.03s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:11<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:12<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:13<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:14<00:09,  1.07s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:15<00:08,  1.06s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:16<00:07,  1.06s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.07s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.06s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.06s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:21<00:03,  1.07s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:22<00:02,  1.09s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:23<00:01,  1.09s/it]\u001b[A\n",
      "                                                                                \u001b[A\n",
      "\u001b[A{'eval_loss': 0.9976342916488647, 'eval_runtime': 28.1507, 'eval_samples_per_second': 3.552, 'eval_steps_per_second': 0.888, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [02:17<00:06,  6.45s/it]\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.06s/it]\u001b[A\n",
      "{'train_runtime': 144.9926, 'train_samples_per_second': 0.607, 'train_steps_per_second': 0.076, 'train_loss': 1.9628322883085771, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [02:24<00:00, 13.09s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.22it/s]\n",
      "--- Trial 0 finished with Eval Loss: 0.9015486240386963 ---\n",
      "\u001b[32m[I 2025-11-13 15:16:15,716]\u001b[0m Trial 0 finished with value: 0.9015486240386963 and parameters: {'learning_rate': 2.0928170760935337e-05, 'lora_rank': 128, 'weight_decay': 0.0394570371953359}. Best is trial 0 with value: 0.9015486240386963.\u001b[0m\n",
      "\n",
      "--- Starting Trial 1 with parameters: ---\n",
      "  - learning_rate: 7.14e-06\n",
      "  - lora_rank: 128\n",
      "  - lora_alpha: 256\n",
      "  - weight_decay: 0.090\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:19<00:00,  2.19s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8329.52 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 5767.59 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:12<00:00, 70.14 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:11<00:00,  8.81 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:03<00:00, 249.24 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:03<00:00, 29.36 examples/s]\n",
      "--- Starting SFT Training for Trial 1 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 63,700,992 of 20,978,458,176 (0.30% trained)\n",
      "{'loss': 3.0305, 'grad_norm': 118.49494171142578, 'learning_rate': 2.8576317041930847e-06, 'epoch': 0.04}\n",
      "{'loss': 1.9805, 'grad_norm': 14.71914005279541, 'learning_rate': 6.42967133443444e-06, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:59<00:05,  5.90s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.03s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.03s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.08s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.4098992347717285, 'eval_runtime': 28.8196, 'eval_samples_per_second': 3.47, 'eval_steps_per_second': 0.867, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:27<00:05,  5.90s/it]\u001b[A\n",
      "{'train_runtime': 94.7375, 'train_samples_per_second': 0.929, 'train_steps_per_second': 0.116, 'train_loss': 2.392287102612582, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:34<00:00,  8.61s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.23it/s]\n",
      "--- Trial 1 finished with Eval Loss: 1.3500356674194336 ---\n",
      "\u001b[32m[I 2025-11-13 15:19:20,365]\u001b[0m Trial 1 finished with value: 1.3500356674194336 and parameters: {'learning_rate': 7.144079260482711e-06, 'lora_rank': 128, 'weight_decay': 0.09035875217529261}. Best is trial 0 with value: 0.9015486240386963.\u001b[0m\n",
      "\n",
      "--- Starting Trial 2 with parameters: ---\n",
      "  - learning_rate: 8.19e-05\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 64\n",
      "  - weight_decay: 0.087\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:19<00:00,  2.22s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 7358.37 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 6590.98 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:12<00:00, 69.38 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:11<00:00,  8.68 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:03<00:00, 233.66 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:03<00:00, 26.49 examples/s]\n",
      "--- Starting SFT Training for Trial 2 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 2.7912, 'grad_norm': 37.56709671020508, 'learning_rate': 3.277168417926272e-05, 'epoch': 0.04}\n",
      "{'loss': 1.3364, 'grad_norm': 4.98887825012207, 'learning_rate': 7.373628940334112e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:59<00:05,  5.94s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.86it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.03s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.08s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 0.9429023265838623, 'eval_runtime': 26.2323, 'eval_samples_per_second': 3.812, 'eval_steps_per_second': 0.953, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:25<00:05,  5.94s/it]\u001b[A\n",
      "{'train_runtime': 92.4158, 'train_samples_per_second': 0.952, 'train_steps_per_second': 0.119, 'train_loss': 1.9490963816642761, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:32<00:00,  8.40s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.24it/s]\n",
      "--- Trial 2 finished with Eval Loss: 0.8443965315818787 ---\n",
      "\u001b[32m[I 2025-11-13 15:22:23,369]\u001b[0m Trial 2 finished with value: 0.8443965315818787 and parameters: {'learning_rate': 8.192921044815679e-05, 'lora_rank': 32, 'weight_decay': 0.08678581832249577}. Best is trial 2 with value: 0.8443965315818787.\u001b[0m\n",
      "\n",
      "--- Starting Trial 3 with parameters: ---\n",
      "  - learning_rate: 7.04e-06\n",
      "  - lora_rank: 128\n",
      "  - lora_alpha: 256\n",
      "  - weight_decay: 0.049\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:20<00:00,  2.25s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8249.92 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 5909.47 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:13<00:00, 65.44 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:11<00:00,  8.50 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:04<00:00, 218.52 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:03<00:00, 26.54 examples/s]\n",
      "--- Starting SFT Training for Trial 3 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 63,700,992 of 20,978,458,176 (0.30% trained)\n",
      "{'loss': 3.037, 'grad_norm': 124.63041687011719, 'learning_rate': 2.815465834940478e-06, 'epoch': 0.04}\n",
      "{'loss': 1.99, 'grad_norm': 15.042612075805664, 'learning_rate': 6.334798128616076e-06, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:59<00:05,  5.93s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.03s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.06s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.08s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.4143542051315308, 'eval_runtime': 26.2652, 'eval_samples_per_second': 3.807, 'eval_steps_per_second': 0.952, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:25<00:05,  5.93s/it]\u001b[A\n",
      "{'train_runtime': 92.5776, 'train_samples_per_second': 0.951, 'train_steps_per_second': 0.119, 'train_loss': 2.4003116434270684, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:32<00:00,  8.42s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.22it/s]\n",
      "--- Trial 3 finished with Eval Loss: 1.3569890260696411 ---\n",
      "\u001b[32m[I 2025-11-13 15:25:29,294]\u001b[0m Trial 3 finished with value: 1.3569890260696411 and parameters: {'learning_rate': 7.038664587351195e-06, 'lora_rank': 128, 'weight_decay': 0.049447486967482614}. Best is trial 2 with value: 0.8443965315818787.\u001b[0m\n",
      "\n",
      "--- Starting Trial 4 with parameters: ---\n",
      "  - learning_rate: 2.24e-05\n",
      "  - lora_rank: 128\n",
      "  - lora_alpha: 256\n",
      "  - weight_decay: 0.090\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:20<00:00,  2.27s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8475.22 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 6209.74 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:13<00:00, 64.70 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:11<00:00,  8.70 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:03<00:00, 227.32 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:04<00:00, 24.70 examples/s]\n",
      "--- Starting SFT Training for Trial 4 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 63,700,992 of 20,978,458,176 (0.30% trained)\n",
      "{'loss': 2.7525, 'grad_norm': 64.54015350341797, 'learning_rate': 8.948000978762086e-06, 'epoch': 0.04}\n",
      "{'loss': 1.3287, 'grad_norm': 9.296405792236328, 'learning_rate': 2.0133002202214694e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:59<00:05,  5.99s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.03s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.08s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 0.9672366976737976, 'eval_runtime': 26.2347, 'eval_samples_per_second': 3.812, 'eval_steps_per_second': 0.953, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:25<00:05,  5.99s/it]\u001b[A\n",
      "{'train_runtime': 93.0178, 'train_samples_per_second': 0.946, 'train_steps_per_second': 0.118, 'train_loss': 1.9303940534591675, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:33<00:00,  8.46s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.23it/s]\n",
      "--- Trial 4 finished with Eval Loss: 0.8703588843345642 ---\n",
      "\u001b[32m[I 2025-11-13 15:28:35,479]\u001b[0m Trial 4 finished with value: 0.8703588843345642 and parameters: {'learning_rate': 2.2370002446905215e-05, 'lora_rank': 128, 'weight_decay': 0.08975212186644935}. Best is trial 2 with value: 0.8443965315818787.\u001b[0m\n",
      "\n",
      "--- Starting Trial 5 with parameters: ---\n",
      "  - learning_rate: 1.52e-05\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 64\n",
      "  - weight_decay: 0.045\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:20<00:00,  2.30s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8275.78 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 5283.56 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:14<00:00, 60.47 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:11<00:00,  8.62 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:04<00:00, 219.41 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:04<00:00, 24.59 examples/s]\n",
      "--- Starting SFT Training for Trial 5 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 3.0616, 'grad_norm': 61.25464630126953, 'learning_rate': 6.087775821319552e-06, 'epoch': 0.04}\n",
      "{'loss': 2.5212, 'grad_norm': 28.188945770263672, 'learning_rate': 1.3697495597968993e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:59<00:05,  5.92s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.03s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.07s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.6733813285827637, 'eval_runtime': 26.2051, 'eval_samples_per_second': 3.816, 'eval_steps_per_second': 0.954, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:25<00:05,  5.92s/it]\u001b[A\n",
      "{'train_runtime': 92.0661, 'train_samples_per_second': 0.956, 'train_steps_per_second': 0.119, 'train_loss': 2.6633119799874048, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:32<00:00,  8.37s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.24it/s]\n",
      "--- Trial 5 finished with Eval Loss: 1.5352014303207397 ---\n",
      "\u001b[32m[I 2025-11-13 15:31:42,240]\u001b[0m Trial 5 finished with value: 1.5352014303207397 and parameters: {'learning_rate': 1.521943955329888e-05, 'lora_rank': 32, 'weight_decay': 0.04545375486923469}. Best is trial 2 with value: 0.8443965315818787.\u001b[0m\n",
      "\n",
      "--- Starting Trial 6 with parameters: ---\n",
      "  - learning_rate: 2.15e-05\n",
      "  - lora_rank: 64\n",
      "  - lora_alpha: 128\n",
      "  - weight_decay: 0.033\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:20<00:00,  2.33s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8405.76 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 5811.38 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:14<00:00, 64.15 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:11<00:00,  8.39 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:04<00:00, 212.98 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:04<00:00, 24.07 examples/s]\n",
      "--- Starting SFT Training for Trial 6 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 31,850,496 of 20,946,607,680 (0.15% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 2.9658, 'grad_norm': 75.81187438964844, 'learning_rate': 8.616232381310336e-06, 'epoch': 0.04}\n",
      "{'loss': 1.6797, 'grad_norm': 9.429723739624023, 'learning_rate': 1.9386522857948256e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:59<00:05,  5.92s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.86it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.03s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.08s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.2612221240997314, 'eval_runtime': 26.2416, 'eval_samples_per_second': 3.811, 'eval_steps_per_second': 0.953, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:25<00:05,  5.92s/it]\u001b[A\n",
      "{'train_runtime': 92.1837, 'train_samples_per_second': 0.955, 'train_steps_per_second': 0.119, 'train_loss': 2.213920690796592, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:32<00:00,  8.38s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.23it/s]\n",
      "--- Trial 6 finished with Eval Loss: 1.170252799987793 ---\n",
      "\u001b[32m[I 2025-11-13 15:34:48,516]\u001b[0m Trial 6 finished with value: 1.170252799987793 and parameters: {'learning_rate': 2.154058095327584e-05, 'lora_rank': 64, 'weight_decay': 0.03255593976857176}. Best is trial 2 with value: 0.8443965315818787.\u001b[0m\n",
      "\n",
      "--- Starting Trial 7 with parameters: ---\n",
      "  - learning_rate: 1.68e-05\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 64\n",
      "  - weight_decay: 0.077\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:20<00:00,  2.33s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8197.35 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 5942.54 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:15<00:00, 58.61 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:11<00:00,  8.45 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:04<00:00, 212.04 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:04<00:00, 23.61 examples/s]\n",
      "--- Starting SFT Training for Trial 7 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 3.0582, 'grad_norm': 59.004791259765625, 'learning_rate': 6.713181835898287e-06, 'epoch': 0.04}\n",
      "{'loss': 2.4503, 'grad_norm': 19.93524742126465, 'learning_rate': 1.5104659130771146e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:59<00:05,  5.97s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.88it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.18it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.02s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.03s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.02s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.03s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.03s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.04s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:21<00:03,  1.05s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.07s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.04s/it]\u001b[A\n",
      "{'eval_loss': 1.5939886569976807, 'eval_runtime': 26.1096, 'eval_samples_per_second': 3.83, 'eval_steps_per_second': 0.958, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:26<00:05,  5.97s/it]\u001b[A\n",
      "{'train_runtime': 92.7195, 'train_samples_per_second': 0.949, 'train_steps_per_second': 0.119, 'train_loss': 2.627083019776778, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:32<00:00,  8.43s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.24it/s]\n",
      "--- Trial 7 finished with Eval Loss: 1.4856449365615845 ---\n",
      "\u001b[32m[I 2025-11-13 15:37:57,122]\u001b[0m Trial 7 finished with value: 1.4856449365615845 and parameters: {'learning_rate': 1.6782954589745716e-05, 'lora_rank': 32, 'weight_decay': 0.07741041599887828}. Best is trial 2 with value: 0.8443965315818787.\u001b[0m\n",
      "\n",
      "--- Starting Trial 8 with parameters: ---\n",
      "  - learning_rate: 8.98e-06\n",
      "  - lora_rank: 128\n",
      "  - lora_alpha: 256\n",
      "  - weight_decay: 0.046\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:21<00:00,  2.37s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 7913.48 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 5414.66 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:14<00:00, 60.60 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:12<00:00,  8.25 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:04<00:00, 207.85 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:04<00:00, 23.49 examples/s]\n",
      "--- Starting SFT Training for Trial 8 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 63,700,992 of 20,978,458,176 (0.30% trained)\n",
      "{'loss': 3.0073, 'grad_norm': 115.39823150634766, 'learning_rate': 3.5921144815540457e-06, 'epoch': 0.04}\n",
      "{'loss': 1.7997, 'grad_norm': 13.34203052520752, 'learning_rate': 8.082257583496602e-06, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:00<00:06,  6.02s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.03s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.08s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.337899923324585, 'eval_runtime': 26.2353, 'eval_samples_per_second': 3.812, 'eval_steps_per_second': 0.953, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:26<00:06,  6.02s/it]\u001b[A\n",
      "{'train_runtime': 93.4668, 'train_samples_per_second': 0.942, 'train_steps_per_second': 0.118, 'train_loss': 2.293670654296875, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:33<00:00,  8.50s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.24it/s]\n",
      "--- Trial 8 finished with Eval Loss: 1.2611559629440308 ---\n",
      "\u001b[32m[I 2025-11-13 15:41:07,265]\u001b[0m Trial 8 finished with value: 1.2611559629440308 and parameters: {'learning_rate': 8.980286203885114e-06, 'lora_rank': 128, 'weight_decay': 0.0457745011744784}. Best is trial 2 with value: 0.8443965315818787.\u001b[0m\n",
      "\n",
      "--- Starting Trial 9 with parameters: ---\n",
      "  - learning_rate: 2.86e-05\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 64\n",
      "  - weight_decay: 0.022\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:21<00:00,  2.42s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8017.86 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 6851.98 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:14<00:00, 62.93 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:12<00:00,  8.24 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:04<00:00, 211.99 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:04<00:00, 23.57 examples/s]\n",
      "--- Starting SFT Training for Trial 9 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 3.0237, 'grad_norm': 56.65327072143555, 'learning_rate': 1.14373741019382e-05, 'epoch': 0.04}\n",
      "{'loss': 1.925, 'grad_norm': 7.131019592285156, 'learning_rate': 2.573409172936095e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:00<00:06,  6.03s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.02s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.08s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.3935586214065552, 'eval_runtime': 26.2375, 'eval_samples_per_second': 3.811, 'eval_steps_per_second': 0.953, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:26<00:06,  6.03s/it]\u001b[A\n",
      "{'train_runtime': 93.1052, 'train_samples_per_second': 0.945, 'train_steps_per_second': 0.118, 'train_loss': 2.362549890171398, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:33<00:00,  8.46s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.23it/s]\n",
      "--- Trial 9 finished with Eval Loss: 1.3239474296569824 ---\n",
      "\u001b[32m[I 2025-11-13 15:44:16,446]\u001b[0m Trial 9 finished with value: 1.3239474296569824 and parameters: {'learning_rate': 2.8593435254845497e-05, 'lora_rank': 32, 'weight_decay': 0.021526086062592188}. Best is trial 2 with value: 0.8443965315818787.\u001b[0m\n",
      "\n",
      "\n",
      "--- Hyperparameter Search Complete ---\n",
      "Best trial:\n",
      "  Value (min eval_loss): 0.8443965315818787\n",
      "  Best Parameters: \n",
      "    learning_rate: 8.192921044815679e-05\n",
      "    lora_rank: 32\n",
      "    weight_decay: 0.08678581832249577\n",
      "\n",
      "--- All Trials ---\n",
      "   number     value  ... params_weight_decay     state\n",
      "0       0  0.901549  ...            0.039457  COMPLETE\n",
      "1       1  1.350036  ...            0.090359  COMPLETE\n",
      "2       2  0.844397  ...            0.086786  COMPLETE\n",
      "3       3  1.356989  ...            0.049447  COMPLETE\n",
      "4       4  0.870359  ...            0.089752  COMPLETE\n",
      "5       5  1.535201  ...            0.045454  COMPLETE\n",
      "6       6  1.170253  ...            0.032556  COMPLETE\n",
      "7       7  1.485645  ...            0.077410  COMPLETE\n",
      "8       8  1.261156  ...            0.045775  COMPLETE\n",
      "9       9  1.323947  ...            0.021526  COMPLETE\n",
      "\n",
      "[10 rows x 9 columns]\n",
      "\u001b[1;34mwandb\u001b[0m: \n",
      "\u001b[1;34mwandb\u001b[0m: 🚀 View run \u001b[33mtrial-0-lr-2.09e-05-r-128\u001b[0m at: \u001b[34m\u001b[0m\n",
      "\u001b[1;34mwandb\u001b[0m: Find logs at: \u001b[1;35mwandb/run-20251113_151330-z9cgpnww/logs\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!python fine_tune.py"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ed291deb",
   "metadata": {},
   "source": [
    "## We do a separate run using lora_alpha = lora_rank."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b3386884-1eb7-491e-b3cf-4745470e88d7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing f_tune.py\n"
     ]
    }
   ],
   "source": [
    "%%writefile f_tune.py\n",
    "import os\n",
    "import json\n",
    "import torch\n",
    "import re\n",
    "from datasets import Dataset, DatasetDict\n",
    "from unsloth import FastLanguageModel\n",
    "from trl import SFTTrainer, SFTConfig\n",
    "import optuna # Import Optuna\n",
    "from unsloth.chat_templates import train_on_responses_only\n",
    "\n",
    "# --- 1. Define the Objective Function for Optuna ---\n",
    "def objective(trial):\n",
    "    \"\"\"\n",
    "    This function will be called by Optuna for each trial.\n",
    "    It defines the hyperparameters to search, trains the model,\n",
    "    and returns the evaluation loss which Optuna will aim to minimize.\n",
    "    \"\"\"\n",
    "    # --- A. Define the search space for hyperparameters based on the Unsloth guide ---\n",
    "    learning_rate = trial.suggest_float(\"learning_rate\", 5e-6, 2e-4, log=True)\n",
    "    lora_rank = trial.suggest_categorical(\"lora_rank\", [32, 64, 128])\n",
    "    # The guide also talks about lora_alpha = lora_rank. We derive it directly.\n",
    "    lora_alpha = lora_rank\n",
    "    weight_decay = trial.suggest_float(\"weight_decay\", 0.0, 0.1)\n",
    "\n",
    "    print(f\"\\n--- Starting Trial {trial.number} with parameters: ---\")\n",
    "    print(f\"  - learning_rate: {learning_rate:.2e}\")\n",
    "    print(f\"  - lora_rank: {lora_rank}\")\n",
    "    print(f\"  - lora_alpha: {lora_alpha}\")\n",
    "    print(f\"  - weight_decay: {weight_decay:.3f}\")\n",
    "\n",
    "    # --- B. Model and Tokenizer Loading ---\n",
    "    max_seq_length = 4096\n",
    "    model, tokenizer = FastLanguageModel.from_pretrained(\n",
    "        model_name=\"unsloth/gpt-oss-20b-BF16\",\n",
    "        load_in_4bit=False, # Set to True if you need to save VRAM (QLoRA)\n",
    "        max_seq_length=max_seq_length,\n",
    "    )\n",
    "\n",
    "    model = FastLanguageModel.get_peft_model(\n",
    "        model,\n",
    "        r=lora_rank,  # From Optuna\n",
    "        target_modules=[\n",
    "            \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
    "            \"gate_proj\", \"up_proj\", \"down_proj\",\n",
    "        ],\n",
    "        lora_alpha=lora_alpha, # From Optuna\n",
    "        use_gradient_checkpointing=\"unsloth\",\n",
    "        random_state=3407,\n",
    "    )\n",
    "\n",
    "    # --- C. Dataset Loading and Preprocessing (remains the same) ---\n",
    "    ROOT_DIR = \"/workspace/AIAC\"\n",
    "    DATASET_FILE_PATH = os.path.join(ROOT_DIR, \"dipg_sft_.jsonl\")\n",
    "    with open(DATASET_FILE_PATH, \"r\") as f:\n",
    "        raw_data = [json.loads(line) for line in f if line.strip()]\n",
    "    dataset = Dataset.from_list(raw_data)\n",
    "    split_dataset = dataset.train_test_split(test_size=0.1, seed=42)\n",
    "    dataset = DatasetDict({\"train\": split_dataset[\"train\"], \"test\": split_dataset[\"test\"]})\n",
    "\n",
    "    def normalize_messages(messages):\n",
    "        normalized = []\n",
    "        for msg in messages:\n",
    "            if msg[\"role\"] != \"assistant\":\n",
    "                normalized.append(msg)\n",
    "                continue\n",
    "            content = msg[\"content\"]\n",
    "            channels = re.findall(r\"<\\|channel\\|>(.*?)<\\|message\\|>(.*?)<\\|end\\|>\", content, re.DOTALL)\n",
    "            if channels:\n",
    "                thinking, final = \"\", \"\"\n",
    "                for ch, text in channels:\n",
    "                    ch, text = ch.strip(), text.strip()\n",
    "                    if ch == \"analysis\": thinking += text + \"\\n\"\n",
    "                    elif ch == \"proof\": thinking += f\"\\n[Proof Section]\\n{text}\\n\"\n",
    "                    elif ch == \"final\": final += text\n",
    "                normalized.append({\"role\": \"assistant\", \"thinking\": thinking.strip(), \"content\": final.strip()})\n",
    "            else:\n",
    "                normalized.append(msg)\n",
    "        return normalized\n",
    "\n",
    "    def formatting_prompts_func(examples):\n",
    "        convos = [normalize_messages(convo) for convo in examples[\"messages\"]]\n",
    "        return {\"text\": [tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False) for convo in convos]}\n",
    "\n",
    "    dataset = dataset.map(formatting_prompts_func, batched=True)\n",
    "\n",
    "    # --- D. SFTTrainer with Dynamic Hyperparameters ---\n",
    "    trainer = SFTTrainer(\n",
    "        model=model,\n",
    "        tokenizer=tokenizer,\n",
    "        train_dataset=dataset['train'],\n",
    "        eval_dataset=dataset['test'],\n",
    "        args=SFTConfig(\n",
    "            dataset_text_field=\"text\",\n",
    "            per_device_train_batch_size=2,      # Fixed based on your script\n",
    "            gradient_accumulation_steps=4,   # Fixed based on your script\n",
    "            warmup_steps=10,\n",
    "            max_seq_length=4096,\n",
    "            max_steps=11, # Keep this low for a quick hyperparameter search\n",
    "            learning_rate=learning_rate,       # From Optuna\n",
    "            logging_steps=5,\n",
    "            optim=\"adamw_8bit\",\n",
    "            weight_decay=weight_decay,       # From Optuna\n",
    "            lr_scheduler_type=\"linear\",\n",
    "            seed=3407,\n",
    "            eval_strategy=\"steps\",\n",
    "            eval_steps=10,\n",
    "            output_dir=f\"sft_outputs_trial_{trial.number}\", # Unique output dir\n",
    "            report_to=\"wandb\",\n",
    "            run_name=f\"trial-{trial.number}-lr-{learning_rate:.2e}-r-{lora_rank}\" # Descriptive W&B run name\n",
    "        ),\n",
    "    )\n",
    "\n",
    "    # This part for training on responses only remains unchanged\n",
    "    gpt_oss_kwargs = dict(instruction_part=\"<|start|>user<|message|>\", response_part=\"<|start|>assistant\")\n",
    "    trainer = train_on_responses_only(trainer, **gpt_oss_kwargs)\n",
    "\n",
    "    # --- E. Train and Evaluate ---\n",
    "    print(f\"--- Starting SFT Training for Trial {trial.number} ---\")\n",
    "    trainer.train()\n",
    "    print(\"--- SFT Training Complete ---\")\n",
    "\n",
    "    eval_results = trainer.evaluate()\n",
    "    eval_loss = eval_results[\"eval_loss\"]\n",
    "    print(f\"--- Trial {trial.number} finished with Eval Loss: {eval_loss} ---\")\n",
    "    \n",
    "    # Clean up to free VRAM for the next trial\n",
    "    del model\n",
    "    del trainer\n",
    "    torch.cuda.empty_cache()\n",
    "\n",
    "    return eval_loss\n",
    "\n",
    "# --- 2. Run the Hyperparameter Search ---\n",
    "if __name__ == \"__main__\":\n",
    "    # Create a study object and specify the direction to optimize.\n",
    "    study = optuna.create_study(direction=\"minimize\", study_name=\"unsloth_finetuning_l_r\")\n",
    "    \n",
    "    # Start the optimization. Optuna will call the 'objective' function 'n_trials' times.\n",
    "    # Increase n_trials for a more thorough search (e.g., 20-50).\n",
    "    study.optimize(objective, n_trials=10)\n",
    "\n",
    "    print(\"\\n\\n--- Hyperparameter Search Complete ---\")\n",
    "    print(\"Best trial:\")\n",
    "    best_trial = study.best_trial\n",
    "    \n",
    "    print(f\"  Value (min eval_loss): {best_trial.value}\")\n",
    "    \n",
    "    print(\"  Best Parameters: \")\n",
    "    for key, value in best_trial.params.items():\n",
    "        print(f\"    {key}: {value}\")\n",
    "    \n",
    "    # You can also get a dataframe with all trial results\n",
    "    df = study.trials_dataframe()\n",
    "    print(\"\\n--- All Trials ---\")\n",
    "    print(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "2b646549-f498-4524-a1d9-1951ff6831da",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.\n",
      "#### Unsloth: `hf_xet==1.1.10` and `ipykernel>6.30.1` breaks progress bars. Disabling for now in XET.\n",
      "#### Unsloth: To re-enable progress bars, please downgrade to `ipykernel==6.30.1` or wait for a fix to\n",
      "https://github.com/huggingface/xet-core/issues/526\n",
      "INFO 11-13 15:53:42 [__init__.py:225] Automatically detected platform rocm.\n",
      "🦥 Unsloth Zoo will now patch everything to make training faster!\n",
      "\u001b[32m[I 2025-11-13 15:53:45,819]\u001b[0m A new study created in memory with name: unsloth_finetuning_l_r\u001b[0m\n",
      "\n",
      "--- Starting Trial 0 with parameters: ---\n",
      "  - learning_rate: 1.15e-05\n",
      "  - lora_rank: 128\n",
      "  - lora_alpha: 128\n",
      "  - weight_decay: 0.083\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:15<00:00,  1.74s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8761.46 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 9272.87 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:07<00:00, 112.84\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:05<00:00, 18.24 \n",
      "Map (num_proc=24): 100%|█████████████| 900/900 [00:00<00:00, 1166.64 examples/s]\n",
      "Map (num_proc=24): 100%|██████████████| 100/100 [00:00<00:00, 167.06 examples/s]\n",
      "--- Starting SFT Training for Trial 0 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 63,700,992 of 20,978,458,176 (0.30% trained)\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mjdmasciano2\u001b[0m (\u001b[33mjdmasciano2-university-of-lagos\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⢿\u001b[0m Waiting for wandb.init()...\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m Waiting for wandb.init()...\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.23.0\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/workspace/AIAC/OpenEnv/wandb/run-20251113_155424-j2kz7jrl\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mtrial-0-lr-1.15e-05-r-128\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/huggingface\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/jdmasciano2-university-of-lagos/huggingface/runs/j2kz7jrl\u001b[0m\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Detected [huggingface_hub.inference, openai] in use.\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.\n",
      "\u001b[34m\u001b[1mwandb\u001b[0m: For more information, check out the docs at: https://weave-docs.wandb.ai/\n",
      "  0%|                                                    | 0/11 [00:00<?, ?it/s]Unsloth: Will smartly offload gradients to save VRAM!\n",
      "{'loss': 3.0351, 'grad_norm': 58.99226379394531, 'learning_rate': 4.591996104844592e-06, 'epoch': 0.04}\n",
      "{'loss': 2.1693, 'grad_norm': 9.480472564697266, 'learning_rate': 1.0331991235900333e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:26<00:05,  5.09s/it]Unsloth: Not an error, but GptOssForCausalLM does not accept `num_items_in_batch`.\n",
      "Using gradient accumulation will be very slightly less accurate.\n",
      "Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient\n",
      "\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:01<00:07,  2.82it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:02<00:12,  1.70it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:03<00:14,  1.35it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:04<00:16,  1.17it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:05<00:16,  1.12it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:06<00:15,  1.07it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:07<00:15,  1.03it/s]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:08<00:15,  1.00s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:09<00:14,  1.00s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:10<00:13,  1.03s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:11<00:12,  1.02s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:12<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:13<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:14<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:15<00:08,  1.06s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:16<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:17<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.06s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.06s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:21<00:03,  1.07s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:22<00:02,  1.08s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:23<00:01,  1.08s/it]\u001b[A\n",
      "                                                                                \u001b[A\n",
      "\u001b[A{'eval_loss': 1.4603428840637207, 'eval_runtime': 27.7609, 'eval_samples_per_second': 3.602, 'eval_steps_per_second': 0.901, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:54<00:05,  5.09s/it]\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.06s/it]\u001b[A\n",
      "{'train_runtime': 120.7813, 'train_samples_per_second': 0.729, 'train_steps_per_second': 0.091, 'train_loss': 2.4844610691070557, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:59<00:00, 10.89s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.22it/s]\n",
      "--- Trial 0 finished with Eval Loss: 1.403989553451538 ---\n",
      "\u001b[32m[I 2025-11-13 15:56:45,950]\u001b[0m Trial 0 finished with value: 1.403989553451538 and parameters: {'learning_rate': 1.147999026211148e-05, 'lora_rank': 128, 'weight_decay': 0.08303899112718628}. Best is trial 0 with value: 1.403989553451538.\u001b[0m\n",
      "\n",
      "--- Starting Trial 1 with parameters: ---\n",
      "  - learning_rate: 2.02e-05\n",
      "  - lora_rank: 128\n",
      "  - lora_alpha: 128\n",
      "  - weight_decay: 0.100\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:15<00:00,  1.68s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|██████████████████████████| 900/900 [00:00<00:00, 11571.42 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 8980.80 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:09<00:00, 97.26 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:07<00:00, 13.22 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:02<00:00, 348.80 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:02<00:00, 39.19 examples/s]\n",
      "--- Starting SFT Training for Trial 1 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 63,700,992 of 20,978,458,176 (0.30% trained)\n",
      "{'loss': 2.981, 'grad_norm': 56.9263916015625, 'learning_rate': 8.084106928858149e-06, 'epoch': 0.04}\n",
      "{'loss': 1.7144, 'grad_norm': 6.855598449707031, 'learning_rate': 1.8189240589930836e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:48<00:04,  4.77s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.86it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.02s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.2819827795028687, 'eval_runtime': 26.8086, 'eval_samples_per_second': 3.73, 'eval_steps_per_second': 0.933, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:15<00:04,  4.77s/it]\u001b[A\n",
      "{'train_runtime': 81.1825, 'train_samples_per_second': 1.084, 'train_steps_per_second': 0.135, 'train_loss': 2.2385106086730957, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:21<00:00,  7.38s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.23it/s]\n",
      "--- Trial 1 finished with Eval Loss: 1.1960686445236206 ---\n",
      "\u001b[32m[I 2025-11-13 15:59:13,651]\u001b[0m Trial 1 finished with value: 1.1960686445236206 and parameters: {'learning_rate': 2.021026732214537e-05, 'lora_rank': 128, 'weight_decay': 0.09959635071503486}. Best is trial 1 with value: 1.1960686445236206.\u001b[0m\n",
      "\n",
      "--- Starting Trial 2 with parameters: ---\n",
      "  - learning_rate: 3.10e-05\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 32\n",
      "  - weight_decay: 0.097\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:15<00:00,  1.73s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|██████████████████████████| 900/900 [00:00<00:00, 11442.27 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 8786.64 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:11<00:00, 76.83 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:08<00:00, 12.37 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:02<00:00, 308.40 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:02<00:00, 35.14 examples/s]\n",
      "--- Starting SFT Training for Trial 2 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 3.0653, 'grad_norm': 29.019315719604492, 'learning_rate': 1.2411594169876688e-05, 'epoch': 0.04}\n",
      "{'loss': 2.4765, 'grad_norm': 10.762125968933105, 'learning_rate': 2.7926086882222547e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:48<00:04,  4.68s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.02s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.03s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.03s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.04s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.05s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:21<00:03,  1.05s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.07s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.04s/it]\u001b[A\n",
      "{'eval_loss': 1.5995140075683594, 'eval_runtime': 26.1037, 'eval_samples_per_second': 3.831, 'eval_steps_per_second': 0.958, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:14<00:04,  4.68s/it]\u001b[A\n",
      "{'train_runtime': 79.4451, 'train_samples_per_second': 1.108, 'train_steps_per_second': 0.138, 'train_loss': 2.6429914344440806, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:19<00:00,  7.22s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.24it/s]\n",
      "--- Trial 2 finished with Eval Loss: 1.490930199623108 ---\n",
      "\u001b[32m[I 2025-11-13 16:01:42,755]\u001b[0m Trial 2 finished with value: 1.490930199623108 and parameters: {'learning_rate': 3.102898542469172e-05, 'lora_rank': 32, 'weight_decay': 0.09654699582306547}. Best is trial 1 with value: 1.1960686445236206.\u001b[0m\n",
      "\n",
      "--- Starting Trial 3 with parameters: ---\n",
      "  - learning_rate: 3.45e-05\n",
      "  - lora_rank: 128\n",
      "  - lora_alpha: 128\n",
      "  - weight_decay: 0.098\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:15<00:00,  1.70s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|██████████████████████████| 900/900 [00:00<00:00, 11373.08 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 8687.82 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:10<00:00, 87.19 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:08<00:00, 12.26 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:02<00:00, 306.24 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:02<00:00, 36.19 examples/s]\n",
      "--- Starting SFT Training for Trial 3 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 63,700,992 of 20,978,458,176 (0.30% trained)\n",
      "{'loss': 2.8565, 'grad_norm': 46.28248596191406, 'learning_rate': 1.378915778652628e-05, 'epoch': 0.04}\n",
      "{'loss': 1.4256, 'grad_norm': 5.490672588348389, 'learning_rate': 3.102560501968413e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:46<00:04,  4.64s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.40it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.18it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.09it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.02s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.03s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.02s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.03s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.03s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.04s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.05s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:21<00:03,  1.05s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.07s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.04s/it]\u001b[A\n",
      "{'eval_loss': 1.0575447082519531, 'eval_runtime': 26.0995, 'eval_samples_per_second': 3.831, 'eval_steps_per_second': 0.958, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:12<00:04,  4.64s/it]\u001b[A\n",
      "{'train_runtime': 77.9698, 'train_samples_per_second': 1.129, 'train_steps_per_second': 0.141, 'train_loss': 2.0301032174717295, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:17<00:00,  7.09s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.24it/s]\n",
      "--- Trial 3 finished with Eval Loss: 0.9548640251159668 ---\n",
      "\u001b[32m[I 2025-11-13 16:04:09,535]\u001b[0m Trial 3 finished with value: 0.9548640251159668 and parameters: {'learning_rate': 3.44728944663157e-05, 'lora_rank': 128, 'weight_decay': 0.09770164992687808}. Best is trial 3 with value: 0.9548640251159668.\u001b[0m\n",
      "\n",
      "--- Starting Trial 4 with parameters: ---\n",
      "  - learning_rate: 1.98e-04\n",
      "  - lora_rank: 64\n",
      "  - lora_alpha: 64\n",
      "  - weight_decay: 0.050\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:15<00:00,  1.74s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|██████████████████████████| 900/900 [00:00<00:00, 11426.23 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 8859.21 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:10<00:00, 86.71 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:07<00:00, 13.14 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:03<00:00, 293.83 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:02<00:00, 34.83 examples/s]\n",
      "--- Starting SFT Training for Trial 4 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 31,850,496 of 20,946,607,680 (0.15% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 2.4456, 'grad_norm': 5.208934307098389, 'learning_rate': 7.935189921342137e-05, 'epoch': 0.04}\n",
      "{'loss': 0.983, 'grad_norm': 2.5113980770111084, 'learning_rate': 0.00017854177323019807, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:46<00:04,  4.65s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.40it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.18it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.09it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.02it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.02s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.01s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.03s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.02s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.03s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.03s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.04s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.05s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:21<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.07s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 0.674797534942627, 'eval_runtime': 26.0984, 'eval_samples_per_second': 3.832, 'eval_steps_per_second': 0.958, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:12<00:04,  4.65s/it]\u001b[A\n",
      "{'train_runtime': 78.0403, 'train_samples_per_second': 1.128, 'train_steps_per_second': 0.141, 'train_loss': 1.6093213612383062, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:18<00:00,  7.09s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.22it/s]\n",
      "--- Trial 4 finished with Eval Loss: 0.5428810119628906 ---\n",
      "\u001b[32m[I 2025-11-13 16:06:36,511]\u001b[0m Trial 4 finished with value: 0.5428810119628906 and parameters: {'learning_rate': 0.0001983797480335534, 'lora_rank': 64, 'weight_decay': 0.0499444938434432}. Best is trial 4 with value: 0.5428810119628906.\u001b[0m\n",
      "\n",
      "--- Starting Trial 5 with parameters: ---\n",
      "  - learning_rate: 2.58e-05\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 32\n",
      "  - weight_decay: 0.071\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:15<00:00,  1.75s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|██████████████████████████| 900/900 [00:00<00:00, 11344.68 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 8688.72 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:10<00:00, 87.14 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:08<00:00, 12.47 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:02<00:00, 300.09 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:02<00:00, 35.87 examples/s]\n",
      "--- Starting SFT Training for Trial 5 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 3.05, 'grad_norm': 28.769529342651367, 'learning_rate': 1.030265096849273e-05, 'epoch': 0.04}\n",
      "{'loss': 2.6254, 'grad_norm': 16.35968589782715, 'learning_rate': 2.3180964679108644e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:46<00:04,  4.60s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.02s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.03s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.04s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:21<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.07s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.733009934425354, 'eval_runtime': 26.1862, 'eval_samples_per_second': 3.819, 'eval_steps_per_second': 0.955, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:12<00:04,  4.60s/it]\u001b[A\n",
      "{'train_runtime': 77.3283, 'train_samples_per_second': 1.138, 'train_steps_per_second': 0.142, 'train_loss': 2.7078862732106987, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:17<00:00,  7.03s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.23it/s]\n",
      "--- Trial 5 finished with Eval Loss: 1.580967664718628 ---\n",
      "\u001b[32m[I 2025-11-13 16:09:02,724]\u001b[0m Trial 5 finished with value: 1.580967664718628 and parameters: {'learning_rate': 2.5756627421231826e-05, 'lora_rank': 32, 'weight_decay': 0.07083982700297768}. Best is trial 4 with value: 0.5428810119628906.\u001b[0m\n",
      "\n",
      "--- Starting Trial 6 with parameters: ---\n",
      "  - learning_rate: 3.75e-05\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 32\n",
      "  - weight_decay: 0.010\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:15<00:00,  1.75s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|██████████████████████████| 900/900 [00:00<00:00, 11497.02 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 8761.68 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:10<00:00, 86.12 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:07<00:00, 12.81 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:02<00:00, 300.43 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:02<00:00, 36.10 examples/s]\n",
      "--- Starting SFT Training for Trial 6 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 3.0517, 'grad_norm': 29.413124084472656, 'learning_rate': 1.5014242573826686e-05, 'epoch': 0.04}\n",
      "{'loss': 2.2875, 'grad_norm': 6.527860164642334, 'learning_rate': 3.378204579111004e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:46<00:04,  4.65s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.18it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.02s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.02s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.03s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.03s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.04s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:21<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.07s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.4962964057922363, 'eval_runtime': 26.1341, 'eval_samples_per_second': 3.826, 'eval_steps_per_second': 0.957, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:12<00:04,  4.65s/it]\u001b[A\n",
      "{'train_runtime': 78.6131, 'train_samples_per_second': 1.119, 'train_steps_per_second': 0.14, 'train_loss': 2.5478180755268443, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:18<00:00,  7.15s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.24it/s]\n",
      "--- Trial 6 finished with Eval Loss: 1.427107334136963 ---\n",
      "\u001b[32m[I 2025-11-13 16:11:29,916]\u001b[0m Trial 6 finished with value: 1.427107334136963 and parameters: {'learning_rate': 3.753560643456671e-05, 'lora_rank': 32, 'weight_decay': 0.009569577884517889}. Best is trial 4 with value: 0.5428810119628906.\u001b[0m\n",
      "\n",
      "--- Starting Trial 7 with parameters: ---\n",
      "  - learning_rate: 9.53e-05\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 32\n",
      "  - weight_decay: 0.083\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:18<00:00,  2.09s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8871.31 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 6430.61 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:14<00:00, 62.58 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:11<00:00,  8.57 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:04<00:00, 181.69 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:04<00:00, 20.41 examples/s]\n",
      "--- Starting SFT Training for Trial 7 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 2.9306, 'grad_norm': 27.784711837768555, 'learning_rate': 3.813384548592036e-05, 'epoch': 0.04}\n",
      "{'loss': 1.563, 'grad_norm': 3.4875640869140625, 'learning_rate': 8.580115234332081e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:54<00:05,  5.42s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.03s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.03s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.04s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:21<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.143123984336853, 'eval_runtime': 26.205, 'eval_samples_per_second': 3.816, 'eval_steps_per_second': 0.954, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:20<00:05,  5.42s/it]\u001b[A\n",
      "{'train_runtime': 86.4533, 'train_samples_per_second': 1.018, 'train_steps_per_second': 0.127, 'train_loss': 2.134390289133245, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:26<00:00,  7.86s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.23it/s]\n",
      "--- Trial 7 finished with Eval Loss: 1.0248439311981201 ---\n",
      "\u001b[32m[I 2025-11-13 16:14:27,222]\u001b[0m Trial 7 finished with value: 1.0248439311981201 and parameters: {'learning_rate': 9.53346137148009e-05, 'lora_rank': 32, 'weight_decay': 0.08294191360078847}. Best is trial 4 with value: 0.5428810119628906.\u001b[0m\n",
      "\n",
      "--- Starting Trial 8 with parameters: ---\n",
      "  - learning_rate: 5.67e-06\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 32\n",
      "  - weight_decay: 0.000\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:18<00:00,  2.10s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8937.66 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 6473.29 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:14<00:00, 63.65 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:11<00:00,  8.95 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:04<00:00, 188.75 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:04<00:00, 21.65 examples/s]\n",
      "--- Starting SFT Training for Trial 8 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 3.0789, 'grad_norm': 31.837247848510742, 'learning_rate': 2.2687006767374344e-06, 'epoch': 0.04}\n",
      "{'loss': 3.1011, 'grad_norm': 29.073768615722656, 'learning_rate': 5.104576522659227e-06, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:54<00:05,  5.42s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.87it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:18,  1.02s/it]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:17,  1.02s/it]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.03s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.04s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.03s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:21<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.07s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 2.9400031566619873, 'eval_runtime': 26.337, 'eval_samples_per_second': 3.797, 'eval_steps_per_second': 0.949, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:20<00:05,  5.42s/it]\u001b[A\n",
      "{'train_runtime': 86.5548, 'train_samples_per_second': 1.017, 'train_steps_per_second': 0.127, 'train_loss': 3.043077750639482, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:26<00:00,  7.87s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.23it/s]\n",
      "--- Trial 8 finished with Eval Loss: 2.8971292972564697 ---\n",
      "\u001b[32m[I 2025-11-13 16:17:24,039]\u001b[0m Trial 8 finished with value: 2.8971292972564697 and parameters: {'learning_rate': 5.671751691843585e-06, 'lora_rank': 32, 'weight_decay': 0.0001430304128610338}. Best is trial 4 with value: 0.5428810119628906.\u001b[0m\n",
      "\n",
      "--- Starting Trial 9 with parameters: ---\n",
      "  - learning_rate: 7.91e-05\n",
      "  - lora_rank: 32\n",
      "  - lora_alpha: 32\n",
      "  - weight_decay: 0.052\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "Unsloth: AMD currently is not stable with 4bit bitsandbytes. Disabling for now.\n",
      "==((====))==  Unsloth 2025.10.9: Fast Gpt_Oss patching. Transformers: 4.56.2. vLLM: 0.11.1rc3.dev39+gf417746ad.rocm700.\n",
      "   \\\\   /|    . Num GPUs = 1. Max memory: 191.688 GB. Platform: Linux.\n",
      "O^O/ \\_/ \\    Torch: 2.9.0a0+git1c57644. ROCm Toolkit: 7.0.51831-a3e329ad8. Triton: 3.4.0\n",
      "\\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]\n",
      " \"-____-\"     Free license: http://github.com/unslothai/unsloth\n",
      "Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!\n",
      "Loading checkpoint shards: 100%|██████████████████| 9/9 [00:18<00:00,  2.06s/it]\n",
      "Unsloth: Making `model.base_model.model.model` require gradients\n",
      "Map: 100%|███████████████████████████| 900/900 [00:00<00:00, 8992.57 examples/s]\n",
      "Map: 100%|███████████████████████████| 100/100 [00:00<00:00, 6718.52 examples/s]\n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 900/900 [00:13<00:00, 64.58 \n",
      "Unsloth: Tokenizing [\"text\"] (num_proc=24): 100%|█| 100/100 [00:10<00:00,  9.13 \n",
      "Map (num_proc=24): 100%|██████████████| 900/900 [00:04<00:00, 181.25 examples/s]\n",
      "Map (num_proc=24): 100%|███████████████| 100/100 [00:04<00:00, 22.51 examples/s]\n",
      "--- Starting SFT Training for Trial 9 ---\n",
      "The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': 199998}.\n",
      "==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1\n",
      "   \\\\   /|    Num examples = 900 | Num Epochs = 1 | Total steps = 11\n",
      "O^O/ \\_/ \\    Batch size per device = 2 | Gradient accumulation steps = 4\n",
      "\\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8\n",
      " \"-____-\"     Trainable parameters = 15,925,248 of 20,930,682,432 (0.08% trained)\n",
      "A ConfigError was raised whilst setting the number of model parameters in Weights & Biases config.\n",
      "{'loss': 2.9823, 'grad_norm': 27.964017868041992, 'learning_rate': 3.165123098294908e-05, 'epoch': 0.04}\n",
      "{'loss': 1.6682, 'grad_norm': 3.715291976928711, 'learning_rate': 7.121526971163543e-05, 'epoch': 0.09}\n",
      " 91%|███████████████████████████████████████    | 10/11 [00:54<00:05,  5.41s/it]\n",
      "  0%|                                                    | 0/25 [00:00<?, ?it/s]\u001b[A\n",
      "  8%|███▌                                        | 2/25 [00:01<00:12,  1.86it/s]\u001b[A\n",
      " 12%|█████▎                                      | 3/25 [00:02<00:15,  1.39it/s]\u001b[A\n",
      " 16%|███████                                     | 4/25 [00:03<00:17,  1.17it/s]\u001b[A\n",
      " 20%|████████▊                                   | 5/25 [00:04<00:18,  1.08it/s]\u001b[A\n",
      " 24%|██████████▌                                 | 6/25 [00:05<00:18,  1.02it/s]\u001b[A\n",
      " 28%|████████████▎                               | 7/25 [00:06<00:17,  1.02it/s]\u001b[A\n",
      " 32%|██████████████                              | 8/25 [00:07<00:16,  1.01it/s]\u001b[A\n",
      " 36%|███████████████▊                            | 9/25 [00:08<00:16,  1.01s/it]\u001b[A\n",
      " 40%|█████████████████▏                         | 10/25 [00:09<00:15,  1.03s/it]\u001b[A\n",
      " 44%|██████████████████▉                        | 11/25 [00:10<00:14,  1.02s/it]\u001b[A\n",
      " 48%|████████████████████▋                      | 12/25 [00:11<00:13,  1.04s/it]\u001b[A\n",
      " 52%|██████████████████████▎                    | 13/25 [00:12<00:12,  1.03s/it]\u001b[A\n",
      " 56%|████████████████████████                   | 14/25 [00:13<00:11,  1.04s/it]\u001b[A\n",
      " 60%|█████████████████████████▊                 | 15/25 [00:14<00:10,  1.04s/it]\u001b[A\n",
      " 64%|███████████████████████████▌               | 16/25 [00:15<00:09,  1.06s/it]\u001b[A\n",
      " 68%|█████████████████████████████▏             | 17/25 [00:16<00:08,  1.05s/it]\u001b[A\n",
      " 72%|██████████████████████████████▉            | 18/25 [00:17<00:07,  1.05s/it]\u001b[A\n",
      " 76%|████████████████████████████████▋          | 19/25 [00:18<00:06,  1.06s/it]\u001b[A\n",
      " 80%|██████████████████████████████████▍        | 20/25 [00:19<00:05,  1.05s/it]\u001b[A\n",
      " 84%|████████████████████████████████████       | 21/25 [00:20<00:04,  1.05s/it]\u001b[A\n",
      " 88%|█████████████████████████████████████▊     | 22/25 [00:22<00:03,  1.06s/it]\u001b[A\n",
      " 92%|███████████████████████████████████████▌   | 23/25 [00:23<00:02,  1.08s/it]\u001b[A\n",
      " 96%|█████████████████████████████████████████▎ | 24/25 [00:24<00:01,  1.08s/it]\u001b[A\n",
      "100%|███████████████████████████████████████████| 25/25 [00:25<00:00,  1.05s/it]\u001b[A\n",
      "{'eval_loss': 1.2308024168014526, 'eval_runtime': 26.2305, 'eval_samples_per_second': 3.812, 'eval_steps_per_second': 0.953, 'epoch': 0.09}\n",
      "\n",
      " 91%|███████████████████████████████████████    | 10/11 [01:20<00:05,  5.41s/it]\u001b[A\n",
      "{'train_runtime': 86.3925, 'train_samples_per_second': 1.019, 'train_steps_per_second': 0.127, 'train_loss': 2.2137471437454224, 'epoch': 0.1}\n",
      "100%|███████████████████████████████████████████| 11/11 [01:26<00:00,  7.85s/it]\n",
      "--- SFT Training Complete ---\n",
      "100%|███████████████████████████████████████████| 25/25 [00:20<00:00,  1.24it/s]\n",
      "--- Trial 9 finished with Eval Loss: 1.121924877166748 ---\n",
      "\u001b[32m[I 2025-11-13 16:20:19,596]\u001b[0m Trial 9 finished with value: 1.121924877166748 and parameters: {'learning_rate': 7.91280774573727e-05, 'lora_rank': 32, 'weight_decay': 0.051606358421891985}. Best is trial 4 with value: 0.5428810119628906.\u001b[0m\n",
      "\n",
      "\n",
      "--- Hyperparameter Search Complete ---\n",
      "Best trial:\n",
      "  Value (min eval_loss): 0.5428810119628906\n",
      "  Best Parameters: \n",
      "    learning_rate: 0.0001983797480335534\n",
      "    lora_rank: 64\n",
      "    weight_decay: 0.0499444938434432\n",
      "\n",
      "--- All Trials ---\n",
      "   number     value  ... params_weight_decay     state\n",
      "0       0  1.403990  ...            0.083039  COMPLETE\n",
      "1       1  1.196069  ...            0.099596  COMPLETE\n",
      "2       2  1.490930  ...            0.096547  COMPLETE\n",
      "3       3  0.954864  ...            0.097702  COMPLETE\n",
      "4       4  0.542881  ...            0.049944  COMPLETE\n",
      "5       5  1.580968  ...            0.070840  COMPLETE\n",
      "6       6  1.427107  ...            0.009570  COMPLETE\n",
      "7       7  1.024844  ...            0.082942  COMPLETE\n",
      "8       8  2.897129  ...            0.000143  COMPLETE\n",
      "9       9  1.121925  ...            0.051606  COMPLETE\n",
      "\n",
      "[10 rows x 9 columns]\n",
      "\u001b[1;34mwandb\u001b[0m: \n",
      "\u001b[1;34mwandb\u001b[0m: 🚀 View run \u001b[33mtrial-0-lr-1.15e-05-r-128\u001b[0m at: \u001b[34m\u001b[0m\n",
      "\u001b[1;34mwandb\u001b[0m: Find logs at: \u001b[1;35mwandb/run-20251113_155424-j2kz7jrl/logs\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "!python f_tune.py"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f97af38b",
   "metadata": {},
   "source": [
    "### The Final Comparison\n",
    "\n",
    "This is where your experimentation becomes so valuable. We can now directly compare the best results from both runs:\n",
    "\n",
    "| Hyperparameter Strategy | Best Eval Loss | Optimal LoRA Rank | Optimal Learning Rate |\n",
    "| :--- | :--- | :--- | :--- |\n",
    "| Run 1: `lora_alpha = 2 * r` | 0.844 | 32 | `8.19e-5` |\n",
    "| **Run 2: `lora_alpha = r`** | **0.543** | **64** | **`1.98e-4`** |\n",
    "\n",
    "###  Final, Optimized Hyperparameters\n",
    "\n",
    "Based on your successful search, these are the champion parameters you should use for your final model training:\n",
    "\n",
    "*   **`learning_rate`**: `0.000198` (or `2e-4` for simplicity)\n",
    "*   **`lora_rank`**: `64`\n",
    "*   **`lora_alpha`**: `64` (since `alpha = rank`)\n",
    "*   **`weight_decay`**: `0.0499` (or `0.05`)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}