File size: 14,932 Bytes

ecadbd9

{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6de60419",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "IGNORE_INDEX = -100\n",
    "PROMPT = (\n",
    "    \"Below is an instruction that describes a task. \"\n",
    "    \"Write a response that appropriately completes the request.\\n\\n\"\n",
    "    \"### Instruction:\\n{instruction}\\n\\n### Response:\"\n",
    ")\n",
    "\n",
    "def _tokenize_fn(strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer) -> Dict:\n",
    "    \"\"\"Tokenize a list of strings.\"\"\"\n",
    "    tokenized_list = [\n",
    "        tokenizer(\n",
    "            text,\n",
    "            return_tensors=\"pt\",\n",
    "            padding=\"longest\",\n",
    "            max_length=tokenizer.model_max_length,\n",
    "            truncation=True,\n",
    "        )\n",
    "        for text in strings\n",
    "    ]\n",
    "    input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list]\n",
    "    input_ids_lens = labels_lens = [\n",
    "        tokenized.input_ids.ne(tokenizer.pad_token_id).sum().item() for tokenized in tokenized_list\n",
    "    ]\n",
    "    return dict(\n",
    "        input_ids=input_ids,\n",
    "        labels=labels,\n",
    "        input_ids_lens=input_ids_lens,\n",
    "        labels_lens=labels_lens,\n",
    "    )\n",
    "\n",
    "def preprocess(\n",
    "        sources: Sequence[str],\n",
    "        targets: Sequence[str],\n",
    "        tokenizer: transformers.PreTrainedTokenizer,\n",
    ") -> Dict:\n",
    "    \"\"\"Preprocess the data by tokenizing.\"\"\"\n",
    "    examples = [s + t for s, t in zip(sources, targets)]\n",
    "    examples_tokenized, sources_tokenized = [_tokenize_fn(strings, tokenizer) for strings in (examples, sources)]\n",
    "    input_ids = examples_tokenized[\"input_ids\"]\n",
    "    labels = copy.deepcopy(input_ids)\n",
    "    for label, source_len in zip(labels, sources_tokenized[\"input_ids_lens\"]):\n",
    "        label[:source_len] = IGNORE_INDEX\n",
    "    return dict(input_ids=input_ids, labels=labels)\n",
    "\n",
    "def train_tokenize_function(examples, tokenizer, query, response):\n",
    "    sources = [PROMPT.format_map(dict(instruction=instruction)) for instruction in examples[query]]\n",
    "    targets = [f\"{output}{tokenizer.eos_token}\" for output in examples[response]]\n",
    "    data_dict = preprocess(sources, targets, tokenizer)\n",
    "    return data_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed5bd587",
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "import os\n",
    "\n",
    "def detect_numpy_source():\n",
    "    \"\"\"\n",
    "    Identifies the actual loaded NumPy version and its location on disk.\n",
    "    This helps distinguish between Conda metadata and actual imported files.\n",
    "    \"\"\"\n",
    "    print(f\"{'='*40}\")\n",
    "    print(f\"Debug Process ID: {os.getpid()}\")\n",
    "    print(f\"Python Executable: {sys.executable}\")\n",
    "    \n",
    "    try:\n",
    "        import numpy\n",
    "        print(f\"{'='*40}\")\n",
    "        print(f\"ACTUAL IMPORTED NUMPY VERSION: {numpy.__version__}\")\n",
    "        print(f\"Location on disk: {numpy.__file__}\")\n",
    "        print(f\"{'='*40}\")\n",
    "        \n",
    "        # Check if version matches expected 1.26.4\n",
    "        if numpy.__version__ == \"1.26.4\":\n",
    "            print(\"Status: The environment seems correct. The issue might be in the worker process.\")\n",
    "        else:\n",
    "            print(f\"Status: CONFLICT DETECTED! Conda says 1.26.4, but Python loads {numpy.__version__}.\")\n",
    "            \n",
    "    except ImportError as e:\n",
    "        print(f\"Error importing numpy: {e}\")\n",
    "\n",
    "    # Print sys.path to see if ~/.local or other paths are leaking in\n",
    "    print(\"\\n--- Python Search Paths (sys.path) ---\")\n",
    "    for p in sys.path:\n",
    "        print(p)\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    detect_numpy_source()\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a2693b63",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- PyTorch CUDA Status ---\n",
      "✅ torch.cuda.is_available() is True\n",
      "Number of GPUs: 1\n",
      "Current GPU: NVIDIA H200\n",
      "PyTorch built with CUDA version: 12.8\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "def check_cuda_status():\n",
    "    \"\"\"Checks and prints the CUDA status.\"\"\"\n",
    "    print(\"--- PyTorch CUDA Status ---\")\n",
    "    if torch.cuda.is_available():\n",
    "        print(\"✅ torch.cuda.is_available() is True\")\n",
    "        print(f\"Number of GPUs: {torch.cuda.device_count()}\")\n",
    "        print(f\"Current GPU: {torch.cuda.get_device_name(0)}\")\n",
    "        print(f\"PyTorch built with CUDA version: {torch.version.cuda}\")\n",
    "    else:\n",
    "        print(\"❌ torch.cuda.is_available() is False\")\n",
    "        print(f\"PyTorch built with CUDA version: {torch.version.cuda}\")\n",
    "\n",
    "check_cuda_status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e39c876",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2026-01-06 16:35:30\n",
      "/home/work/miniconda3/envs/sama/bin/python: Error while finding module specification for 'src.cms_main' (ModuleNotFoundError: No module named 'src')\n",
      "Traceback (most recent call last):\n",
      "  File \"/home/work/miniconda3/envs/sama/bin/accelerate\", line 7, in <module>\n",
      "    sys.exit(main())\n",
      "             ^^^^^^\n",
      "  File \"/home/work/miniconda3/envs/sama/lib/python3.11/site-packages/accelerate/commands/accelerate_cli.py\", line 50, in main\n",
      "    args.func(args)\n",
      "  File \"/home/work/miniconda3/envs/sama/lib/python3.11/site-packages/accelerate/commands/launch.py\", line 1281, in launch_command\n",
      "    simple_launcher(args)\n",
      "  File \"/home/work/miniconda3/envs/sama/lib/python3.11/site-packages/accelerate/commands/launch.py\", line 869, in simple_launcher\n",
      "    raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd)\n",
      "subprocess.CalledProcessError: Command '['/home/work/miniconda3/envs/sama/bin/python', '-m', 'src.cms_main', '--config_path', './config/sama_cms_lla3.yaml', '--trainer_args.learning_rate=5e-4', '--trainer_args.output_dir', './Llama3_B8', '--trainer_args.load_best_model_at_end', 'True', '--trainer_args.save_strategy', '\"steps\"', '--sama_adapter.col_L', '32', '--sama_adapter.row_R', '32', '--trainer_args.num_train_epochs', '2', '--trainer_args.report_to', 'wandb', '--trainer_args.save_steps', '300', '--trainer_args.eval_steps', '300', '--trainer_args.logging_steps', '300', '--sama_adapter.num_unique_blocks_L', '32', '--sama_adapter.num_unique_blocks_R', '32', '--sama_adapter.target_modules', '[\"q_proj\", \"v_proj\", \"k_proj\", \"up_proj\",\"down_proj\"]', '--data.path', 'ft_training_set/commonsense_147k.json', '--trainer_args.eval_delay', '9000']' returned non-zero exit status 1.\n",
      "2026-01-06 16:35:34\n"
     ]
    }
   ],
   "source": [
    "cd ../\n",
    "!bash scripts/cms_l3_train.sh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "8bb54d45",
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'seaborn'",
     "output_type": "error",
     "traceback": [
      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
      "\u001b[31mModuleNotFoundError\u001b[39m                       Traceback (most recent call last)",
      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpandas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mpd\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mseaborn\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01msns\u001b[39;00m\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmatplotlib\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mpyplot\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mplt\u001b[39;00m\n\u001b[32m      4\u001b[39m \u001b[38;5;28;01mimport\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mmatplotlib\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mticker\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mas\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mticker\u001b[39;00m\n",
      "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'seaborn'"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.ticker as ticker\n",
    "\n",
    "# ---------------------------------------------------------\n",
    "# 1. DATA PREPARATION\n",
    "# ---------------------------------------------------------\n",
    "# I have manually transcribed the data from your uploaded image.\n",
    "# For the second model, you can simply append its data to this dataframe.\n",
    "data = {\n",
    "    'Method': ['OFT', 'BOFT', 'HRA', 'LoCo'] * 3,\n",
    "    'Batch_Size': [16]*4 + [32]*4 + [64]*4,\n",
    "    # Time per step (ms) - taking the average from the image\n",
    "    'Time_ms': [\n",
    "        119.7, 202.5, 311.0, 166.7,  # Batch 16\n",
    "        195.1, 269.2, 316.0, 221.4,  # Batch 32\n",
    "        357.3, 410.2, 398.8, 374.3   # Batch 64\n",
    "    ],\n",
    "    # Peak Memory (GB)\n",
    "    'Peak_Mem_GB': [\n",
    "        16.21, 19.73, 21.85, 16.26,  # Batch 16\n",
    "        31.55, 35.01, 37.07, 31.67,  # Batch 32\n",
    "        62.24, 65.69, 67.62, 62.50   # Batch 64\n",
    "    ],\n",
    "    # Add a 'Model' column if you have data for 2 models\n",
    "    'Model_Name': ['Model A'] * 12 \n",
    "}\n",
    "\n",
    "df = pd.read_json(pd.DataFrame(data).to_json())\n",
    "\n",
    "# Set the visual style for scientific publication\n",
    "sns.set_theme(style=\"whitegrid\", context=\"paper\", font_scale=1.2)\n",
    "\n",
    "\n",
    "# ---------------------------------------------------------\n",
    "# OPTION 1: SCALABILITY CHARTS (Bar Charts)\n",
    "# Good for showing how metrics increase with batch size\n",
    "# ---------------------------------------------------------\n",
    "def plot_scalability(df):\n",
    "    \"\"\"\n",
    "    Creates a figure with 2 subplots:\n",
    "    1. Time per step vs Batch Size\n",
    "    2. Peak Memory vs Batch Size\n",
    "    \"\"\"\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(16, 6))\n",
    "\n",
    "    # --- Plot 1: Latency (Time) ---\n",
    "    sns.barplot(\n",
    "        data=df, x='Batch_Size', y='Time_ms', hue='Method',\n",
    "        ax=axes[0], palette=\"viridis\", edgecolor=\"black\", alpha=0.9\n",
    "    )\n",
    "    axes[0].set_title(\"Training Latency (Lower is better)\", fontweight='bold')\n",
    "    axes[0].set_ylabel(\"Time per step (ms)\")\n",
    "    axes[0].set_xlabel(\"Batch Size\")\n",
    "    axes[0].legend(title='Method')\n",
    "\n",
    "    # --- Plot 2: Memory Footprint ---\n",
    "    sns.barplot(\n",
    "        data=df, x='Batch_Size', y='Peak_Mem_GB', hue='Method',\n",
    "        ax=axes[1], palette=\"viridis\", edgecolor=\"black\", alpha=0.9\n",
    "    )\n",
    "    axes[1].set_title(\"Peak Memory Usage (Lower is better)\", fontweight='bold')\n",
    "    axes[1].set_ylabel(\"Peak Memory (GB)\")\n",
    "    axes[1].set_xlabel(\"Batch Size\")\n",
    "    axes[1].get_legend().remove() # Remove legend to avoid duplication\n",
    "\n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "# ---------------------------------------------------------\n",
    "# OPTION 2: EFFICIENCY TRADE-OFF (Scatter Plot)\n",
    "# Best for showing the balance between Speed and Memory\n",
    "# ---------------------------------------------------------\n",
    "def plot_tradeoff(df):\n",
    "    \"\"\"\n",
    "    Creates a scatter plot where:\n",
    "    X-axis: Memory\n",
    "    Y-axis: Time\n",
    "    Marker Shape: Batch Size\n",
    "    Color: Method\n",
    "    \"\"\"\n",
    "    plt.figure(figsize=(10, 8))\n",
    "    \n",
    "    # Create scatter plot\n",
    "    sns.scatterplot(\n",
    "        data=df, \n",
    "        x='Peak_Mem_GB', \n",
    "        y='Time_ms', \n",
    "        hue='Method', \n",
    "        style='Batch_Size', # Different shapes for batch sizes\n",
    "        s=200, # Marker size\n",
    "        palette=\"deep\",\n",
    "        edgecolor=\"black\"\n",
    "    )\n",
    "\n",
    "    # Add connecting lines for the same method to show the trend\n",
    "    for method in df['Method'].unique():\n",
    "        subset = df[df['Method'] == method]\n",
    "        plt.plot(subset['Peak_Mem_GB'], subset['Time_ms'], linestyle='--', alpha=0.5, color='gray')\n",
    "\n",
    "    plt.title(\"Efficiency Trade-off: Latency vs. Memory\", fontweight='bold')\n",
    "    plt.xlabel(\"Peak Memory (GB)\")\n",
    "    plt.ylabel(\"Time per step (ms)\")\n",
    "    \n",
    "    # Grid customization\n",
    "    plt.grid(True, which='both', linestyle='--', linewidth=0.5)\n",
    "    \n",
    "    plt.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "# ---------------------------------------------------------\n",
    "# MAIN EXECUTION\n",
    "# ---------------------------------------------------------\n",
    "if __name__ == \"__main__\":\n",
    "    print(\"Generating Scalability Chart (Option 1)...\")\n",
    "    plot_scalability(df)\n",
    "    \n",
    "    print(\"Generating Trade-off Chart (Option 2)...\")\n",
    "    plot_tradeoff(df)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sama",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}