{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6de60419",
   "metadata": {},
   "outputs": [],
   "source": [
    "import copy\n",
    "from typing import Dict, Sequence\n",
    "\n",
    "import transformers\n",
    "\n",
    "# Label value written over the prompt tokens in `preprocess`, so that prompt\n",
    "# positions can be told apart from response positions.\n",
    "IGNORE_INDEX = -100\n",
    "PROMPT = (\n",
    "    \"Below is an instruction that describes a task. \"\n",
    "    \"Write a response that appropriately completes the request.\\n\\n\"\n",
    "    \"### Instruction:\\n{instruction}\\n\\n### Response:\"\n",
    ")\n",
    "\n",
    "\n",
    "def _tokenize_fn(strings: Sequence[str], tokenizer: transformers.PreTrainedTokenizer) -> Dict:\n",
    "    \"\"\"Tokenize each string separately and record its token count.\n",
    "\n",
    "    Args:\n",
    "        strings: Texts to tokenize; the tokenizer is called once per text.\n",
    "        tokenizer: Tokenizer called with `return_tensors=\"pt\"`; texts longer than\n",
    "            `tokenizer.model_max_length` are truncated.\n",
    "\n",
    "    Returns:\n",
    "        Dict with `input_ids` and `labels` (the same list of 1-D id tensors) and\n",
    "        `input_ids_lens` / `labels_lens` (the same list of per-text counts of\n",
    "        non-pad tokens).\n",
    "    \"\"\"\n",
    "    tokenized_list = [\n",
    "        tokenizer(\n",
    "            text,\n",
    "            return_tensors=\"pt\",\n",
    "            padding=\"longest\",\n",
    "            max_length=tokenizer.model_max_length,\n",
    "            truncation=True,\n",
    "        )\n",
    "        for text in strings\n",
    "    ]\n",
    "    # `input_ids` and `labels` intentionally alias the same list; `preprocess`\n",
    "    # deep-copies before it masks anything.\n",
    "    input_ids = labels = [tokenized.input_ids[0] for tokenized in tokenized_list]\n",
    "\n",
    "    pad_token_id = tokenizer.pad_token_id\n",
    "    if pad_token_id is None:\n",
    "        # No pad token is configured, so `.ne(None)` would raise. Each text is\n",
    "        # tokenized on its own and is therefore never padded: its full length\n",
    "        # is its real length.\n",
    "        input_ids_lens = labels_lens = [ids.numel() for ids in input_ids]\n",
    "    else:\n",
    "        input_ids_lens = labels_lens = [\n",
    "            tokenized.input_ids.ne(pad_token_id).sum().item() for tokenized in tokenized_list\n",
    "        ]\n",
    "    return dict(\n",
    "        input_ids=input_ids,\n",
    "        labels=labels,\n",
    "        input_ids_lens=input_ids_lens,\n",
    "        labels_lens=labels_lens,\n",
    "    )\n",
    "\n",
    "\n",
    "def preprocess(\n",
    "    sources: Sequence[str],\n",
    "    targets: Sequence[str],\n",
    "    tokenizer: transformers.PreTrainedTokenizer,\n",
    ") -> Dict:\n",
    "    \"\"\"Tokenize source+target pairs and mask the source part of the labels.\n",
    "\n",
    "    Args:\n",
    "        sources: Prompt texts.\n",
    "        targets: Response texts, paired with `sources` by position.\n",
    "        tokenizer: Tokenizer forwarded to `_tokenize_fn`.\n",
    "\n",
    "    Returns:\n",
    "        Dict with `input_ids` (token ids of each source+target concatenation) and\n",
    "        `labels` (a deep copy whose first `source_len` positions are set to\n",
    "        `IGNORE_INDEX`, where `source_len` is the token count of the source\n",
    "        tokenized on its own).\n",
    "    \"\"\"\n",
    "    examples = [s + t for s, t in zip(sources, targets)]\n",
    "    examples_tokenized = _tokenize_fn(examples, tokenizer)\n",
    "    sources_tokenized = _tokenize_fn(sources, tokenizer)\n",
    "\n",
    "    input_ids = examples_tokenized[\"input_ids\"]\n",
    "    # Deep copy so that masking the labels leaves `input_ids` untouched.\n",
    "    labels = copy.deepcopy(input_ids)\n",
    "    for label, source_len in zip(labels, sources_tokenized[\"input_ids_lens\"]):\n",
    "        label[:source_len] = IGNORE_INDEX\n",
    "    return dict(input_ids=input_ids, labels=labels)\n",
    "\n",
    "\n",
    "def train_tokenize_function(examples, tokenizer, query, response):\n",
    "    \"\"\"Format prompts, append EOS to the responses, and preprocess the batch.\n",
    "\n",
    "    Args:\n",
    "        examples: Mapping indexed by `query` and `response`; each entry is iterated\n",
    "            as a sequence of texts (presumably a batched dataset dict; confirm\n",
    "            against the caller).\n",
    "        tokenizer: Tokenizer providing `eos_token`; forwarded to `preprocess`.\n",
    "        query: Key of the instruction texts in `examples`.\n",
    "        response: Key of the output texts in `examples`.\n",
    "\n",
    "    Returns:\n",
    "        The dict produced by `preprocess` (`input_ids` and `labels`).\n",
    "    \"\"\"\n",
    "    sources = [PROMPT.format_map(dict(instruction=instruction)) for instruction in examples[query]]\n",
    "    targets = [f\"{output}{tokenizer.eos_token}\" for output in examples[response]]\n",
    "    data_dict = preprocess(sources, targets, tokenizer)\n",
    "    return data_dict"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ed5bd587",
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import sys\n",
    "\n",
    "\n",
    "def detect_numpy_source(expected_version=\"1.26.4\"):\n",
    "    \"\"\"\n",
    "    Identifies the actual loaded NumPy version and its location on disk.\n",
    "    This helps distinguish between Conda metadata and actual imported files.\n",
    "\n",
    "    Prints the process ID, the Python executable, the imported NumPy version and\n",
    "    file location, whether that version equals `expected_version`, and every\n",
    "    entry of `sys.path`.\n",
    "\n",
    "    Args:\n",
    "        expected_version: Version string the environment is supposed to provide.\n",
    "    \"\"\"\n",
    "    separator = \"=\" * 40\n",
    "    print(separator)\n",
    "    print(f\"Debug Process ID: {os.getpid()}\")\n",
    "    print(f\"Python Executable: {sys.executable}\")\n",
    "\n",
    "    try:\n",
    "        # Imported here so that a broken or missing NumPy is reported, not raised.\n",
    "        import numpy\n",
    "        print(separator)\n",
    "        print(f\"ACTUAL IMPORTED NUMPY VERSION: {numpy.__version__}\")\n",
    "        print(f\"Location on disk: {numpy.__file__}\")\n",
    "        print(separator)\n",
    "\n",
    "        # Check if the imported version matches the expected one\n",
    "        if numpy.__version__ == expected_version:\n",
    "            print(\"Status: The environment seems correct. The issue might be in the worker process.\")\n",
    "        else:\n",
    "            print(f\"Status: CONFLICT DETECTED! Conda says {expected_version}, but Python loads {numpy.__version__}.\")\n",
    "\n",
    "    except ImportError as e:\n",
    "        print(f\"Error importing numpy: {e}\")\n",
    "\n",
    "    # Print sys.path to see if ~/.local or other paths are leaking in\n",
    "    print(\"\\n--- Python Search Paths (sys.path) ---\")\n",
    "    for p in sys.path:\n",
    "        print(p)\n",
    "\n",
    "\n",
    "if __name__ == \"__main__\":\n",
    "    detect_numpy_source()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a2693b63",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--- PyTorch CUDA Status ---\n",
      "✅ torch.cuda.is_available() is True\n",
      "Number of GPUs: 1\n",
      "Current GPU: NVIDIA H200\n",
      "PyTorch built with CUDA version: 12.8\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "\n",
    "def check_cuda_status():\n",
    "    \"\"\"Checks and prints the CUDA status.\n",
    "\n",
    "    Prints whether CUDA is available, the number of visible GPUs and the name of\n",
    "    the current device when it is, and the CUDA version PyTorch was built with\n",
    "    in either case.\n",
    "    \"\"\"\n",
    "    print(\"--- PyTorch CUDA Status ---\")\n",
    "    if torch.cuda.is_available():\n",
    "        print(\"✅ torch.cuda.is_available() is True\")\n",
    "        print(f\"Number of GPUs: {torch.cuda.device_count()}\")\n",
    "        # Name the device that is actually current instead of hard-coding index 0.\n",
    "        print(f\"Current GPU: {torch.cuda.get_device_name(torch.cuda.current_device())}\")\n",
    "    else:\n",
    "        print(\"❌ torch.cuda.is_available() is False\")\n",
    "    # Reported in both cases: the build version is known even without a usable GPU.\n",
    "    print(f\"PyTorch built with CUDA version: {torch.version.cuda}\")\n",
    "\n",
    "\n",
    "check_cuda_status()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3e39c876",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Launch training from the parent directory. The `cd` happens inside the\n",
    "# subshell, so the kernel's working directory is left alone and re-running\n",
    "# this cell does not keep moving one level further up.\n",
    "!cd .. && bash scripts/cms_l3_train.sh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bb54d45",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import seaborn as sns\n",
    "import matplotlib.pyplot as plt\n",
    "import matplotlib.ticker as ticker\n",
    "\n",
    "# ---------------------------------------------------------\n",
    "# 1. DATA PREPARATION\n",
    "# ---------------------------------------------------------\n",
    "# Values were transcribed by hand from an image of the benchmark results.\n",
    "# To compare a second model, append its rows to this dataframe.\n",
    "data = {\n",
    "    'Method': ['OFT', 'BOFT', 'HRA', 'LoCo'] * 3,\n",
    "    'Batch_Size': [16]*4 + [32]*4 + [64]*4,\n",
    "    # Time per step (ms) - taking the average from the image\n",
    "    'Time_ms': [\n",
    "        119.7, 202.5, 311.0, 166.7,  # Batch 16\n",
    "        195.1, 269.2, 316.0, 221.4,  # Batch 32\n",
    "        357.3, 410.2, 398.8, 374.3   # Batch 64\n",
    "    ],\n",
    "    # Peak Memory (GB)\n",
    "    'Peak_Mem_GB': [\n",
    "        16.21, 19.73, 21.85, 16.26,  # Batch 16\n",
    "        31.55, 35.01, 37.07, 31.67,  # Batch 32\n",
    "        62.24, 65.69, 67.62, 62.50   # Batch 64\n",
    "    ],\n",
    "    # Add a 'Model' column if you have data for 2 models\n",
    "    'Model_Name': ['Model A'] * 12\n",
    "}\n",
    "\n",
    "# Build the frame directly: a JSON round trip adds nothing here, and passing a\n",
    "# literal JSON string to pd.read_json is deprecated in recent pandas.\n",
    "df = pd.DataFrame(data)\n",
    "\n",
    "# Set the visual style for scientific publication\n",
    "sns.set_theme(style=\"whitegrid\", context=\"paper\", font_scale=1.2)\n",
    "\n",
    "\n",
    "# ---------------------------------------------------------\n",
    "# OPTION 1: SCALABILITY CHARTS (Bar Charts)\n",
    "# Good for showing how metrics increase with batch size\n",
    "# ---------------------------------------------------------\n",
    "def plot_scalability(df):\n",
    "    \"\"\"\n",
    "    Creates a figure with 2 subplots:\n",
    "    1. Time per step vs Batch Size\n",
    "    2. Peak Memory vs Batch Size\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame with 'Method', 'Batch_Size', 'Time_ms' and 'Peak_Mem_GB' columns.\n",
    "    \"\"\"\n",
    "    fig, axes = plt.subplots(1, 2, figsize=(16, 6))\n",
    "\n",
    "    # --- Plot 1: Latency (Time) ---\n",
    "    sns.barplot(\n",
    "        data=df, x='Batch_Size', y='Time_ms', hue='Method',\n",
    "        ax=axes[0], palette=\"viridis\", edgecolor=\"black\", alpha=0.9\n",
    "    )\n",
    "    axes[0].set_title(\"Training Latency (Lower is better)\", fontweight='bold')\n",
    "    axes[0].set_ylabel(\"Time per step (ms)\")\n",
    "    axes[0].set_xlabel(\"Batch Size\")\n",
    "    axes[0].legend(title='Method')\n",
    "\n",
    "    # --- Plot 2: Memory Footprint ---\n",
    "    sns.barplot(\n",
    "        data=df, x='Batch_Size', y='Peak_Mem_GB', hue='Method',\n",
    "        ax=axes[1], palette=\"viridis\", edgecolor=\"black\", alpha=0.9\n",
    "    )\n",
    "    axes[1].set_title(\"Peak Memory Usage (Lower is better)\", fontweight='bold')\n",
    "    axes[1].set_ylabel(\"Peak Memory (GB)\")\n",
    "    axes[1].set_xlabel(\"Batch Size\")\n",
    "    axes[1].get_legend().remove()  # Remove legend to avoid duplication\n",
    "\n",
    "    fig.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "# ---------------------------------------------------------\n",
    "# OPTION 2: EFFICIENCY TRADE-OFF (Scatter Plot)\n",
    "# Best for showing the balance between Speed and Memory\n",
    "# ---------------------------------------------------------\n",
    "def plot_tradeoff(df):\n",
    "    \"\"\"\n",
    "    Creates a scatter plot where:\n",
    "    X-axis: Memory\n",
    "    Y-axis: Time\n",
    "    Marker Shape: Batch Size\n",
    "    Color: Method\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame with 'Method', 'Batch_Size', 'Time_ms' and 'Peak_Mem_GB' columns.\n",
    "    \"\"\"\n",
    "    fig, ax = plt.subplots(figsize=(10, 8))\n",
    "\n",
    "    # Create scatter plot\n",
    "    sns.scatterplot(\n",
    "        data=df,\n",
    "        x='Peak_Mem_GB',\n",
    "        y='Time_ms',\n",
    "        hue='Method',\n",
    "        style='Batch_Size',  # Different shapes for batch sizes\n",
    "        s=200,               # Marker size\n",
    "        palette=\"deep\",\n",
    "        edgecolor=\"black\",\n",
    "        ax=ax\n",
    "    )\n",
    "\n",
    "    # Add connecting lines for the same method to show the trend. Sorting by\n",
    "    # batch size keeps each line in order even if the rows are shuffled.\n",
    "    for _method, subset in df.groupby('Method', sort=False):\n",
    "        subset = subset.sort_values('Batch_Size')\n",
    "        ax.plot(subset['Peak_Mem_GB'], subset['Time_ms'], linestyle='--', alpha=0.5, color='gray')\n",
    "\n",
    "    ax.set_title(\"Efficiency Trade-off: Latency vs. Memory\", fontweight='bold')\n",
    "    ax.set_xlabel(\"Peak Memory (GB)\")\n",
    "    ax.set_ylabel(\"Time per step (ms)\")\n",
    "\n",
    "    # Grid customization\n",
    "    ax.grid(True, which='both', linestyle='--', linewidth=0.5)\n",
    "\n",
    "    fig.tight_layout()\n",
    "    plt.show()\n",
    "\n",
    "\n",
    "# ---------------------------------------------------------\n",
    "# MAIN EXECUTION\n",
    "# ---------------------------------------------------------\n",
    "if __name__ == \"__main__\":\n",
    "    print(\"Generating Scalability Chart (Option 1)...\")\n",
    "    plot_scalability(df)\n",
    "\n",
    "    print(\"Generating Trade-off Chart (Option 2)...\")\n",
    "    plot_tradeoff(df)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "sama",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.14"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}