hunterbown
/

shannon-control-unit

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Shannon Control Unit — Dial-in LLM regularization (Colab demo)\n\n",
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/hmbown/shannon-control-unit/blob/main/notebooks/SCU_Demo.ipynb)\n\n",
+    "Held-out: Base 3.920 BPT (ppl 15.14) → SCU 3.676 (ppl 12.78), Δ −0.244 BPT ≈ −15.6% ppl.\n\n",
+    "Adapters inherit Meta Llama 3.2 license; SCU code Apache-2.0. U.S. patent pending (provisional filed Sep 2025).\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 1) Setup\n",
+    "import os, sys, subprocess, random, json\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# Minimal deps; bitsandbytes only on CUDA\n",
+    "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"\n",
+    "def _pip_install(pkgs):\n",
+    "    \"\"\"Install `pkgs` with the kernel's own interpreter and return pip's exit code.\n",
+    "\n",
+    "    A non-zero exit code is reported here so that a failed install does not\n",
+    "    first show up as an unrelated ImportError in a later cell.\n",
+    "    \"\"\"\n",
+    "    pkgs = list(pkgs)\n",
+    "    cmd = [sys.executable, '-m', 'pip', 'install', '-q'] + pkgs\n",
+    "    rc = subprocess.call(cmd)\n",
+    "    if rc != 0:\n",
+    "        print(f'WARNING: pip install exited with code {rc} for: ' + ' '.join(pkgs))\n",
+    "    return rc\n",
+    "\n",
+    "# Ensure core libs; rely on preinstalled torch\n",
+    "_pip_install(['transformers', 'peft', 'accelerate', 'huggingface_hub', 'matplotlib', 'numpy', 'pandas'])\n",
+    "\n",
+    "# Import torch once (best effort) and reuse the result for the CUDA check and seeding below\n",
+    "try:\n",
+    "    import torch\n",
+    "except Exception as e:\n",
+    "    torch = None\n",
+    "    print('WARNING: torch could not be imported; skipping CUDA setup and torch seeding:', repr(e))\n",
+    "\n",
+    "# Optional: install bitsandbytes only if CUDA is available\n",
+    "cuda_avail = False\n",
+    "if torch is not None:\n",
+    "    try:\n",
+    "        cuda_avail = bool(torch.cuda.is_available())\n",
+    "    except Exception as e:\n",
+    "        print('WARNING: CUDA availability check failed:', repr(e))\n",
+    "if cuda_avail:\n",
+    "    _ = _pip_install(['bitsandbytes'])\n",
+    "\n",
+    "# Optional: login to Hugging Face to access gated models (accept Llama 3.2 terms).\n",
+    "# from huggingface_hub import login\n",
+    "# login()  # Ensure you've accepted https://huggingface.co/meta-llama/Llama-3.2-1B and 3B\n",
+    "\n",
+    "# Seed everything deterministically\n",
+    "import numpy as np\n",
+    "random.seed(42)\n",
+    "np.random.seed(42)\n",
+    "if torch is not None:\n",
+    "    torch.manual_seed(42)\n",
+    "    if cuda_avail:\n",
+    "        torch.cuda.manual_seed_all(42)\n",
+    "\n",
+    "print('Setup complete.')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 2) Device & precision detection\n",
+    "import importlib.util\n",
+    "import torch\n",
+    "from pathlib import Path\n",
+    "\n",
+    "# torch builds without an MPS backend have no torch.backends.mps; treat that as 'no MPS'\n",
+    "_mps_backend = getattr(torch.backends, 'mps', None)\n",
+    "_mps_ok = bool(_mps_backend is not None and _mps_backend.is_available())\n",
+    "device = 'cuda' if torch.cuda.is_available() else ('mps' if _mps_ok else 'cpu')\n",
+    "print('Device:', device, '| Torch:', torch.__version__, '| CUDA:', torch.version.cuda)\n",
+    "\n",
+    "# 4-bit loading is only attempted on CUDA and only when bitsandbytes is importable.\n",
+    "# BitsAndBytesConfig itself is imported from transformers, so importing it does not\n",
+    "# show that bitsandbytes is installed; check for the package explicitly.\n",
+    "bnb_config = None\n",
+    "four_bit_active = False\n",
+    "if device == 'cuda':\n",
+    "    try:\n",
+    "        if importlib.util.find_spec('bitsandbytes') is None:\n",
+    "            raise ImportError('bitsandbytes is not installed')\n",
+    "        from transformers import BitsAndBytesConfig\n",
+    "        bnb_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)\n",
+    "        four_bit_active = True\n",
+    "    except Exception as e:\n",
+    "        print('bitsandbytes not available; falling back to fp16/fp32.')\n",
+    "        print('Reason:', repr(e))\n",
+    "        bnb_config = None\n",
+    "        four_bit_active = False\n",
+    "\n",
+    "IS_CUDA = device == 'cuda'\n",
+    "IS_MPS = device == 'mps'\n",
+    "IS_CPU = device == 'cpu'\n",
+    "print('4-bit active:' , four_bit_active)\n",
+    "if IS_MPS:\n",
+    "    print('Running fp32 on Apple Silicon (MPS).')\n",
+    "if IS_CPU:\n",
+    "    print('WARNING: Using CPU; training is disabled and steps reduced.')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 3) Config\n",
+    "MODEL_SIZE = '1B'  # '1B' or '3B'\n",
+    "TARGET_S = 0.01\n",
+    "# Step budget depends on the backend detected in the previous cell\n",
+    "if IS_CUDA:\n",
+    "    STEPS = 250\n",
+    "elif IS_MPS:\n",
+    "    STEPS = 120\n",
+    "else:\n",
+    "    STEPS = 40\n",
+    "BLOCK_SIZE = 1024\n",
+    "BATCH_SIZE = 1\n",
+    "GRAD_ACCUM = 4\n",
+    "PRIOR_SIGMA = 0.01\n",
+    "\n",
+    "# Output locations (relative to the working directory); created up front\n",
+    "root = Path.cwd()\n",
+    "out_dir = Path('outputs/PI/demo_run')\n",
+    "out_dir.mkdir(parents=True, exist_ok=True)\n",
+    "fig_dir = Path('assets/figures')\n",
+    "fig_dir.mkdir(parents=True, exist_ok=True)\n",
+    "print('Outputs ->', out_dir.resolve())\n",
+    "print('Figures ->', fig_dir.resolve())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 4) Load base model + optional adapter\n",
+    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
+    "base_id = 'meta-llama/Llama-3.2-1B' if MODEL_SIZE == '1B' else 'meta-llama/Llama-3.2-3B'\n",
+    "if MODEL_SIZE == '3B':\n",
+    "    print('Note: 3B may OOM on Colab T4; prefer 1B for demo.')\n",
+    "\n",
+    "# On any failure the cell reports the error and leaves model/tok as None so that\n",
+    "# later cells can detect the missing model and skip their work.\n",
+    "try:\n",
+    "    tok = AutoTokenizer.from_pretrained(base_id, use_fast=True)\n",
+    "    # Fall back to EOS as the padding token when the tokenizer defines none\n",
+    "    if tok.pad_token is None:\n",
+    "        tok.pad_token = tok.eos_token\n",
+    "    if IS_CUDA and bnb_config is not None:\n",
+    "        # CUDA with a quantization config: load through bnb_config\n",
+    "        model = AutoModelForCausalLM.from_pretrained(\n",
+    "            base_id, quantization_config=bnb_config, device_map='auto'\n",
+    "        )\n",
+    "    else:\n",
+    "        # Otherwise load in fp32; device_map is only passed off-CPU\n",
+    "        model = AutoModelForCausalLM.from_pretrained(\n",
+    "            base_id, torch_dtype=torch.float32, device_map='auto' if not IS_CPU else None\n",
+    "        )\n",
+    "    model.config.pad_token_id = tok.pad_token_id\n",
+    "    try:\n",
+    "        model.config.use_cache = False\n",
+    "    except Exception:\n",
+    "        pass\n",
+    "    model.eval()\n",
+    "    total_params = sum(p.numel() for p in model.parameters())/1e6\n",
+    "    print(f'Loaded base: {base_id} | params: {total_params:.1f}M')\n",
+    "    print('LoRA adapters: none loaded')\n",
+    "except Exception as e:\n",
+    "    # Adjacent literals are concatenated into one message; paragraphs are separated by a blank line\n",
+    "    print('ERROR: Could not load base model/tokenizer.\\n\\n'\n",
+    "          'This model is gated. Ensure you are logged in to Hugging Face '\n",
+    "          'and have accepted the license terms for Llama 3.2.\\n\\n'\n",
+    "          f'Visit: https://huggingface.co/{base_id}', sep='')\n",
+    "    print('Original error:', repr(e))\n",
+    "    model = None\n",
+    "    tok = None\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 5) Quick generation sanity\n",
+    "def generate_text(prompt, max_new_tokens=64):\n",
+    "    \"\"\"Greedy-decode a continuation of `prompt` with the notebook-level `model` and `tok`.\n",
+    "\n",
+    "    Returns the decoded first output sequence with special tokens skipped, or the\n",
+    "    placeholder '[model not available]' when either the model or the tokenizer is None.\n",
+    "    \"\"\"\n",
+    "    if not (model is not None and tok is not None):\n",
+    "        return '[model not available]'\n",
+    "    encoded = tok(prompt, return_tensors='pt').to(model.device)\n",
+    "    # do_sample=False: deterministic decoding\n",
+    "    gen_kwargs = dict(max_new_tokens=max_new_tokens, do_sample=False,\n",
+    "                      pad_token_id=tok.pad_token_id, eos_token_id=tok.eos_token_id)\n",
+    "    with torch.no_grad():\n",
+    "        sequences = model.generate(**encoded, **gen_kwargs)\n",
+    "    first_sequence = sequences[0]\n",
+    "    return tok.decode(first_sequence, skip_special_tokens=True)\n",
+    "\n",
+    "# Run a few fixed prompts through generate_text and print prompt/output pairs,\n",
+    "# each section preceded by a blank line\n",
+    "for p in [\n",
+    "    'Explain Shannon Control Unit (SCU) in one paragraph.',\n",
+    "    'Write a haiku about control loops in AI.',\n",
+    "    'List three practical uses of LoRA adapters.'\n",
+    "]:\n",
+    "    print('\\n--- Prompt ---')\n",
+    "    print(p)\n",
+    "    print('\\n--- Output ---')\n",
+    "    print(generate_text(p))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 6) Metrics utilities\n",
+    "import math\n",
+    "\n",
+    "def calculate_bpt(model, text, tok, max_len=512):\n",
+    "    \"\"\"Return the model's loss on `text`, converted from nats to bits (loss / ln 2).\n",
+    "\n",
+    "    The text is tokenized with truncation to `max_len`, moved to `model.device`,\n",
+    "    and scored with the input ids used as their own labels under `torch.no_grad()`.\n",
+    "    NOTE(review): assumes `out.loss` is a mean per-token cross-entropy in nats — confirm\n",
+    "    for the model class in use.\n",
+    "    \"\"\"\n",
+    "    batch = tok(text, return_tensors='pt', truncation=True, max_length=max_len)\n",
+    "    target_device = model.device\n",
+    "    batch = {key: tensor.to(target_device) for key, tensor in batch.items()}\n",
+    "    with torch.no_grad():\n",
+    "        result = model(**batch, labels=batch['input_ids'].clone())\n",
+    "    loss_nats = result.loss.item()\n",
+    "    return loss_nats / math.log(2)  # bits per token\n",
+    "\n",
+    "def param_bpt_lora(model, prior_sigma=0.01, tokens_norm=512_000, trainable_only=False):\n",
+    "    \"\"\"Parameter cost of the LoRA weights in bits per token.\n",
+    "\n",
+    "    Sums the squared entries of every parameter whose name contains 'lora'\n",
+    "    (case-insensitive), divides by 2 * prior_sigma**2 (nats), converts to bits and\n",
+    "    divides by max(tokens_norm, 1).\n",
+    "\n",
+    "    Frozen parameters are counted by default: this notebook loads adapters with\n",
+    "    `is_trainable=False`, and filtering on `requires_grad` would then always give 0.\n",
+    "    Pass `trainable_only=True` to count only parameters that require gradients.\n",
+    "    NOTE(review): the default tokens_norm=512_000 is presumably the training token\n",
+    "    count used for normalization — confirm against the trainer.\n",
+    "    \"\"\"\n",
+    "    quad = 0.0\n",
+    "    for name, p in model.named_parameters():\n",
+    "        if 'lora' not in name.lower():\n",
+    "            continue\n",
+    "        if trainable_only and not p.requires_grad:\n",
+    "            continue\n",
+    "        # detach(): only the value is needed, not a graph through the weights\n",
+    "        quad += (p.detach().float() ** 2).sum().item()\n",
+    "    nats = quad / (2.0 * (prior_sigma ** 2))\n",
+    "    bits = nats / math.log(2)\n",
+    "    return bits / max(tokens_norm, 1)\n",
+    "\n",
+    "def compute_S(data_bpt, param_bpt):\n",
+    "    \"\"\"Return param_bpt as a fraction of (data_bpt + param_bpt).\n",
+    "\n",
+    "    The denominator is floored at 1e-12 so a zero total does not divide by zero.\n",
+    "    \"\"\"\n",
+    "    total_bpt = data_bpt + param_bpt\n",
+    "    safe_total = max(total_bpt, 1e-12)\n",
+    "    return param_bpt / safe_total\n",
+    "\n",
+    "def bpt_to_ppl(bpt):\n",
+    "    \"\"\"Convert bits per token to perplexity: 2 raised to the power `bpt`.\"\"\"\n",
+    "    perplexity = 2.0 ** bpt\n",
+    "    return perplexity\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 7) Reproduce validation (Base vs SCU)\n",
+    "from peft import PeftModel\n",
+    "import pandas as pd\n",
+    "\n",
+    "def load_val_texts(max_texts=25):\n",
+    "    \"\"\"Return the validation sentences used by this cell.\n",
+    "\n",
+    "    When data/val.txt exists, returns its first `max_texts` non-blank lines with\n",
+    "    surrounding whitespace stripped; otherwise returns a small built-in list.\n",
+    "    \"\"\"\n",
+    "    # Prefer data/val.txt if present, else small built-in list\n",
+    "    path = Path('data/val.txt')\n",
+    "    if path.exists():\n",
+    "        # Explicit UTF-8 so decoding does not depend on the platform's locale encoding\n",
+    "        stripped = (line.strip() for line in path.read_text(encoding='utf-8').splitlines())\n",
+    "        return [line for line in stripped if line][:max_texts]\n",
+    "    return [\n",
+    "        'Quantum error correction protects information from decoherence and noise.',\n",
+    "        'The SCU adjusts regularization strength to track a target parameter ratio.',\n",
+    "        'LoRA adapters enable efficient fine-tuning of large language models.',\n",
+    "        'Perplexity is an exponential function of bits per token.',\n",
+    "        'PI control uses proportional and integral action to reduce steady-state error.',\n",
+    "        'Evaluation on held-out documents ensures generalization beyond training.'\n",
+    "    ]\n",
+    "\n",
+    "def try_load_adapter_into(model):\n",
+    "    \"\"\"Attach a LoRA adapter to `model` for inference and return the PeftModel.\n",
+    "\n",
+    "    A local adapter in `out_dir` (detected by its adapter_config.json) takes priority;\n",
+    "    otherwise each published repo id is tried in turn, with failures printed.\n",
+    "    Returns None when no adapter could be loaded.\n",
+    "    \"\"\"\n",
+    "    # 1) Local demo adapter if exists\n",
+    "    local_config = out_dir / 'adapter_config.json'\n",
+    "    if local_config.exists():\n",
+    "        print(f'Loading local adapter: {out_dir}')\n",
+    "        return PeftModel.from_pretrained(model, out_dir, is_trainable=False)\n",
+    "    # 2) Published adapter (if available)\n",
+    "    published_repos = ('hunterbown/shannon-control-unit',)\n",
+    "    for repo_id in published_repos:\n",
+    "        print(f'Trying to load adapter from HF: {repo_id}')\n",
+    "        try:\n",
+    "            return PeftModel.from_pretrained(model, repo_id, is_trainable=False)\n",
+    "        except Exception as err:\n",
+    "            print(f'Could not load {repo_id}:', repr(err))\n",
+    "    return None\n",
+    "\n",
+    "val_texts = load_val_texts()\n",
+    "print(f'Validation texts: {len(val_texts)}')\n",
+    "\n",
+    "# Score the base model on every validation text before any adapter is attached.\n",
+    "# NOTE(review): the adapter is loaded onto this same `model` object below, presumably\n",
+    "# modifying it in place, so the base pass has to stay first — confirm with PEFT docs.\n",
+    "base_bpts = []\n",
+    "if model is not None and tok is not None:\n",
+    "    for t in val_texts:\n",
+    "        try:\n",
+    "            base_bpts.append(calculate_bpt(model, t, tok))\n",
+    "        except Exception as e:\n",
+    "            # Stop at the first failure; scores collected so far are kept\n",
+    "            print('Eval error on base model:', repr(e))\n",
+    "            break\n",
+    "\n",
+    "# Defaults used when the model is unavailable or no adapter can be loaded\n",
+    "adapter = None\n",
+    "scu_bpts = []\n",
+    "param_bpt = None\n",
+    "if model is not None and tok is not None:\n",
+    "    try:\n",
+    "        adapter = try_load_adapter_into(model)\n",
+    "    except Exception as e:\n",
+    "        print('Adapter load error:', repr(e))\n",
+    "\n",
+    "    if adapter is not None:\n",
+    "        adapter.eval()\n",
+    "        # Evaluate with adapter\n",
+    "        for t in val_texts:\n",
+    "            try:\n",
+    "                scu_bpts.append(calculate_bpt(adapter, t, tok))\n",
+    "            except Exception as e:\n",
+    "                # Same policy as the base pass: report and stop at the first failure\n",
+    "                print('Eval error with adapter:', repr(e))\n",
+    "                break\n",
+    "        # ParamBPT for LoRA\n",
+    "        try:\n",
+    "            param_bpt = param_bpt_lora(adapter, prior_sigma=PRIOR_SIGMA, tokens_norm=512_000)\n",
+    "        except Exception as e:\n",
+    "            # param_bpt stays None; the summary then reports NaN for ParamBPT and S\n",
+    "            print('ParamBPT error:', repr(e))\n",
+    "\n",
+    "\n",
+    "def summarize_rows(base_bpts, scu_bpts, param_bpt):\n",
+    "    \"\"\"Build the Base/SCU comparison table as a DataFrame.\n",
+    "\n",
+    "    One row per non-empty score list, holding the mean DataBPT, ParamBPT, S and the\n",
+    "    perplexity derived from the mean. The Base row has NaN ParamBPT and S = 0.0; the\n",
+    "    SCU row has NaN ParamBPT and S when `param_bpt` is None or NaN.\n",
+    "    \"\"\"\n",
+    "    columns = ['Model', 'DataBPT', 'ParamBPT', 'S', 'PPL']\n",
+    "    records = []\n",
+    "    if base_bpts:\n",
+    "        base_mean = float(np.mean(base_bpts))\n",
+    "        records.append(['Base', base_mean, np.nan, 0.0, bpt_to_ppl(base_mean)])\n",
+    "    if scu_bpts:\n",
+    "        scu_mean = float(np.mean(scu_bpts))\n",
+    "        if param_bpt is None:\n",
+    "            param_cost = np.nan\n",
+    "        else:\n",
+    "            param_cost = float(param_bpt)\n",
+    "        # S is only defined when a real (non-NaN) parameter cost is available\n",
+    "        if np.isnan(param_cost):\n",
+    "            ratio = np.nan\n",
+    "        else:\n",
+    "            ratio = compute_S(scu_mean, param_cost)\n",
+    "        records.append(['SCU', scu_mean, param_cost, ratio, bpt_to_ppl(scu_mean)])\n",
+    "    return pd.DataFrame(records, columns=columns)\n",
+    "\n",
+    "# Print the comparison table; when both rows exist (Base at index 0, SCU at index 1)\n",
+    "# also report the BPT difference and the relative perplexity drop.\n",
+    "df_val = summarize_rows(base_bpts, scu_bpts, param_bpt)\n",
+    "if not df_val.empty:\n",
+    "    with pd.option_context('display.precision', 4):\n",
+    "        print(df_val)\n",
+    "    if len(df_val) == 2:\n",
+    "        delta_bpt = df_val.loc[0, 'DataBPT'] - df_val.loc[1, 'DataBPT']\n",
+    "        base_ppl = df_val.loc[0, 'PPL']\n",
+    "        scu_ppl = df_val.loc[1, 'PPL']\n",
+    "        # Denominator floored so a degenerate base perplexity cannot divide by zero\n",
+    "        ppl_drop_pct = 100.0 * (base_ppl - scu_ppl) / max(base_ppl, 1e-9)\n",
+    "        print(f'ΔBPT = {delta_bpt:.3f} | PPL drop ≈ {ppl_drop_pct:.1f}%')\n",
+    "else:\n",
+    "    print('Validation skipped (model or adapter unavailable).')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 8) Control demonstration (training run)\n",
+    "import shlex, platform, time\n",
+    "\n",
+    "def find_upwards(rel_path, max_up=3):\n",
+    "    \"\"\"Locate `rel_path`, looking in the working directory and then its ancestors.\n",
+    "\n",
+    "    Returns Path(rel_path) itself when it already exists. Otherwise checks\n",
+    "    <dir>/rel_path for `max_up` directories, starting at the current working\n",
+    "    directory and moving one parent up each time. Returns None when nothing is found.\n",
+    "    \"\"\"\n",
+    "    direct = Path(rel_path)\n",
+    "    if direct.exists():\n",
+    "        return direct\n",
+    "    base_dir = Path.cwd()\n",
+    "    remaining = max_up\n",
+    "    while remaining > 0:\n",
+    "        candidate = base_dir / rel_path\n",
+    "        if candidate.exists():\n",
+    "            return candidate\n",
+    "        base_dir = base_dir.parent\n",
+    "        remaining -= 1\n",
+    "    return None\n",
+    "\n",
+    "script_path = find_upwards('scripts/train_scu.py')\n",
+    "print('Trainer script:', script_path)\n",
+    "\n",
+    "log_csv = out_dir / 'train_log.csv'\n",
+    "metadata_json = out_dir / 'metadata.json'\n",
+    "\n",
+    "should_train = IS_CUDA or IS_MPS\n",
+    "if not should_train:\n",
+    "    print('CPU detected: skipping training. Will simulate control log if needed.')\n",
+    "\n",
+    "if should_train and script_path and script_path.exists():\n",
+    "    base_flag = 'meta-llama/Llama-3.2-1B' if MODEL_SIZE == '1B' else 'meta-llama/Llama-3.2-3B'\n",
+    "    # Build argv as a list so interpreter/script/output paths containing spaces\n",
+    "    # reach the trainer as single arguments (no string re-splitting involved)\n",
+    "    train_argv = [\n",
+    "        sys.executable, str(script_path),\n",
+    "        '--base_model', base_flag,\n",
+    "        '--adapter_out', str(out_dir),\n",
+    "        '--steps', str(STEPS),\n",
+    "        '--batch_size', str(BATCH_SIZE),\n",
+    "        '--gradient_accumulation_steps', str(GRAD_ACCUM),\n",
+    "        '--block_size', str(BLOCK_SIZE),\n",
+    "        '--prior_sigma', str(PRIOR_SIGMA),\n",
+    "        '--target_s', str(TARGET_S), '--kp', '0.8', '--ki', '0.15',\n",
+    "        '--log_csv', str(log_csv),\n",
+    "        '--train_data', 'data/train.txt',\n",
+    "    ]\n",
+    "    cmd = shlex.join(train_argv)  # shell-quoted form, for display only\n",
+    "    print('Launching trainer:')\n",
+    "    print(cmd)\n",
+    "    try:\n",
+    "        subprocess.run(train_argv, check=True)\n",
+    "    except (subprocess.CalledProcessError, OSError) as e:\n",
+    "        # OSError covers failures to start the process at all\n",
+    "        print('Training failed:', e)\n",
+    "        print('Falling back to simulated log.')\n",
+    "\n",
+    "# If log missing (CPU or failure), create a toy control log so plots exist\n",
+    "if not log_csv.exists():\n",
+    "    import pandas as pd\n",
+    "    import numpy as np\n",
+    "    # sim_* names keep the toy series from overwriting notebook-level values\n",
+    "    # such as the `param_bpt` computed by the validation cell\n",
+    "    sim_steps = 120 if IS_MPS else (250 if IS_CUDA else 80)\n",
+    "    xs = np.arange(sim_steps)\n",
+    "    # Toy S(t): decaying oscillation around TARGET_S plus small seeded noise\n",
+    "    sim_S = TARGET_S + 0.3*TARGET_S*np.exp(-xs/25.0) * np.cos(xs/10.0) + 0.02*TARGET_S*np.random.default_rng(42).normal(size=sim_steps)\n",
+    "    lam = np.clip(1.0 + 2.0*np.exp(-xs/35.0), 1e-4, 10.0)\n",
+    "    sim_data_bpt = 3.9 - 0.0015*xs + 0.02*np.random.default_rng(0).normal(size=sim_steps)\n",
+    "    # Solved from S = param / (data + param) so the logged columns agree with sim_S\n",
+    "    sim_param_bpt = sim_S * np.maximum(sim_data_bpt + 1e-6, 1e-6) / np.maximum(1 - sim_S, 1e-6)\n",
+    "    df_sim = pd.DataFrame({\n",
+    "        'step': xs, 'data_bpt': sim_data_bpt, 'param_bpt': sim_param_bpt,\n",
+    "        'S': sim_S, 'lambda': lam, 'I': np.cumsum(sim_S - TARGET_S)*0.001, 'wall_time_s': xs * 0.5\n",
+    "    })\n",
+    "    out_dir.mkdir(parents=True, exist_ok=True)\n",
+    "    df_sim.to_csv(log_csv, index=False)\n",
+    "    with open(metadata_json, 'w') as f:\n",
+    "        json.dump({'target_s': float(TARGET_S)}, f)\n",
+    "\n",
+    "# Show the tail of the log\n",
+    "import pandas as pd\n",
+    "if log_csv.exists():\n",
+    "    df_tail = pd.read_csv(log_csv).tail(8)\n",
+    "    print(df_tail)\n",
+    "else:\n",
+    "    print('No training log found at', log_csv)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 9) Plot S(t) & λ(t)\n",
+    "import pandas as pd, matplotlib.pyplot as plt, numpy as np\n",
+    "from pathlib import Path\n",
+    "# Read the control log written (or simulated) by the previous cell\n",
+    "log_path = Path(out_dir) / 'train_log.csv'\n",
+    "df = pd.read_csv(log_path)\n",
+    "# Target S in percent: taken from metadata.json when present, else from the config value\n",
+    "meta_path = Path(out_dir) / 'metadata.json'\n",
+    "if meta_path.exists():\n",
+    "    metadata = json.loads(meta_path.read_text())\n",
+    "    S_target = 100.0 * float(metadata.get('target_s', TARGET_S))\n",
+    "else:\n",
+    "    S_target = 100.0 * float(TARGET_S)\n",
+    "\n",
+    "# S(t) with target band\n",
+    "plt.figure(figsize=(10,6), dpi=200)\n",
+    "# The log stores S as a fraction; it is plotted in percent\n",
+    "plt.plot(df['step'], 100.0*df['S'], label='S(t)')\n",
+    "# Half-width of the shaded band, in the same percent units as the plotted S\n",
+    "band = 0.2\n",
+    "plt.axhspan(S_target - band, S_target + band, alpha=0.15, color='tab:blue')\n",
+    "plt.xlabel('Step'); plt.ylabel('S (%)'); plt.title('S(t) tracking')\n",
+    "plt.legend(loc='best')\n",
+    "fig_dir.mkdir(parents=True, exist_ok=True)\n",
+    "# savefig comes before show so the file is written from the populated figure\n",
+    "plt.tight_layout(); plt.savefig(fig_dir / 's_curve.png')\n",
+    "plt.show()\n",
+    "\n",
+    "# λ(t) log-y\n",
+    "plt.figure(figsize=(10,6), dpi=200)\n",
+    "plt.semilogy(df['step'], df['lambda'], label='λ(t)')\n",
+    "plt.xlabel('Step'); plt.ylabel('λ'); plt.title('λ(t) bounded (log scale)')\n",
+    "plt.legend(loc='best')\n",
+    "plt.tight_layout(); plt.savefig(fig_dir / 'lambda_curve.png')\n",
+    "plt.show()\n",
+    "\n",
+    "# Settling time and steady-state error\n",
+    "S_pct = 100.0 * df['S'].values\n",
+    "steps = df['step'].values.astype(int)\n",
+    "lower, upper = S_target - band, S_target + band\n",
+    "settle_idx = None\n",
+    "window = 25\n",
+    "in_band = (S_pct >= lower) & (S_pct <= upper)\n",
+    "# +1 so the last full window (the one ending on the final logged step) is tested too\n",
+    "for i in range(len(S_pct) - window + 1):\n",
+    "    if np.all(in_band[i:i+window]):\n",
+    "        settle_idx = int(steps[i])\n",
+    "        break\n",
+    "if settle_idx is None:\n",
+    "    print('Settling time: not settled within band')\n",
+    "else:\n",
+    "    print('Settling time (first in-band for ≥25 steps):', settle_idx)\n",
+    "# Steady-state error over last 20%\n",
+    "if len(S_pct) > 0:\n",
+    "    cut = int(0.8 * len(S_pct))\n",
+    "    ss_err = float(np.mean(np.abs(S_pct[cut:] - S_target)))\n",
+    "    print(f'Steady-state |S−S*| over last 20%: {ss_err:.3f} pp')\n",
+    "else:\n",
+    "    # An empty log has no tail to average; say so instead of printing NaN\n",
+    "    print('Steady-state error: log is empty')\n",
+    "print('Saved figures:', fig_dir / 's_curve.png', '|', fig_dir / 'lambda_curve.png')\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 10) Minimal ablations (optional/short)\n",
+    "import shlex\n",
+    "# Runs two short ablations through scripts/run_ablation.py (when present) and prints\n",
+    "# a summary of any eval_results.json files found under ablations/.\n",
+    "abl_script = Path('scripts/run_ablation.py')\n",
+    "if abl_script.exists():\n",
+    "    small_steps = 80 if IS_CUDA else (40 if IS_MPS else 10)\n",
+    "    base_flag = 'meta-llama/Llama-3.2-1B' if MODEL_SIZE == '1B' else 'meta-llama/Llama-3.2-3B'\n",
+    "    # (label printed, --mode value, --output path) for each ablation run\n",
+    "    ablation_runs = [\n",
+    "        ('fixed-λ', 'fixed-lambda', 'figures/ablations_fixed_lambda.md'),\n",
+    "        ('target-sweep', 'target-sweep', 'figures/ablations_target_sweep.md'),\n",
+    "    ]\n",
+    "    try:\n",
+    "        for label, mode, report_path in ablation_runs:\n",
+    "            print(f'Running {label} ablation (short) ...')\n",
+    "            # argv list: paths containing spaces stay single arguments\n",
+    "            abl_argv = [\n",
+    "                sys.executable, str(abl_script),\n",
+    "                '--mode', mode, '--steps', str(small_steps), '--batch_size', '1',\n",
+    "                '--base_model', base_flag, '--output', report_path,\n",
+    "            ]\n",
+    "            subprocess.run(abl_argv, check=True)\n",
+    "        # Summarize a couple results if present\n",
+    "        summ_rows = []\n",
+    "        for p in Path('ablations').rglob('eval_results.json'):\n",
+    "            try:\n",
+    "                d = json.loads(p.read_text())\n",
+    "                summ_rows.append({'path': str(p.parent), 'scu_bpt': d.get('scu_bpt'), 'delta_bpt': d.get('delta_bpt')})\n",
+    "            except Exception:\n",
+    "                # Unreadable or malformed result files are skipped on purpose\n",
+    "                pass\n",
+    "        if summ_rows:\n",
+    "            df_summ = pd.DataFrame(summ_rows)\n",
+    "            print(df_summ.head(10))\n",
+    "        else:\n",
+    "            print('Ablation summaries not found (may be gated or skipped).')\n",
+    "    except Exception as e:\n",
+    "        print('Ablations skipped:', repr(e))\n",
+    "        print('Hint: try running locally with more memory if needed.')\n",
+    "else:\n",
+    "    print('No ablation script found; skipping.')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 11) Export figures & links\n",
+    "- Saved: `assets/figures/s_curve.png`\n",
+    "- Saved: `assets/figures/lambda_curve.png`\n",
+    "- Site URLs (if hosting the repo website):\n",
+    "  - `/assets/figures/s_curve.png`\n",
+    "  - `/assets/figures/lambda_curve.png`\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Zip and download figures if on Colab\n",
+    "import os, shutil\n",
+    "# Colab is detected from either of two environment variables\n",
+    "on_colab = any(var in os.environ for var in ('COLAB_RELEASE_TAGS', 'COLAB_GPU'))\n",
+    "if not on_colab:\n",
+    "    print('Not running on Colab; skipping download.')\n",
+    "else:\n",
+    "    # Archive assets/figures as figures.zip in the working directory\n",
+    "    shutil.make_archive('figures', 'zip', root_dir='assets', base_dir='figures')\n",
+    "    try:\n",
+    "        from google.colab import files\n",
+    "        files.download('figures.zip')\n",
+    "    except Exception:\n",
+    "        # Download helper unavailable or failed: just point at the archive\n",
+    "        print('Zip created at figures.zip')\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 12) Troubleshooting\n",
+    "- MPS OOM: use `batch_size=1`, `gradient_accumulation_steps=4`, `block_size=1024`, enable gradient checkpointing, and set `model.config.use_cache=False`.\n",
+    "- CUDA path: ensure `bitsandbytes` installed; on A100/V100 you can try fp16 instead of 4-bit if memory allows.\n",
+    "- HF access: accept the Meta Llama 3.2 license and login via `huggingface_hub.login()`.\n",
+    "- CPU mode: training is disabled; the notebook will still evaluate (if models load) and will simulate control logs to emit figures.\n",
+    "\n",
+    "Note: Adapters inherit Meta Llama 3.2 license; SCU code Apache-2.0. U.S. patent pending (provisional filed Sep 2025).\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.x"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}