jjsprockel committed on
Commit
b782f98
·
verified ·
1 Parent(s): 73da3e4

Add Colab notebook for LUAD subtype inference (MedGemma-27B QLoRA)

notebooks/MedGemma27B_LUAD_inference.ipynb ADDED
@@ -0,0 +1,239 @@
+ {
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "0c2f74ec",
+ "metadata": {},
+ "source": [
+ "\n",
+ "# 🩺 MedGemma-27B (QLoRA) — Inference Notebook (Colab, A100 80GB)\n",
+ "This notebook loads the base **`google/medgemma-27b-it`** model and your **QLoRA adapter** **`jjsprockel/medgemma27b-luad-qlora`** to predict **lung adenocarcinoma subtypes** from an H&E image.\n",
+ "\n",
+ "**Recommended requirements on Colab Pro/Pro+:**\n",
+ "- **GPU:** A100 **80 GB** (Runtime → Change runtime type → GPU → A100; then *Reconnect*).\n",
+ "- **Python:** 3.10+\n",
+ "- **Transformers:** 4.50+ (Gemma 3 / MedGemma support)\n",
+ "\n",
+ "> Note: If the Hugging Face repo is private, log in with your token in the corresponding cell.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "99aaec3f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "import sys\n",
+ "import torch\n",
+ "\n",
+ "print(\"Python:\", sys.version)\n",
+ "print(\"PyTorch:\", torch.__version__)\n",
+ "print(\"CUDA available:\", torch.cuda.is_available())\n",
+ "if torch.cuda.is_available():\n",
+ " print(\"GPU name:\", torch.cuda.get_device_name(0))\n",
+ " print(\"Total VRAM (GB):\", round(torch.cuda.get_device_properties(0).total_memory / 1e9, 2))\n",
+ "\n",
+ "# Strongly suggest A100 80GB\n",
+ "if torch.cuda.is_available():\n",
+ " vram_gb = torch.cuda.get_device_properties(0).total_memory / (1024**3)\n",
+ " if vram_gb < 70:\n",
+ " print(\"\\n[WARNING] Detected <70 GB VRAM. 4-bit quantization is enabled, but you may still hit OOM with very large images.\")\n",
+ "else:\n",
+ " print(\"[WARNING] No GPU detected. Please switch to a GPU runtime (A100 preferred).\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "05184873",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "%%bash\n",
+ "pip -q install --upgrade pip\n",
+ "pip -q install 'transformers>=4.50.0' 'accelerate>=0.34.2' 'bitsandbytes>=0.43.3' 'peft>=0.12.0' 'huggingface_hub>=0.24.6' 'safetensors>=0.4.4' 'Pillow' 'torchvision'\n"
+ ]
+ },
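+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4c0ffee1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# Optional sanity check (a minimal sketch): Gemma 3 / MedGemma support landed in\n",
+ "# transformers 4.50, and Colab can keep an older wheel loaded until the runtime\n",
+ "# is restarted, so fail fast here rather than deep inside from_pretrained.\n",
+ "import transformers\n",
+ "from packaging import version\n",
+ "\n",
+ "assert version.parse(transformers.__version__) >= version.parse(\"4.50.0\"), (\n",
+ " f\"transformers {transformers.__version__} is too old for MedGemma; restart the runtime after installing.\"\n",
+ ")\n",
+ "print(\"transformers\", transformers.__version__, \"OK\")\n"
+ ]
+ },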
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "71b17a36",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# OPTIONAL: Only needed if your repos are private.\n",
+ "# from huggingface_hub import login\n",
+ "# login() # <- paste your HF token when prompted\n",
+ "pass\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0f914903",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "from transformers import AutoModelForImageTextToText, AutoProcessor, BitsAndBytesConfig\n",
+ "from peft import PeftModel\n",
+ "import torch, io, json, re, requests\n",
+ "from PIL import Image\n",
+ "from typing import Optional\n",
+ "\n",
+ "# ---- IDs ----\n",
+ "BASE_ID = \"google/medgemma-27b-it\"\n",
+ "ADAPTER_ID = \"jjsprockel/medgemma27b-luad-qlora\"\n",
+ "\n",
+ "# ---- Class list ----\n",
+ "SUBTYPES = [\"lepidic\",\"acinar\",\"papillary\",\"micropapillary\",\"solid\",\"invasive mucinous\",\"colloid\",\"fetal\",\"enteric\"]\n",
+ "\n",
+ "# ---- Quantization (4-bit) ----\n",
+ "bnb_cfg = BitsAndBytesConfig(\n",
+ " load_in_4bit=True,\n",
+ " bnb_4bit_use_double_quant=True,\n",
+ " bnb_4bit_quant_type=\"nf4\",\n",
+ " bnb_4bit_compute_dtype=torch.bfloat16\n",
+ ")\n",
+ "\n",
+ "# ---- Load base and adapter ----\n",
+ "print(\"Loading base model:\", BASE_ID)\n",
+ "base = AutoModelForImageTextToText.from_pretrained(\n",
+ " BASE_ID,\n",
+ " quantization_config=bnb_cfg,\n",
+ " device_map={\"\": \"cuda\"},\n",
+ " torch_dtype=torch.bfloat16,\n",
+ " low_cpu_mem_usage=True,\n",
+ ")\n",
+ "\n",
+ "print(\"Attaching adapter:\", ADAPTER_ID)\n",
+ "model = PeftModel.from_pretrained(base, ADAPTER_ID).eval()\n",
+ "processor = AutoProcessor.from_pretrained(BASE_ID)\n",
+ "\n",
+ "# ---- Prompt templates ----\n",
+ "SYSTEM_PROMPT = (\n",
+ " \"You are an expert pulmonary pathologist. Return ONLY JSON with key 'subtype' strictly from: \"\n",
+ " + \", \".join(SUBTYPES) + \".\"\n",
+ ")\n",
+ "USER_PROMPT = \"Predict the subtype for this H&E lung adenocarcinoma patch. Only JSON.\"\n",
+ "\n",
+ "def load_image_from_url(url: str) -> Image.Image:\n",
+ " r = requests.get(url, timeout=30)\n",
+ " r.raise_for_status()\n",
+ " return Image.open(io.BytesIO(r.content)).convert(\"RGB\")\n",
+ "\n",
+ "def load_image_from_path(path: str) -> Image.Image:\n",
+ " return Image.open(path).convert(\"RGB\")\n",
+ "\n",
+ "def run_inference(img: Image.Image, max_new_tokens: int = 32) -> str:\n",
+ " messages = [\n",
+ " {\"role\":\"system\",\"content\":[{\"type\":\"text\",\"text\":SYSTEM_PROMPT}]},\n",
+ " {\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":USER_PROMPT},{\"type\":\"image\",\"image\":img}]}\n",
+ " ]\n",
+ " templ = processor.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)\n",
+ " enc = processor(text=templ, images=img, return_tensors=\"pt\")\n",
+ " inputs = {\n",
+ " \"input_ids\": enc[\"input_ids\"].to(model.device),\n",
+ " \"attention_mask\": enc[\"attention_mask\"].to(model.device),\n",
+ " \"pixel_values\": enc[\"pixel_values\"].to(model.device, dtype=torch.bfloat16),\n",
+ " }\n",
+ " with torch.inference_mode(), torch.amp.autocast(\"cuda\", dtype=torch.bfloat16):\n",
+ " out = model.generate(**inputs, max_new_tokens=max_new_tokens, do_sample=False)[0]\n",
+ " gen = out[inputs[\"input_ids\"].shape[-1]:]\n",
+ " decoded = processor.decode(gen, skip_special_tokens=True)\n",
+ " return decoded\n",
+ "\n",
+ "def try_parse_json(s: str) -> Optional[dict]:\n",
+ " # Extract a JSON-looking object if extra tokens sneak in\n",
+ " m = re.search(r'\\{.*\\}', s, flags=re.DOTALL)\n",
+ " if m:\n",
+ " try:\n",
+ " return json.loads(m.group(0))\n",
+ " except Exception:\n",
+ " return None\n",
+ " return None\n",
+ "\n",
+ "print(\"Ready ✅\")\n"
+ ]
+ },
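+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1a2b3c4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# Optional VRAM report (a minimal sketch using only torch.cuda APIs): with NF4\n",
+ "# quantization the 27B base should sit well under the A100's 80 GB; run this\n",
+ "# after loading to see how much headroom is left for generation.\n",
+ "if torch.cuda.is_available():\n",
+ " free_b, total_b = torch.cuda.mem_get_info()\n",
+ " print(f\"Allocated by tensors: {torch.cuda.memory_allocated() / 1024**3:.1f} GB\")\n",
+ " print(f\"Reserved by PyTorch: {torch.cuda.memory_reserved() / 1024**3:.1f} GB\")\n",
+ " print(f\"Free / total on GPU: {free_b / 1024**3:.1f} / {total_b / 1024**3:.1f} GB\")\n"
+ ]
+ },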
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ddd98a70",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# ===== Option A: Load image from URL =====\n",
+ "IMAGE_URL = \"\" # <-- Paste a direct image URL here (e.g. a PNG/JPG of H&E patch)\n",
+ "img = None\n",
+ "if IMAGE_URL:\n",
+ " img = load_image_from_url(IMAGE_URL)\n",
+ " display(img)\n",
+ "\n",
+ "# ===== Option B: Upload from your computer (Colab UI) =====\n",
+ "if img is None:\n",
+ " try:\n",
+ " from google.colab import files # type: ignore\n",
+ " up = files.upload()\n",
+ " assert len(up) > 0, \"No file was uploaded.\"\n",
+ " fname = list(up.keys())[0]\n",
+ " img = load_image_from_path(fname)\n",
+ " display(img)\n",
+ " except Exception as e:\n",
+ " raise SystemExit(f\"Please provide a valid IMAGE_URL or upload an image. Error: {e}\")\n",
+ "\n",
+ "# ---- Run inference ----\n",
+ "raw = run_inference(img, max_new_tokens=32)\n",
+ "print(\"\\nRaw model output:\")\n",
+ "print(raw)\n",
+ "\n",
+ "maybe = try_parse_json(raw)\n",
+ "if maybe and isinstance(maybe, dict) and \"subtype\" in maybe:\n",
+ " print(\"\\nParsed JSON:\")\n",
+ " print(json.dumps(maybe, indent=2))\n",
+ " print(\"\\nPredicted subtype:\", maybe.get(\"subtype\"))\n",
+ "else:\n",
+ " print(\"\\n[WARNING] Could not parse a clean JSON payload. Review the raw output above.\")\n"
+ ]
+ },
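+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9e8d7c6b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# Optional post-processing (a minimal sketch; `normalize_subtype` is a helper\n",
+ "# added here, not part of the original notebook): map the model's free-text\n",
+ "# answer onto the closed SUBTYPES list via case-insensitive substring matching,\n",
+ "# so downstream code always gets a canonical label (or None).\n",
+ "def normalize_subtype(text: str) -> Optional[str]:\n",
+ " t = text.lower()\n",
+ " # Check longer names first so e.g. 'micropapillary' wins over 'papillary'.\n",
+ " for s in sorted(SUBTYPES, key=len, reverse=True):\n",
+ " if s in t:\n",
+ " return s\n",
+ " return None\n",
+ "\n",
+ "label = normalize_subtype((maybe or {}).get(\"subtype\", \"\") or raw)\n",
+ "print(\"Canonical subtype:\", label)\n"
+ ]
+ },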
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "57e74a19",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# (Optional) Batch inference from a list of URLs.\n",
+ "URLS = [\n",
+ " # \"https://example.org/patch1.jpg\",\n",
+ " # \"https://example.org/patch2.png\",\n",
+ "]\n",
+ "\n",
+ "results = []\n",
+ "for url in URLS:\n",
+ " try:\n",
+ " im = load_image_from_url(url)\n",
+ " out = run_inference(im)\n",
+ " parsed = try_parse_json(out) or {\"raw\": out}\n",
+ " results.append({\"url\": url, **parsed})\n",
+ " print(f\"[OK] {url} ->\", parsed)\n",
+ " except Exception as e:\n",
+ " print(f\"[ERROR] {url}: {e}\")\n",
+ "\n",
+ "# If you want to save results to JSON:\n",
+ "# import json, time\n",
+ "# ts = int(time.time())\n",
+ "# with open(f\"batch_results_{ts}.json\", \"w\") as f:\n",
+ "# json.dump(results, f, indent=2)\n"
+ ]
+ }
+ ],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 5
+ }