Spaces:

ArchCoder
/

medintake-ai

Sleeping

App Files Files Community

priyansh-saxena1 commited on Apr 26

Commit

f538014

1 Parent(s): c9ecd03

fix: ROS hallucination guard + debug logging

Browse files

Files changed (1) hide show

clinical_ai_agent_fixed.ipynb +1132 -0

clinical_ai_agent_fixed.ipynb ADDED Viewed

	@@ -0,0 +1,1132 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": [],
+      "gpuType": "T4"
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    },
+    "accelerator": "GPU"
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "source": [
+        "%%bash\n",
+        "set -euo pipefail\n",
+        "\n",
+        "echo '════════════════════════════════════════════'\n",
+        "echo ' CELL 1 — System tools + Install Ollama'\n",
+        "echo '════════════════════════════════════════════'\n",
+        "\n",
+        "apt-get update -qq\n",
+        "apt-get install -y zstd pciutils curl 2>&1 | tail -3\n",
+        "echo '[OK] apt packages installed'\n",
+        "\n",
+        "npm install -g localtunnel 2>&1 | tail -2\n",
+        "echo \"[OK] localtunnel $(lt --version)\"\n",
+        "\n",
+        "echo ''\n",
+        "echo '── Installing Ollama ──'\n",
+        "curl -fsSL https://ollama.com/install.sh | sh\n",
+        "ollama --version\n",
+        "\n",
+        "echo ''\n",
+        "echo '[DONE] Cell 1 complete'"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "uxsL2dxR0kMA",
+        "outputId": "d17a4773-e25a-48a3-9336-781913fc1f6c"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "════════════════════════════════════════════\n",
+            " CELL 1 — System tools + Install Ollama\n",
+            "════════════════════════════════════════════\n",
+            "\r\n",
+            "/sbin/ldconfig.real: /usr/local/lib/libur_adapter_level_zero.so.0 is not a symbolic link\r\n",
+            "\r\n",
+            "[OK] apt packages installed\n",
+            "npm notice To update run: npm install -g npm@11.13.0\n",
+            "npm notice\n",
+            "[OK] localtunnel 2.0.2\n",
+            "\n",
+            "── Installing Ollama ──\n",
+            "\u001b[1m\u001b[31mWARNING:\u001b[m systemd is not running\n",
+            "Warning: could not connect to a running Ollama instance\n",
+            "Warning: client version is 0.21.2\n",
+            "\n",
+            "[DONE] Cell 1 complete\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)\n",
+            ">>> Installing ollama to /usr/local\n",
+            ">>> Downloading ollama-linux-amd64.tar.zst\n",
+            "#=#=#                                                                         \r##O#-#                                                                        \r##O=#  #                                                                      \r#=#=-#  #                                                                     \r\r                                                                           0.1%\r                                                                           0.3%\r                                                                           0.6%\r                                                                           1.0%\r#                                                                          1.4%\r#                                                                          2.0%\r#                                                                          2.6%\r##                                                                         3.2%\r##                                                                         3.7%\r##                                                                         3.9%\r##                                                                         4.1%\r###                                                                        4.2%\r###                                                                        4.4%\r###                                                                        4.6%\r###                                                                        4.7%\r###                                                                        4.9%\r###                                                                        5.1%\r###                                                                        5.3%\r###                                                                        5.5%\r####                                                                       5.6%\r####                                                                       5.8%\r####                                                                       6.0%\r####                                                                       6.2%\r####                                                                       6.4%\r####                                                                       6.6%\r####                                                                       6.7%\r####                                                                       6.9%\r#####                                                                      7.0%\r#####                                                                      7.3%\r#####                                                                      7.5%\r#####                                                                      7.7%\r#####                                                                      7.9%\r#####                                                                      8.1%\r#####                                                                      8.2%\r######                                                                     8.4%\r######                                                                     8.6%\r######                                                                     8.8%\r######                                                                     9.0%\r######                                                                     9.2%\r######                                                                     9.4%\r######                                                                     9.6%\r#######                                                                    9.8%\r#######                                                                   10.1%\r#######                                                                   10.3%\r#######                                                                   10.5%\r#######                                                                   10.7%\r#######                                                                   10.9%\r#######                                                                   11.1%\r########                                                                  11.3%\r########                                                                  11.5%\r########                                                                  11.7%\r########                                                                  11.9%\r########                                                                  12.1%\r########                                                                  12.3%\r########                                                                  12.4%\r#########                                                                 12.5%\r#########                                                                 12.6%\r#########                                                                 12.8%\r#########                                                                 13.0%\r#########                                                                 13.2%\r#########                                                                 13.4%\r#########                                                                 13.5%\r#########                                                                 13.9%\r##########                                                                14.1%\r##########                                                                14.4%\r##########                                                                14.7%\r##########                                                                15.0%\r###########                                                               15.5%\r###########                                                               15.9%\r###########                                                               16.2%\r###########                                                               16.6%\r############                                                              16.9%\r############                                                              17.5%\r############                                                              17.9%\r#############                                                             18.5%\r#############                                                             19.2%\r##############                                                            19.6%\r##############                                                            19.8%\r##############                                                            20.1%\r##############                                                            20.5%\r###############                                                           20.9%\r###############                                                           21.6%\r###############                                                           21.8%\r###############                                                           21.9%\r###############                                                           21.9%\r###############                                                           22.0%\r###############                                                           22.1%\r################                                                          23.2%\r#################                                                         24.6%\r##################                                                        25.8%\r###################                                                       26.5%\r###################                                                       26.8%\r###################                                                       27.1%\r####################                                                      27.9%\r####################                                                      29.0%\r#####################                                                     30.2%\r######################                                                    31.3%\r######################                                                    31.5%\r######################                                                    31.7%\r######################                                                    31.8%\r#######################                                                   32.0%\r#######################                                                   32.5%\r#######################                                                   32.9%\r#######################                                                   33.3%\r########################                                                  34.5%\r#########################                                                 35.6%\r##########################                                                36.4%\r##########################                                                36.7%\r##########################                                                37.5%\r###########################                                               38.5%\r############################                                              39.5%\r#############################                                             40.6%\r#############################                                             41.5%\r##############################                                            42.2%\r##############################                                            42.6%\r##############################                                            42.8%\r###############################                                           43.3%\r###############################                                           43.5%\r###############################                                           43.8%\r###############################                                           44.2%\r################################                                          45.1%\r#################################                                         46.1%\r#################################                                         47.0%\r##################################                                        48.0%\r##################################                                        48.4%\r###################################                                       48.8%\r###################################                                       49.3%\r###################################                                       49.8%\r####################################                                      50.6%\r####################################                                      51.3%\r#####################################                                     51.8%\r#####################################                                     52.4%\r######################################                                    52.9%\r######################################                                    53.4%\r######################################                                    53.9%\r#######################################                                   54.6%\r#######################################                                   55.5%\r########################################                                  56.4%\r#########################################                                 57.0%\r#########################################                                 57.2%\r#########################################                                 57.3%\r#########################################                                 57.5%\r#########################################                                 57.7%\r#########################################                                 57.9%\r#########################################                                 58.2%\r##########################################                                58.4%\r##########################################                                58.5%\r##########################################                                58.6%\r##########################################                                58.7%\r##########################################                                58.8%\r##########################################                                59.2%\r##########################################                                59.5%\r###########################################                               59.8%\r###########################################                               60.3%\r############################################                              61.1%\r############################################                              62.1%\r#############################################                             62.9%\r#############################################                             63.3%\r#############################################                             63.6%\r#############################################                             63.8%\r##############################################                            64.0%\r##############################################                            64.2%\r##############################################                            64.4%\r##############################################                            64.6%\r##############################################                            65.0%\r###############################################                           65.3%\r###############################################                           66.0%\r###############################################                           66.6%\r################################################                          67.8%\r#################################################                         68.9%\r##################################################                        70.2%\r###################################################                       71.4%\r###################################################                       71.6%\r###################################################                       71.8%\r####################################################                      72.8%\r#####################################################                     73.9%\r#####################################################                     75.0%\r######################################################                    76.0%\r#######################################################                   76.9%\r########################################################                  77.9%\r########################################################                  79.0%\r#########################################################                 79.9%\r#########################################################                 80.2%\r#########################################################                 80.5%\r##########################################################                80.8%\r##########################################################                81.2%\r##########################################################                81.5%\r##########################################################                81.7%\r###########################################################               82.1%\r###########################################################               82.8%\r############################################################              83.7%\r############################################################              84.0%\r############################################################              84.7%\r#############################################################             85.5%\r##############################################################            86.4%\r##############################################################            87.1%\r###############################################################           87.5%\r###############################################################           88.3%\r################################################################          89.4%\r#################################################################         90.7%\r##################################################################        92.0%\r###################################################################       93.4%\r####################################################################      94.8%\r####################################################################      95.7%\r#####################################################################     97.2%\r######################################################################    98.4%\r#######################################################################   98.6%\r#######################################################################   98.8%\r#######################################################################   99.1%\r#######################################################################   99.3%\r#######################################################################   99.5%\r#######################################################################   99.8%\r#######################################################################   99.9%\r######################################################################## 100.0%\n",
+            ">>> Creating ollama user...\n",
+            ">>> Adding ollama user to video group...\n",
+            ">>> Adding current user to ollama group...\n",
+            ">>> Creating ollama systemd service...\n",
+            ">>> NVIDIA GPU installed.\n",
+            ">>> The Ollama API is now available at 127.0.0.1:11434.\n",
+            ">>> Install complete. Run \"ollama\" from the command line.\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import subprocess, os, glob, time, requests\n",
+        "\n",
+        "print(\"════════════════════════════════════════════\")\n",
+        "print(\" CELL 2 — GPU Fix + Ollama GPU Start\")\n",
+        "print(\"════════════════════════════════════════════\")\n",
+        "\n",
+        "subprocess.run([\"pkill\", \"-f\", \"ollama\"], capture_output=True)\n",
+        "time.sleep(2)\n",
+        "print(\"[OK] Killed any stale Ollama process\")\n",
+        "\n",
+        "print(\"\\n── Discovering NVIDIA/CUDA libs ──\")\n",
+        "r = subprocess.run(\"find /usr/lib64-nvidia /usr/local/cuda* -name 'libcuda.so*' 2>/dev/null\",\n",
+        "                   shell=True, capture_output=True, text=True)\n",
+        "print(r.stdout.strip() or \"WARNING: No libcuda found!\")\n",
+        "\n",
+        "ollama_lib = \"/usr/local/lib/ollama\"\n",
+        "print(f\"\\n── Ollama lib dir: {ollama_lib} ──\")\n",
+        "r2 = subprocess.run(f\"ls {ollama_lib}/\", shell=True, capture_output=True, text=True)\n",
+        "print(r2.stdout.strip())\n",
+        "\n",
+        "nvidia_dir = \"/usr/lib64-nvidia\"\n",
+        "cuda_dir   = \"/usr/local/cuda/lib64\"\n",
+        "cuda128    = \"/usr/local/cuda-12.8/targets/x86_64-linux/lib\"\n",
+        "\n",
+        "needed = {\n",
+        "    \"libcuda.so\":         [f\"{nvidia_dir}/libcuda.so\",      f\"{nvidia_dir}/libcuda.so.1\"],\n",
+        "    \"libcuda.so.1\":       [f\"{nvidia_dir}/libcuda.so.1\"],\n",
+        "    \"libnvidia-ml.so.1\":  [f\"{nvidia_dir}/libnvidia-ml.so.1\"],\n",
+        "    \"libnvidia-ml.so\":    [f\"{nvidia_dir}/libnvidia-ml.so.1\"],\n",
+        "    \"libcudart.so.12\":    [f\"{cuda_dir}/libcudart.so.12\",   f\"{cuda128}/libcudart.so.12\"],\n",
+        "}\n",
+        "\n",
+        "print(\"\\n── Creating symlinks ──\")\n",
+        "for dst_name, srcs in needed.items():\n",
+        "    dst = os.path.join(ollama_lib, dst_name)\n",
+        "    if os.path.lexists(dst):\n",
+        "        os.remove(dst)\n",
+        "    for src in srcs:\n",
+        "        if os.path.exists(src):\n",
+        "            os.symlink(src, dst)\n",
+        "            print(f\"  ✅ {src} → {dst}\")\n",
+        "            break\n",
+        "    else:\n",
+        "        print(f\"  ⚠️  MISSING: {dst_name} (no source found)\")\n",
+        "\n",
+        "gpu_env = os.environ.copy()\n",
+        "gpu_env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
+        "gpu_env[\"LD_LIBRARY_PATH\"] = (\n",
+        "    f\"{nvidia_dir}:{ollama_lib}:/usr/local/cuda/lib64:\"\n",
+        "    + os.environ.get(\"LD_LIBRARY_PATH\", \"\")\n",
+        ")\n",
+        "gpu_env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
+        "gpu_env[\"OLLAMA_DEBUG\"]      = \"INFO\"\n",
+        "\n",
+        "print(f\"\\n── Starting Ollama daemon ──\")\n",
+        "print(f\"LD_LIBRARY_PATH starts with: {gpu_env['LD_LIBRARY_PATH'][:80]}...\")\n",
+        "log = open(\"/content/ollama.log\", \"w\")\n",
+        "proc = subprocess.Popen([\"ollama\", \"serve\"], env=gpu_env, stdout=log, stderr=log,\n",
+        "                        preexec_fn=os.setpgrp)\n",
+        "print(f\"[OK] Ollama PID: {proc.pid}\")\n",
+        "\n",
+        "for i in range(25):\n",
+        "    try:\n",
+        "        requests.get(\"http://localhost:11434/api/tags\", timeout=2)\n",
+        "        print(f\"[OK] Ollama API responsive after {i+1}s\")\n",
+        "        break\n",
+        "    except:\n",
+        "        time.sleep(1)\n",
+        "\n",
+        "time.sleep(2)\n",
+        "\n",
+        "print(\"\\n── Ollama startup log (GPU detection lines) ──\")\n",
+        "log_txt = open(\"/content/ollama.log\").read()\n",
+        "for line in log_txt.splitlines():\n",
+        "    kws = [\"gpu\", \"cuda\", \"vram\", \"nvidia\", \"error\", \"warn\", \"discovered\", \"total_vram\"]\n",
+        "    if any(k in line.lower() for k in kws):\n",
+        "        print(line)\n",
+        "\n",
+        "print(\"\\n── Checking model ──\")\n",
+        "tags = requests.get(\"http://localhost:11434/api/tags\").json()\n",
+        "models = [m[\"name\"] for m in tags.get(\"models\", [])]\n",
+        "print(f\"Installed: {models}\")\n",
+        "if not any(\"llama3.1:8b\" in m for m in models):\n",
+        "    print(\"Pulling llama3.1:8b ...\")\n",
+        "    subprocess.run([\"ollama\", \"pull\", \"llama3.1:8b\"], check=True)\n",
+        "    print(\"[OK] Model pulled\")\n",
+        "\n",
+        "print(\"\\n── Warming up model (60-90s first load) ──\")\n",
+        "t0 = time.time()\n",
+        "r3 = requests.post(\"http://localhost:11434/api/chat\", json={\n",
+        "    \"model\": \"llama3.1:8b\",\n",
+        "    \"messages\": [{\"role\": \"user\", \"content\": \"Reply with the word READY only.\"}],\n",
+        "    \"stream\": False,\n",
+        "    \"options\": {\"temperature\": 0, \"num_predict\": 5}\n",
+        "}, timeout=(10, 300))\n",
+        "elapsed = time.time() - t0\n",
+        "d = r3.json()\n",
+        "load_s = d.get(\"load_duration\", 0) / 1e9\n",
+        "eval_s = d.get(\"eval_duration\", 0) / 1e9\n",
+        "reply  = d[\"message\"][\"content\"].strip()\n",
+        "print(f\"Reply   : {reply}\")\n",
+        "print(f\"Total   : {elapsed:.1f}s  |  Load: {load_s:.1f}s  |  Eval: {eval_s:.1f}s\")\n",
+        "if eval_s > 0:\n",
+        "    tps = d.get(\"eval_count\", 0) / eval_s\n",
+        "    print(f\"Speed   : {tps:.1f} tok/s  {'← GPU (>30 t/s)' if tps > 30 else '← CPU (<10 t/s typical)'}\")\n",
+        "\n",
+        "print(\"\\n── ollama ps ──\")\n",
+        "ps = subprocess.run([\"ollama\", \"ps\"], capture_output=True, text=True)\n",
+        "print(ps.stdout)\n",
+        "\n",
+        "print(\"── nvidia-smi ──\")\n",
+        "smi = subprocess.run(\n",
+        "    [\"nvidia-smi\", \"--query-gpu=name,memory.used,memory.total\", \"--format=csv,noheader\"],\n",
+        "    capture_output=True, text=True)\n",
+        "print(smi.stdout)\n",
+        "\n",
+        "used_mib = int(smi.stdout.split(\",\")[1].strip().split()[0]) if smi.stdout else 0\n",
+        "if used_mib > 4000:\n",
+        "    print(f\"✅ GPU confirmed — {used_mib} MiB used\")\n",
+        "elif \"GPU\" in ps.stdout:\n",
+        "    print(f\"✅ ollama ps shows GPU\")\n",
+        "else:\n",
+        "    print(f\"⚠️  Still on CPU ({used_mib} MiB). Check log lines above for CUDA errors.\")\n",
+        "\n",
+        "print(\"\\n[DONE] Cell 2 complete\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "nK5UZG3F0k7Q",
+        "outputId": "56840451-6c1d-47c5-f024-320f1c777462"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "════════════════════════════════════════════\n",
+            " CELL 2 — GPU Fix + Ollama GPU Start\n",
+            "════════════════════════════════════════════\n",
+            "[OK] Killed any stale Ollama process\n",
+            "\n",
+            "── Discovering NVIDIA/CUDA libs ──\n",
+            "/usr/lib64-nvidia/libcuda.so.1\n",
+            "/usr/lib64-nvidia/libcuda.so.580.82.07\n",
+            "/usr/lib64-nvidia/libcuda.so\n",
+            "/usr/local/cuda-12.8/compat/libcuda.so.570.124.06\n",
+            "/usr/local/cuda-12.8/compat/libcuda.so.1\n",
+            "/usr/local/cuda-12.8/compat/libcuda.so\n",
+            "/usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs/libcuda.so\n",
+            "\n",
+            "── Ollama lib dir: /usr/local/lib/ollama ──\n",
+            "cuda_v12\n",
+            "cuda_v13\n",
+            "include\n",
+            "libggml-base.so\n",
+            "libggml-base.so.0\n",
+            "libggml-base.so.0.0.0\n",
+            "libggml-cpu-alderlake.so\n",
+            "libggml-cpu-haswell.so\n",
+            "libggml-cpu-icelake.so\n",
+            "libggml-cpu-sandybridge.so\n",
+            "libggml-cpu-skylakex.so\n",
+            "libggml-cpu-sse42.so\n",
+            "libggml-cpu-x64.so\n",
+            "mlx_cuda_v13\n",
+            "vulkan\n",
+            "\n",
+            "── Creating symlinks ──\n",
+            "  ✅ /usr/lib64-nvidia/libcuda.so → /usr/local/lib/ollama/libcuda.so\n",
+            "  ✅ /usr/lib64-nvidia/libcuda.so.1 → /usr/local/lib/ollama/libcuda.so.1\n",
+            "  ✅ /usr/lib64-nvidia/libnvidia-ml.so.1 → /usr/local/lib/ollama/libnvidia-ml.so.1\n",
+            "  ✅ /usr/lib64-nvidia/libnvidia-ml.so.1 → /usr/local/lib/ollama/libnvidia-ml.so\n",
+            "  ✅ /usr/local/cuda/lib64/libcudart.so.12 → /usr/local/lib/ollama/libcudart.so.12\n",
+            "\n",
+            "── Starting Ollama daemon ──\n",
+            "LD_LIBRARY_PATH starts with: /usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64:/usr/lib64-nvidia...\n",
+            "[OK] Ollama PID: 15538\n",
+            "[OK] Ollama API responsive after 2s\n",
+            "\n",
+            "── Ollama startup log (GPU detection lines) ──\n",
+            "time=2026-04-25T22:47:53.770Z level=INFO source=routes.go:1752 msg=\"server config\" env=\"map[CUDA_VISIBLE_DEVICES: GGML_VK_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:0 OLLAMA_DEBUG:INFO OLLAMA_DEBUG_LOG_REQUESTS:false OLLAMA_EDITOR: OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_KEEP_ALIVE:2h0m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NO_CLOUD:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false OLLAMA_VULKAN:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:]\"\n",
+            "time=2026-04-25T22:47:53.772Z level=INFO source=runner.go:67 msg=\"discovering available GPUs...\"\n",
+            "time=2026-04-25T22:47:54.618Z level=INFO source=types.go:42 msg=\"inference compute\" id=GPU-0779169f-d299-340e-42ee-14fb3ed34faf filter_id=\"\" library=CUDA compute=7.5 name=CUDA0 description=\"Tesla T4\" libdirs=ollama,cuda_v13 driver=13.0 pci_id=0000:00:04.0 type=discrete total=\"15.0 GiB\" available=\"14.6 GiB\"\n",
+            "time=2026-04-25T22:47:54.618Z level=INFO source=routes.go:1860 msg=\"vram-based default context\" total_vram=\"15.0 GiB\" default_num_ctx=4096\n",
+            "\n",
+            "── Checking model ──\n",
+            "Installed: []\n",
+            "Pulling llama3.1:8b ...\n",
+            "[OK] Model pulled\n",
+            "\n",
+            "── Warming up model (60-90s first load) ──\n",
+            "Reply   : READY\n",
+            "Total   : 96.8s  |  Load: 96.7s  |  Eval: 0.0s\n",
+            "Speed   : 58.5 tok/s  ← GPU (>30 t/s)\n",
+            "\n",
+            "── ollama ps ──\n",
+            "NAME           ID              SIZE      PROCESSOR    CONTEXT    UNTIL            \n",
+            "llama3.1:8b    46e0c10c039e    5.5 GB    100% GPU     4096       2 hours from now    \n",
+            "\n",
+            "── nvidia-smi ──\n",
+            "Tesla T4, 5367 MiB, 15360 MiB\n",
+            "\n",
+            "✅ GPU confirmed — 5367 MiB used\n",
+            "\n",
+            "[DONE] Cell 2 complete\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "%%bash\n",
+        "set -euo pipefail\n",
+        "echo '════════════════════════════════════════════'\n",
+        "echo ' CELL 3 — Clone + Patch'\n",
+        "echo '════════════════════════════════════════════'\n",
+        "cd /content\n",
+        "rm -rf medintake-ai\n",
+        "git clone https://github.com/priyansh-saxena1/medintake-ai.git\n",
+        "cd medintake-ai\n",
+        "echo \"[OK] commit: $(git rev-parse --short HEAD)\"\n",
+        "pip install -r requirements.txt 2>&1 | tail -4\n",
+        "echo '[OK] pip done'\n",
+        "\n",
+        "python3 << 'PYEOF'\n",
+        "import pathlib\n",
+        "p   = pathlib.Path('/content/medintake-ai/app/llm.py')\n",
+        "src = p.read_text()\n",
+        "changed = False\n",
+        "\n",
+        "patches = [\n",
+        "    (\"PATCH 1 timeout\",\n",
+        "     'requests.post(self.api_url, json=payload, timeout=60)',\n",
+        "     'requests.post(self.api_url, json=payload, timeout=(10, 300))'),\n",
+        "    (\"PATCH 2 OLLAMA_HOST\",\n",
+        "     'self.api_url = \"http://localhost:11434/api/chat\"',\n",
+        "     'self.api_url = os.environ.get(\"OLLAMA_HOST\",\"http://localhost:11434\") + \"/api/chat\"'),\n",
+        "    (\"PATCH 3 MODEL_NAME default\",\n",
+        "     'self.model_name = os.environ.get(\"MODEL_NAME\", \"qwen2.5:0.5b\")',\n",
+        "     'self.model_name = os.environ.get(\"MODEL_NAME\", \"llama3.1:8b\")'),\n",
+        "    (\"PATCH 4 response key\",\n",
+        "     'raw = data.get(\"response\", \"\")',\n",
+        "     'raw = data.get(\"message\", {}).get(\"content\", \"\")'),\n",
+        "]\n",
+        "\n",
+        "for name, old, new in patches:\n",
+        "    if old in src:\n",
+        "        src = src.replace(old, new, 1)\n",
+        "        changed = True\n",
+        "        print(f\"[APPLIED] {name}\")\n",
+        "    elif new in src:\n",
+        "        print(f\"[SKIP]    {name}\")\n",
+        "    else:\n",
+        "        print(f\"[WARN]    {name} target not found\")\n",
+        "\n",
+        "if changed:\n",
+        "    p.write_text(src)\n",
+        "    print(\"[OK] llm.py saved\")\n",
+        "\n",
+        "import py_compile\n",
+        "py_compile.compile(str(p), doraise=True)\n",
+        "print(\"[OK] syntax valid\")\n",
+        "PYEOF\n",
+        "\n",
+        "echo ''\n",
+        "echo '── Tests (MockLLM) ──'\n",
+        "cd /content/medintake-ai\n",
+        "MOCK_LLM=true python3 -m pytest tests/ -v --tb=short 2>&1\n",
+        "echo '[DONE] Cell 3'\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "jXLQSpFj0n8a",
+        "outputId": "10292259-f6c0-4b07-a1a5-e3d9c474afc9"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "════════════════════════════════════════════\n",
+            " CELL 3 — Clone + Patch\n",
+            "════════════════════════════════════════════\n",
+            "[OK] commit: eb1b955\n",
+            "Requirement already satisfied: orjson>=3.11.5 in /usr/local/lib/python3.12/dist-packages (from langgraph-sdk<0.4.0,>=0.3.0->langgraph->-r requirements.txt (line 1)) (3.11.8)\n",
+            "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.12/dist-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core>=0.1->langgraph->-r requirements.txt (line 1)) (3.1.1)\n",
+            "Requirement already satisfied: requests-toolbelt>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from langsmith<1.0.0,>=0.3.45->langchain-core>=0.1->langgraph->-r requirements.txt (line 1)) (1.0.0)\n",
+            "Requirement already satisfied: zstandard>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from langsmith<1.0.0,>=0.3.45->langchain-core>=0.1->langgraph->-r requirements.txt (line 1)) (0.25.0)\n",
+            "[OK] pip done\n",
+            "[APPLIED] PATCH 1 timeout\n",
+            "[APPLIED] PATCH 2 OLLAMA_HOST\n",
+            "[APPLIED] PATCH 3 MODEL_NAME default\n",
+            "[SKIP]    PATCH 4 response key\n",
+            "[OK] llm.py saved\n",
+            "[OK] syntax valid\n",
+            "\n",
+            "── Tests (MockLLM) ──\n",
+            "============================= test session starts ==============================\n",
+            "platform linux -- Python 3.12.13, pytest-8.4.2, pluggy-1.6.0 -- /usr/bin/python3\n",
+            "cachedir: .pytest_cache\n",
+            "rootdir: /content/medintake-ai\n",
+            "configfile: pytest.ini\n",
+            "plugins: asyncio-1.3.0, langsmith-0.7.30, typeguard-4.5.1, anyio-4.13.0\n",
+            "asyncio: mode=Mode.AUTO, debug=False, asyncio_default_fixture_loop_scope=function, asyncio_default_test_loop_scope=function\n",
+            "collecting ... collected 11 items\n",
+            "\n",
+            "tests/test_e2e.py::test_mock_llm_combined_call_basic_extraction PASSED   [  9%]\n",
+            "tests/test_e2e.py::test_mock_llm_emergency_detection PASSED              [ 18%]\n",
+            "tests/test_e2e.py::test_mock_llm_does_not_repeat_filled_questions PASSED [ 27%]\n",
+            "tests/test_e2e.py::test_mock_llm_severity_extraction PASSED              [ 36%]\n",
+            "tests/test_e2e.py::test_mock_llm_ros_extraction PASSED                   [ 45%]\n",
+            "tests/test_e2e.py::test_mock_llm_speed PASSED                            [ 54%]\n",
+            "tests/test_e2e.py::test_combined_output_schema_round_trip PASSED         [ 63%]\n",
+            "tests/test_e2e.py::test_health_endpoint PASSED                           [ 72%]\n",
+            "tests/test_e2e.py::test_emergency_triage_node PASSED                     [ 81%]\n",
+            "tests/test_e2e.py::test_full_intake_multi_turn_extraction PASSED         [ 90%]\n",
+            "tests/test_e2e.py::test_api_response_time PASSED                         [100%]\n",
+            "\n",
+            "============================== 11 passed in 0.71s ==============================\n",
+            "[DONE] Cell 3\n"
+          ]
+        },
+        {
+          "output_type": "stream",
+          "name": "stderr",
+          "text": [
+            "Cloning into 'medintake-ai'...\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Auto-heal Ollama before each turn\n",
+        "import requests, subprocess, os, time\n",
+        "def ensure_ollama():\n",
+        "    try:\n",
+        "        requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
+        "    except: pass\n",
+        "    print(\"⚠️ Ollama dead — restarting...\")\n",
+        "    env = os.environ.copy()\n",
+        "    env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
+        "    env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
+        "    env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
+        "    subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
+        "                     stdout=open(\"/content/ollama.log\",\"a\"),\n",
+        "                     stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
+        "    for _ in range(30):\n",
+        "        try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
+        "        except: time.sleep(1)\n",
+        "\n",
+        "ensure_ollama()\n",
+        "\n",
+        "import sys, os, json, time, subprocess, requests\n",
+        "sys.path.insert(0, \"/content/medintake-ai\")\n",
+        "os.environ[\"MOCK_LLM\"]    = \"false\"\n",
+        "os.environ[\"MODEL_NAME\"]  = \"llama3.1:8b\"\n",
+        "os.environ[\"OLLAMA_HOST\"] = \"http://localhost:11434\"\n",
+        "\n",
+        "print(\"════════════════════════════════════════════\")\n",
+        "print(\" CELL 4 — Deep LLM Dataflow Debug\")\n",
+        "print(\"════════════════════════════════════════════\")\n",
+        "\n",
+        "# A: Hardware\n",
+        "print(\"\\n── A: Hardware status ──\")\n",
+        "ps  = subprocess.run([\"ollama\",\"ps\"], capture_output=True, text=True)\n",
+        "smi = subprocess.run([\"nvidia-smi\",\"--query-gpu=name,memory.used,memory.total\",\n",
+        "                      \"--format=csv,noheader\"], capture_output=True, text=True)\n",
+        "print(\"ollama ps :\", ps.stdout.strip())\n",
+        "print(\"nvidia-smi:\", smi.stdout.strip())\n",
+        "\n",
+        "# B: App env + object init\n",
+        "print(\"\\n── B: App LLM object ──\")\n",
+        "from app.llm import OllamaLLM, CombinedOutput, COMBINED_SYSTEM_PROMPT\n",
+        "llm = OllamaLLM()\n",
+        "print(f\"model_name : {llm.model_name}\")\n",
+        "print(f\"api_url    : {llm.api_url}\")\n",
+        "\n",
+        "# C: Prompt construction\n",
+        "print(\"\\n── C: System prompt ──\")\n",
+        "print(COMBINED_SYSTEM_PROMPT)\n",
+        "\n",
+        "transcript  = \"Patient: I have chest pain\"\n",
+        "currentjson = CombinedOutput().model_dump_json()\n",
+        "prompt = (\n",
+        "    f\"CURRENT CLINICAL STATE:\\n{currentjson}\\n\\n\"\n",
+        "    f\"FULL CONVERSATION TRANSCRIPT:\\n{transcript}\\n\\n\"\n",
+        "    \"Instructions: Extract all new clinical facts, merge into state, \"\n",
+        "    \"generate ONE empathetic follow-up question. Return ONLY JSON.\"\n",
+        ")\n",
+        "print(\"\\n── D: User prompt ──\")\n",
+        "print(prompt)\n",
+        "\n",
+        "# D: Raw HTTP\n",
+        "print(\"\\n── E: Raw Ollama HTTP call ──\")\n",
+        "payload = {\n",
+        "    \"model\": llm.model_name,\n",
+        "    \"messages\": [\n",
+        "        {\"role\":\"system\",\"content\": COMBINED_SYSTEM_PROMPT},\n",
+        "        {\"role\":\"user\",  \"content\": prompt}\n",
+        "    ],\n",
+        "    \"format\": \"json\",\n",
+        "    \"stream\": False,\n",
+        "    \"options\": {\"temperature\": 0.0, \"num_predict\": 300}\n",
+        "}\n",
+        "\n",
+        "t0   = time.time()\n",
+        "resp = requests.post(llm.api_url, json=payload, timeout=(10,300))\n",
+        "elapsed = time.time() - t0\n",
+        "full = resp.json()\n",
+        "\n",
+        "load_s = full.get(\"load_duration\",0) / 1e9\n",
+        "eval_s = full.get(\"eval_duration\",1) / 1e9\n",
+        "tps    = full.get(\"eval_count\",0) / eval_s\n",
+        "\n",
+        "print(f\"HTTP status   : {resp.status_code}\")\n",
+        "print(f\"Total time    : {elapsed:.2f}s\")\n",
+        "print(f\"Load duration : {load_s:.2f}s   {'GPU (fast)' if load_s < 1 else 'CPU (slow)'}\")\n",
+        "print(f\"Tokens/sec    : {tps:.1f}        {'GPU (>30)' if tps > 30 else 'CPU (<15)'}\")\n",
+        "raw = full.get(\"message\",{}).get(\"content\",\"\").strip()\n",
+        "print(f\"\\nRaw content:\\n{raw}\")\n",
+        "\n",
+        "# E: Parsing\n",
+        "print(\"\\n── F: JSON parse ──\")\n",
+        "try:\n",
+        "    parsed = json.loads(raw)\n",
+        "    print(\"json.loads() OK\")\n",
+        "    REQUIRED = {\"chief_complaint\",\"onset\",\"location\",\"duration\",\n",
+        "                \"character\",\"severity\",\"aggravating\",\"relieving\",\"ros\",\"reply\"}\n",
+        "    missing_k = REQUIRED - set(parsed.keys())\n",
+        "    extra_k   = set(parsed.keys()) - REQUIRED\n",
+        "    print(f\"Missing keys : {missing_k or 'none'}\")\n",
+        "    print(f\"Extra keys   : {extra_k or 'none'}\")\n",
+        "    print(json.dumps(parsed, indent=2))\n",
+        "except Exception as e:\n",
+        "    print(f\"FAILED: {e}\")\n",
+        "\n",
+        "# F: Full pipeline\n",
+        "print(\"\\n── G: Full app pipeline ──\")\n",
+        "result = llm.combined_call(transcript, currentjson)\n",
+        "print(\"CombinedOutput:\")\n",
+        "print(json.dumps(result.model_dump(), indent=2))\n",
+        "\n",
+        "from app.graph import compute_stage, missing_from\n",
+        "stage   = compute_stage(result)\n",
+        "missing = missing_from(result)\n",
+        "print(f\"\\nStage   : {stage}\")\n",
+        "print(f\"Missing : {missing}\")\n",
+        "print(f\"Reply   : '{result.reply}'\")\n",
+        "\n",
+        "FALLBACK = {\"\", \"Could you tell me more?\", \"Could you please repeat that?\"}\n",
+        "if result.reply in FALLBACK:\n",
+        "    print(\"\\nWARNING: FALLBACK REPLY — LLM output failed silently!\")\n",
+        "    print(\"Check logs below:\")\n",
+        "    print(open(\"/content/ollama.log\").read()[-2000:])\n",
+        "else:\n",
+        "    print(\"\\nOK: Real LLM reply returned\")\n",
+        "\n",
+        "print(\"[DONE] Cell 4\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "6oX-ZDYB0pJ3",
+        "outputId": "66521e8e-e829-44ff-e4dd-d836f600ba0e"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "════════════════════════════════════════════\n",
+            " CELL 4 — Deep LLM Dataflow Debug\n",
+            "════════════════════════════════════════════\n",
+            "\n",
+            "── A: Hardware status ──\n",
+            "ollama ps : NAME           ID              SIZE      PROCESSOR    CONTEXT    UNTIL            \n",
+            "llama3.1:8b    46e0c10c039e    5.5 GB    100% GPU     4096       2 hours from now\n",
+            "nvidia-smi: Tesla T4, 5367 MiB, 15360 MiB\n",
+            "\n",
+            "── B: App LLM object ──\n",
+            "model_name : llama3.1:8b\n",
+            "api_url    : http://localhost:11434/api/chat\n",
+            "\n",
+            "── C: System prompt ──\n",
+            "You are a clinical intake assistant AI. You have two jobs per turn:\n",
+            "\n",
+            "JOB 1 (EXTRACT): Read the FULL conversation and update the clinical JSON state with any new information the patient provided. Only extract facts explicitly stated.\n",
+            "\n",
+            "JOB 2 (RESPOND): Based on what is STILL MISSING from the clinical state, ask the patient ONE natural, empathetic question. Do NOT ask about things already filled in.\n",
+            "\n",
+            "CRITICAL RULES:\n",
+            "- Output ONLY valid JSON, nothing else.\n",
+            "- Do NOT diagnose or give medical advice.\n",
+            "- Do NOT ask more than one question.\n",
+            "- If all fields are complete, set reply to \"Thank you — I have everything I need.\"\n",
+            "\n",
+            "OUTPUT FORMAT (strictly follow this, no extra text):\n",
+            "{\n",
+            "  \"chief_complaint\": \"...\",\n",
+            "  \"onset\": \"...\",\n",
+            "  \"location\": \"...\",\n",
+            "  \"duration\": \"...\",\n",
+            "  \"character\": \"...\",\n",
+            "  \"severity\": \"...\",\n",
+            "  \"aggravating\": \"...\",\n",
+            "  \"relieving\": \"...\",\n",
+            "  \"ros\": {\"system_name\": [\"finding1\", \"finding2\"]},\n",
+            "  \"reply\": \"The single question to ask the patient next\"\n",
+            "}\n",
+            "\n",
+            "Use null for any field not yet known. Keep existing values if the patient didn't add new info.\n",
+            "\n",
+            "── D: User prompt ──\n",
+            "CURRENT CLINICAL STATE:\n",
+            "{\"chief_complaint\":null,\"onset\":null,\"location\":null,\"duration\":null,\"character\":null,\"severity\":null,\"aggravating\":null,\"relieving\":null,\"ros\":{},\"emergency\":false,\"reply\":\"\"}\n",
+            "\n",
+            "FULL CONVERSATION TRANSCRIPT:\n",
+            "Patient: I have chest pain\n",
+            "\n",
+            "Instructions: Extract all new clinical facts, merge into state, generate ONE empathetic follow-up question. Return ONLY JSON.\n",
+            "\n",
+            "── E: Raw Ollama HTTP call ──\n",
+            "HTTP status   : 200\n",
+            "Total time    : 5.61s\n",
+            "Load duration : 0.20s   GPU (fast)\n",
+            "Tokens/sec    : 45.2        GPU (>30)\n",
+            "\n",
+            "Raw content:\n",
+            "{\n",
+            "  \"chief_complaint\": \"chest pain\",\n",
+            "  \"onset\": null,\n",
+            "  \"location\": null,\n",
+            "  \"duration\": null,\n",
+            "  \"character\": null,\n",
+            "  \"severity\": null,\n",
+            "  \"aggravating\": null,\n",
+            "  \"relieving\": null,\n",
+            "  \"ros\": {},\n",
+            "  \"emergency\": false,\n",
+            "  \"reply\": \"Can you tell me more about when this chest pain started?\"\n",
+            "}\n",
+            "\n",
+            "── F: JSON parse ──\n",
+            "json.loads() OK\n",
+            "Missing keys : none\n",
+            "Extra keys   : {'emergency'}\n",
+            "{\n",
+            "  \"chief_complaint\": \"chest pain\",\n",
+            "  \"onset\": null,\n",
+            "  \"location\": null,\n",
+            "  \"duration\": null,\n",
+            "  \"character\": null,\n",
+            "  \"severity\": null,\n",
+            "  \"aggravating\": null,\n",
+            "  \"relieving\": null,\n",
+            "  \"ros\": {},\n",
+            "  \"emergency\": false,\n",
+            "  \"reply\": \"Can you tell me more about when this chest pain started?\"\n",
+            "}\n",
+            "\n",
+            "── G: Full app pipeline ──\n",
+            "[Ollama] Starting inference for model 'llama3.1:8b'...\n",
+            "[Ollama] Inference completed in 5.23s total.\n",
+            "CombinedOutput:\n",
+            "{\n",
+            "  \"chief_complaint\": \"chest pain\",\n",
+            "  \"onset\": null,\n",
+            "  \"location\": null,\n",
+            "  \"duration\": null,\n",
+            "  \"character\": null,\n",
+            "  \"severity\": null,\n",
+            "  \"aggravating\": null,\n",
+            "  \"relieving\": null,\n",
+            "  \"ros\": {},\n",
+            "  \"emergency\": false,\n",
+            "  \"reply\": \"Can you tell me more about when this chest pain started?\"\n",
+            "}\n",
+            "\n",
+            "Stage   : hpi\n",
+            "Missing : ['HPI:onset', 'HPI:location', 'HPI:duration', 'HPI:character', 'HPI:severity', 'HPI:aggravating', 'HPI:relieving', 'ROS (3 more systems needed)']\n",
+            "Reply   : 'Can you tell me more about when this chest pain started?'\n",
+            "\n",
+            "OK: Real LLM reply returned\n",
+            "[DONE] Cell 4\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Auto-heal Ollama before each turn\n",
+        "import requests, subprocess, os, time\n",
+        "def ensure_ollama():\n",
+        "    try:\n",
+        "        requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
+        "    except: pass\n",
+        "    print(\"⚠️ Ollama dead — restarting...\")\n",
+        "    env = os.environ.copy()\n",
+        "    env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
+        "    env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
+        "    env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
+        "    subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
+        "                     stdout=open(\"/content/ollama.log\",\"a\"),\n",
+        "                     stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
+        "    for _ in range(30):\n",
+        "        try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
+        "        except: time.sleep(1)\n",
+        "\n",
+        "ensure_ollama()\n",
+        "\n",
+        "import subprocess, time, requests, os\n",
+        "\n",
+        "subprocess.run([\"pkill\", \"-f\", \"uvicorn\"], capture_output=True)\n",
+        "time.sleep(2)\n",
+        "\n",
+        "env = os.environ.copy()\n",
+        "env[\"MOCK_LLM\"]    = \"false\"\n",
+        "env[\"MODEL_NAME\"]  = \"llama3.1:8b\"\n",
+        "env[\"OLLAMA_HOST\"] = \"http://localhost:11434\"\n",
+        "\n",
+        "log  = open(\"/content/api.log\", \"w\")\n",
+        "proc = subprocess.Popen(\n",
+        "    [\"python\", \"-m\", \"uvicorn\", \"app.main:app\",\n",
+        "     \"--host\", \"0.0.0.0\", \"--port\", \"7860\", \"--log-level\", \"info\"],\n",
+        "    cwd=\"/content/medintake-ai\",\n",
+        "    env=env, stdout=log, stderr=log,\n",
+        "    preexec_fn=os.setpgrp\n",
+        ")\n",
+        "print(f\"uvicorn PID: {proc.pid}\")\n",
+        "\n",
+        "for i in range(20):\n",
+        "    try:\n",
+        "        r = requests.get(\"http://localhost:7860/health\", timeout=2)\n",
+        "        if r.status_code == 200:\n",
+        "            d = r.json()\n",
+        "            print(f\"✅ FastAPI ready after {i+1}s\")\n",
+        "            print(f\"   mock_mode = {d.get('mock_mode')}  ← must be False\")\n",
+        "            break\n",
+        "    except: pass\n",
+        "    print(f\"  ...{i+1}s\")\n",
+        "    time.sleep(1)\n",
+        "else:\n",
+        "    print(\"❌ Failed — dumping api.log:\")\n",
+        "    print(open(\"/content/api.log\").read()[-2000:])"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "qhgapTbx0qRi",
+        "outputId": "886d3a8c-928e-4051-926a-878534769de1"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "uvicorn PID: 19612\n",
+            "  ...1s\n",
+            "  ...2s\n",
+            "✅ FastAPI ready after 3s\n",
+            "   mock_mode = False  ← must be False\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Auto-heal Ollama before each turn\n",
+        "import requests, subprocess, os, time\n",
+        "def ensure_ollama():\n",
+        "    try:\n",
+        "        requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
+        "    except: pass\n",
+        "    print(\"⚠️ Ollama dead — restarting...\")\n",
+        "    env = os.environ.copy()\n",
+        "    env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
+        "    env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
+        "    env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
+        "    subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
+        "                     stdout=open(\"/content/ollama.log\",\"a\"),\n",
+        "                     stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
+        "    for _ in range(30):\n",
+        "        try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
+        "        except: time.sleep(1)\n",
+        "\n",
+        "ensure_ollama()\n",
+        "\n",
+        "import subprocess, time, requests\n",
+        "\n",
+        "PUBLIC_IP = requests.get(\"https://ipv4.icanhazip.com\", timeout=5).text.strip()\n",
+        "print(f\"Tunnel password: {PUBLIC_IP}\")\n",
+        "print(\"Starting tunnel...\")\n",
+        "\n",
+        "# Start lt as background process, capture output to file\n",
+        "tunnel_log = open(\"/content/tunnel.log\", \"w\")\n",
+        "proc = subprocess.Popen(\n",
+        "    [\"lt\", \"--port\", \"7860\"],\n",
+        "    stdout=tunnel_log, stderr=tunnel_log,\n",
+        "    preexec_fn=__import__(\"os\").setpgrp\n",
+        ")\n",
+        "print(f\"Tunnel PID: {proc.pid}\")\n",
+        "\n",
+        "# Wait for URL to appear in log\n",
+        "for i in range(15):\n",
+        "    time.sleep(1)\n",
+        "    try:\n",
+        "        txt = open(\"/content/tunnel.log\").read()\n",
+        "        if \"loca.lt\" in txt or \"https://\" in txt:\n",
+        "            for line in txt.splitlines():\n",
+        "                if \"https://\" in line:\n",
+        "                    print(f\"\\n🌐 PUBLIC URL: {line.strip()}\")\n",
+        "            break\n",
+        "    except: pass\n",
+        "    print(f\"  ...waiting for URL {i+1}s\")\n",
+        "else:\n",
+        "    print(\"⚠️  URL not found yet — run: !cat /content/tunnel.log\")\n",
+        "\n",
+        "print(\"\\n✅ Cell 5B done — proceed to Cell 6\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "ub55lE6d3LMA",
+        "outputId": "9aac6f5a-e022-493b-868c-c5fd5e425297"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Tunnel password: 34.87.70.249\n",
+            "Starting tunnel...\n",
+            "Tunnel PID: 19630\n",
+            "  ...waiting for URL 1s\n",
+            "\n",
+            "🌐 PUBLIC URL: your url is: https://proud-bears-drum.loca.lt\n",
+            "\n",
+            "✅ Cell 5B done — proceed to Cell 6\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "\n",
+        "# Auto-heal Ollama before each turn\n",
+        "import requests, subprocess, os, time\n",
+        "def ensure_ollama():\n",
+        "    try:\n",
+        "        requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
+        "    except: pass\n",
+        "    print(\"⚠️ Ollama dead — restarting...\")\n",
+        "    env = os.environ.copy()\n",
+        "    env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
+        "    env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
+        "    env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
+        "    subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
+        "                     stdout=open(\"/content/ollama.log\",\"a\"),\n",
+        "                     stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
+        "    for _ in range(30):\n",
+        "        try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
+        "        except: time.sleep(1)\n",
+        "\n",
+        "ensure_ollama()\n",
+        "\n",
+        "import requests, json, time, subprocess\n",
+        "\n",
+        "SESSION_ID = \"debug_session_001\"   # keep same across all turns\n",
+        "USER_MSG   = \"I have chest pain\"   # ← change each turn\n",
+        "\n",
+        "print(f\"Session: {SESSION_ID}  |  Message: {USER_MSG}\")\n",
+        "\n",
+        "t0 = time.time()\n",
+        "r  = requests.post(\"http://localhost:7860/chat\",\n",
+        "    json={\"session_id\": SESSION_ID, \"message\": USER_MSG},\n",
+        "    timeout=120)\n",
+        "elapsed = time.time() - t0\n",
+        "\n",
+        "d = r.json()\n",
+        "print(f\"\\nHTTP {r.status_code}  ({elapsed:.1f}s)\")\n",
+        "print(json.dumps(d, indent=2))\n",
+        "print(f\"\\nStage : {d.get('state')}\")\n",
+        "print(f\"Reply : {d.get('reply')}\")\n",
+        "\n",
+        "if d.get(\"brief\"):\n",
+        "    print(\"\\n📋 CLINICAL BRIEF:\")\n",
+        "    print(json.dumps(d[\"brief\"], indent=2))\n",
+        "\n",
+        "FALLBACK = {\"Could you tell me more?\", \"\", None, \"Could you please repeat that?\"}\n",
+        "if d.get(\"reply\") in FALLBACK:\n",
+        "    print(\"\\n⚠️  FALLBACK reply — dumping api.log:\")\n",
+        "    print(open(\"/content/api.log\").read()[-2000:])\n",
+        "\n",
+        "# Quick GPU check\n",
+        "smi = subprocess.run([\"nvidia-smi\",\"--query-gpu=memory.used,memory.total\",\n",
+        "                      \"--format=csv,noheader\"], capture_output=True, text=True)\n",
+        "ps  = subprocess.run([\"ollama\",\"ps\"], capture_output=True, text=True)\n",
+        "print(f\"\\nGPU RAM  : {smi.stdout.strip()}\")\n",
+        "print(f\"ollama ps: {ps.stdout.strip()}\")\n",
+        "print(subprocess.run([\"tail\",\"-n\",\"15\",\"/content/api.log\"],\n",
+        "                     capture_output=True, text=True).stdout)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "4kokp0w50rQQ",
+        "outputId": "a0de78f6-fe97-487b-ed0d-f4ebaa4ed770"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Session: debug_session_001  |  Message: I have chest pain\n",
+            "\n",
+            "HTTP 200  (6.2s)\n",
+            "{\n",
+            "  \"reply\": \"Can you tell me more about when this chest pain started?\",\n",
+            "  \"state\": \"hpi\",\n",
+            "  \"brief\": null\n",
+            "}\n",
+            "\n",
+            "Stage : hpi\n",
+            "Reply : Can you tell me more about when this chest pain started?\n",
+            "\n",
+            "GPU RAM  : 5369 MiB, 15360 MiB\n",
+            "ollama ps: NAME           ID              SIZE      PROCESSOR    CONTEXT    UNTIL            \n",
+            "llama3.1:8b    46e0c10c039e    5.5 GB    100% GPU     4096       2 hours from now\n",
+            "INFO:     Waiting for application startup.\n",
+            "INFO:     Application startup complete.\n",
+            "INFO:     Uvicorn running on http://0.0.0.0:7860 (Press CTRL+C to quit)\n",
+            "INFO:     127.0.0.1:49798 - \"GET /health HTTP/1.1\" 200 OK\n",
+            "\n",
+            "[1777158113.511] [API] -> POST /chat received for debug_session_001\n",
+            "[1777158113.512] [API] Read existing state snapshot.\n",
+            "[1777158113.512] [API] Starting new graph invoke...\n",
+            "[1777158113.521] [Graph Node] Requesting LLM inference...\n",
+            "[Ollama] Starting inference for model 'llama3.1:8b'...\n",
+            "[Ollama] Inference completed in 6.17s total.\n",
+            "[1777158119.696] [Graph Node] LLM returned. Preparing node dictionaries...\n",
+            "[1777158119.697] [API] <- Graph invoke returned in 6.19s\n",
+            "[1777158119.698] [API] Chat completed in 6.19s total. Reply length: 56\n",
+            "INFO:     127.0.0.1:49810 - \"POST /chat HTTP/1.1\" 200 OK\n",
+            "\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import subprocess\n",
+        "for log in [\"/content/api.log\", \"/content/ollama.log\"]:\n",
+        "    print(f\"\\n{'='*55}\\n {log}\\n{'='*55}\")\n",
+        "    print(subprocess.run([\"tail\",\"-n\",\"40\",log], capture_output=True, text=True).stdout or \"(empty)\")\n"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "1WZOi3Hn0sff",
+        "outputId": "6edc93ea-2f1b-4fb8-c631-bb84032fe1e8"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "\n",
+            "=======================================================\n",
+            " /content/api.log\n",
+            "=======================================================\n",
+            "INFO:     Started server process [19612]\n",
+            "INFO:     Waiting for application startup.\n",
+            "INFO:     Application startup complete.\n",
+            "INFO:     Uvicorn running on http://0.0.0.0:7860 (Press CTRL+C to quit)\n",
+            "INFO:     127.0.0.1:49798 - \"GET /health HTTP/1.1\" 200 OK\n",
+            "\n",
+            "[1777158113.511] [API] -> POST /chat received for debug_session_001\n",
+            "[1777158113.512] [API] Read existing state snapshot.\n",
+            "[1777158113.512] [API] Starting new graph invoke...\n",
+            "[1777158113.521] [Graph Node] Requesting LLM inference...\n",
+            "[Ollama] Starting inference for model 'llama3.1:8b'...\n",
+            "[Ollama] Inference completed in 6.17s total.\n",
+            "[1777158119.696] [Graph Node] LLM returned. Preparing node dictionaries...\n",
+            "[1777158119.697] [API] <- Graph invoke returned in 6.19s\n",
+            "[1777158119.698] [API] Chat completed in 6.19s total. Reply length: 56\n",
+            "INFO:     127.0.0.1:49810 - \"POST /chat HTTP/1.1\" 200 OK\n",
+            "\n",
+            "\n",
+            "=======================================================\n",
+            " /content/ollama.log\n",
+            "=======================================================\n",
+            "load_tensors:   CPU_Mapped model buffer size =   281.81 MiB\n",
+            "load_tensors:        CUDA0 model buffer size =  4403.49 MiB\n",
+            "llama_context: constructing llama_context\n",
+            "llama_context: n_seq_max     = 1\n",
+            "llama_context: n_ctx         = 4096\n",
+            "llama_context: n_ctx_seq     = 4096\n",
+            "llama_context: n_batch       = 512\n",
+            "llama_context: n_ubatch      = 512\n",
+            "llama_context: causal_attn   = 1\n",
+            "llama_context: flash_attn    = auto\n",
+            "llama_context: kv_unified    = false\n",
+            "llama_context: freq_base     = 500000.0\n",
+            "llama_context: freq_scale    = 1\n",
+            "llama_context: n_ctx_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized\n",
+            "llama_context:  CUDA_Host  output buffer size =     0.50 MiB\n",
+            "llama_kv_cache:      CUDA0 KV buffer size =   512.00 MiB\n",
+            "llama_kv_cache: size =  512.00 MiB (  4096 cells,  32 layers,  1/1 seqs), K (f16):  256.00 MiB, V (f16):  256.00 MiB\n",
+            "llama_context: Flash Attention was auto, set to enabled\n",
+            "llama_context:      CUDA0 compute buffer size =   258.50 MiB\n",
+            "llama_context:  CUDA_Host compute buffer size =    16.01 MiB\n",
+            "llama_context: graph nodes  = 999\n",
+            "llama_context: graph splits = 2\n",
+            "time=2026-04-25T23:01:03.205Z level=INFO source=server.go:1402 msg=\"llama runner started in 2.68 seconds\"\n",
+            "time=2026-04-25T23:01:03.205Z level=INFO source=sched.go:561 msg=\"loaded runners\" count=1\n",
+            "time=2026-04-25T23:01:03.205Z level=INFO source=server.go:1364 msg=\"waiting for llama runner to start responding\"\n",
+            "time=2026-04-25T23:01:03.206Z level=INFO source=server.go:1402 msg=\"llama runner started in 2.68 seconds\"\n",
+            "[GIN] 2026/04/25 - 23:01:09 | 200 |  9.705078074s |       127.0.0.1 | POST     \"/api/chat\"\n",
+            "[GIN] 2026/04/25 - 23:01:09 | 200 |      59.798µs |       127.0.0.1 | HEAD     \"/\"\n",
+            "[GIN] 2026/04/25 - 23:01:09 | 200 |     129.278µs |       127.0.0.1 | GET      \"/api/ps\"\n",
+            "[GIN] 2026/04/25 - 23:01:36 | 200 |     354.357µs |       127.0.0.1 | GET      \"/api/tags\"\n",
+            "[GIN] 2026/04/25 - 23:01:36 | 200 |      33.021µs |       127.0.0.1 | HEAD     \"/\"\n",
+            "[GIN] 2026/04/25 - 23:01:36 | 200 |      38.107µs |       127.0.0.1 | GET      \"/api/ps\"\n",
+            "[GIN] 2026/04/25 - 23:01:42 | 200 |  5.606222729s |       127.0.0.1 | POST     \"/api/chat\"\n",
+            "[GIN] 2026/04/25 - 23:01:47 | 200 |  5.227727634s |       127.0.0.1 | POST     \"/api/chat\"\n",
+            "[GIN] 2026/04/25 - 23:01:47 | 200 |     510.297µs |       127.0.0.1 | GET      \"/api/tags\"\n",
+            "[GIN] 2026/04/25 - 23:01:51 | 200 |     622.673µs |       127.0.0.1 | GET      \"/api/tags\"\n",
+            "[GIN] 2026/04/25 - 23:01:53 | 200 |     655.176µs |       127.0.0.1 | GET      \"/api/tags\"\n",
+            "[GIN] 2026/04/25 - 23:01:59 | 200 |   6.17069071s |       127.0.0.1 | POST     \"/api/chat\"\n",
+            "[GIN] 2026/04/25 - 23:01:59 | 200 |      35.997µs |       127.0.0.1 | HEAD     \"/\"\n",
+            "[GIN] 2026/04/25 - 23:01:59 | 200 |      38.825µs |       127.0.0.1 | GET      \"/api/ps\"\n",
+            "\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 356
+        },
+        "id": "xxtmukiS11_T",
+        "outputId": "016c6964-4288-4647-ae12-b28b067c2552"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "MessageError",
+          "evalue": "Error: credential propagation was unsuccessful",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mMessageError\u001b[0m                              Traceback (most recent call last)",
+            "\u001b[0;32m/tmp/ipykernel_14360/1408506528.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mgoogle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolab\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdrive\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdrive\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/content/drive'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/drive.py\u001b[0m in \u001b[0;36mmount\u001b[0;34m(mountpoint, force_remount, timeout_ms, readonly)\u001b[0m\n\u001b[1;32m     95\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmountpoint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforce_remount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_ms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m120000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreadonly\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     96\u001b[0m   \u001b[0;34m\"\"\"Mount your Google Drive at the specified mountpoint path.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m   return _mount(\n\u001b[0m\u001b[1;32m     98\u001b[0m       \u001b[0mmountpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     99\u001b[0m       \u001b[0mforce_remount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mforce_remount\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/drive.py\u001b[0m in \u001b[0;36m_mount\u001b[0;34m(mountpoint, force_remount, timeout_ms, ephemeral, readonly)\u001b[0m\n\u001b[1;32m    132\u001b[0m   )\n\u001b[1;32m    133\u001b[0m   \u001b[0;32mif\u001b[0m \u001b[0mephemeral\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 134\u001b[0;31m     _message.blocking_request(\n\u001b[0m\u001b[1;32m    135\u001b[0m         \u001b[0;34m'request_auth'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    136\u001b[0m         \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'authType'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'dfs_ephemeral'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/_message.py\u001b[0m in \u001b[0;36mblocking_request\u001b[0;34m(request_type, request, timeout_sec, parent)\u001b[0m\n\u001b[1;32m    174\u001b[0m       \u001b[0mrequest_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpect_reply\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    175\u001b[0m   )\n\u001b[0;32m--> 176\u001b[0;31m   \u001b[0;32mreturn\u001b[0m \u001b[0mread_reply_from_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_sec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+            "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/_message.py\u001b[0m in \u001b[0;36mread_reply_from_input\u001b[0;34m(message_id, timeout_sec)\u001b[0m\n\u001b[1;32m    101\u001b[0m     ):\n\u001b[1;32m    102\u001b[0m       \u001b[0;32mif\u001b[0m \u001b[0;34m'error'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mreply\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 103\u001b[0;31m         \u001b[0;32mraise\u001b[0m \u001b[0mMessageError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreply\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    104\u001b[0m       \u001b[0;32mreturn\u001b[0m \u001b[0mreply\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    105\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+            "\u001b[0;31mMessageError\u001b[0m: Error: credential propagation was unsuccessful"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import subprocess, os, time\n",
+        "\n",
+        "subprocess.run([\"pkill\",\"-f\",\"ollama\"])\n",
+        "time.sleep(3)\n",
+        "\n",
+        "env = os.environ.copy()\n",
+        "env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
+        "\n",
+        "subprocess.Popen([\"ollama\",\"serve\"], env=env)\n",
+        "print(\"Restarted Ollama\")"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "SKcQv5Ng0thH",
+        "outputId": "e3b95518-e432-4669-8fd0-50b86fdb216f"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Restarted Ollama\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [],
+      "metadata": {
+        "id": "T51mIyDV1wiD"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}