priyansh-saxena1 commited on
Commit
f538014
·
1 Parent(s): c9ecd03

fix: ROS hallucination guard + debug logging

Browse files
Files changed (1) hide show
  1. clinical_ai_agent_fixed.ipynb +1132 -0
clinical_ai_agent_fixed.ipynb ADDED
@@ -0,0 +1,1132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "nbformat": 4,
3
+ "nbformat_minor": 0,
4
+ "metadata": {
5
+ "colab": {
6
+ "provenance": [],
7
+ "gpuType": "T4"
8
+ },
9
+ "kernelspec": {
10
+ "name": "python3",
11
+ "display_name": "Python 3"
12
+ },
13
+ "language_info": {
14
+ "name": "python"
15
+ },
16
+ "accelerator": "GPU"
17
+ },
18
+ "cells": [
19
+ {
20
+ "cell_type": "code",
21
+ "source": [
22
+ "%%bash\n",
23
+ "set -euo pipefail\n",
24
+ "\n",
25
+ "echo '════════════════════════════════════════════'\n",
26
+ "echo ' CELL 1 — System tools + Install Ollama'\n",
27
+ "echo '════════════════════════════════════════════'\n",
28
+ "\n",
29
+ "apt-get update -qq\n",
30
+ "apt-get install -y zstd pciutils curl 2>&1 | tail -3\n",
31
+ "echo '[OK] apt packages installed'\n",
32
+ "\n",
33
+ "npm install -g localtunnel 2>&1 | tail -2\n",
34
+ "echo \"[OK] localtunnel $(lt --version)\"\n",
35
+ "\n",
36
+ "echo ''\n",
37
+ "echo '── Installing Ollama ──'\n",
38
+ "curl -fsSL https://ollama.com/install.sh | sh\n",
39
+ "ollama --version\n",
40
+ "\n",
41
+ "echo ''\n",
42
+ "echo '[DONE] Cell 1 complete'"
43
+ ],
44
+ "metadata": {
45
+ "colab": {
46
+ "base_uri": "https://localhost:8080/"
47
+ },
48
+ "id": "uxsL2dxR0kMA",
49
+ "outputId": "d17a4773-e25a-48a3-9336-781913fc1f6c"
50
+ },
51
+ "execution_count": null,
52
+ "outputs": [
53
+ {
54
+ "output_type": "stream",
55
+ "name": "stdout",
56
+ "text": [
57
+ "════════════════════════════════════════════\n",
58
+ " CELL 1 — System tools + Install Ollama\n",
59
+ "════════════════════════════════════════════\n",
60
+ "\r\n",
61
+ "/sbin/ldconfig.real: /usr/local/lib/libur_adapter_level_zero.so.0 is not a symbolic link\r\n",
62
+ "\r\n",
63
+ "[OK] apt packages installed\n",
64
+ "npm notice To update run: npm install -g npm@11.13.0\n",
65
+ "npm notice\n",
66
+ "[OK] localtunnel 2.0.2\n",
67
+ "\n",
68
+ "── Installing Ollama ──\n",
69
+ "\u001b[1m\u001b[31mWARNING:\u001b[m systemd is not running\n",
70
+ "Warning: could not connect to a running Ollama instance\n",
71
+ "Warning: client version is 0.21.2\n",
72
+ "\n",
73
+ "[DONE] Cell 1 complete\n"
74
+ ]
75
+ },
76
+ {
77
+ "output_type": "stream",
78
+ "name": "stderr",
79
+ "text": [
80
+ "W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)\n",
81
+ ">>> Installing ollama to /usr/local\n",
82
+ ">>> Downloading ollama-linux-amd64.tar.zst\n",
83
+ "#=#=# \r##O#-# \r##O=# # \r#=#=-# # \r\r 0.1%\r 0.3%\r 0.6%\r 1.0%\r# 1.4%\r# 2.0%\r# 2.6%\r## 3.2%\r## 3.7%\r## 3.9%\r## 4.1%\r### 4.2%\r### 4.4%\r### 4.6%\r### 4.7%\r### 4.9%\r### 5.1%\r### 5.3%\r### 5.5%\r#### 5.6%\r#### 5.8%\r#### 6.0%\r#### 6.2%\r#### 6.4%\r#### 6.6%\r#### 6.7%\r#### 6.9%\r##### 7.0%\r##### 7.3%\r##### 7.5%\r##### 7.7%\r##### 7.9%\r##### 8.1%\r##### 8.2%\r###### 8.4%\r###### 8.6%\r###### 8.8%\r###### 9.0%\r###### 9.2%\r###### 9.4%\r###### 9.6%\r####### 9.8%\r####### 10.1%\r####### 10.3%\r####### 10.5%\r####### 10.7%\r####### 10.9%\r####### 11.1%\r######## 11.3%\r######## 11.5%\r######## 11.7%\r######## 11.9%\r######## 12.1%\r######## 12.3%\r######## 12.4%\r######### 12.5%\r######### 12.6%\r######### 12.8%\r######### 13.0%\r######### 13.2%\r######### 13.4%\r######### 13.5%\r######### 13.9%\r########## 14.1%\r########## 14.4%\r########## 14.7%\r########## 15.0%\r########### 15.5%\r########### 15.9%\r########### 16.2%\r########### 16.6%\r############ 16.9%\r############ 17.5%\r############ 17.9%\r############# 18.5%\r############# 19.2%\r############## 19.6%\r############## 19.8%\r############## 20.1%\r############## 20.5%\r############### 20.9%\r############### 21.6%\r############### 21.8%\r############### 21.9%\r############### 21.9%\r############### 22.0%\r############### 22.1%\r################ 23.2%\r################# 24.6%\r################## 25.8%\r################### 26.5%\r################### 26.8%\r################### 27.1%\r#################### 27.9%\r#################### 29.0%\r##################### 30.2%\r###################### 31.3%\r###################### 31.5%\r###################### 31.7%\r###################### 31.8%\r####################### 32.0%\r####################### 32.5%\r####################### 32.9%\r####################### 33.3%\r######################## 34.5%\r######################### 35.6%\r########################## 36.4%\r########################## 36.7%\r########################## 
37.5%\r########################### 38.5%\r############################ 39.5%\r############################# 40.6%\r############################# 41.5%\r############################## 42.2%\r############################## 42.6%\r############################## 42.8%\r############################### 43.3%\r############################### 43.5%\r############################### 43.8%\r############################### 44.2%\r################################ 45.1%\r################################# 46.1%\r################################# 47.0%\r################################## 48.0%\r################################## 48.4%\r################################### 48.8%\r################################### 49.3%\r################################### 49.8%\r#################################### 50.6%\r#################################### 51.3%\r##################################### 51.8%\r##################################### 52.4%\r###################################### 52.9%\r###################################### 53.4%\r###################################### 53.9%\r####################################### 54.6%\r####################################### 55.5%\r######################################## 56.4%\r######################################### 57.0%\r######################################### 57.2%\r######################################### 57.3%\r######################################### 57.5%\r######################################### 57.7%\r######################################### 57.9%\r######################################### 58.2%\r########################################## 58.4%\r########################################## 58.5%\r########################################## 58.6%\r########################################## 58.7%\r########################################## 58.8%\r########################################## 59.2%\r########################################## 59.5%\r########################################### 
59.8%\r########################################### 60.3%\r############################################ 61.1%\r############################################ 62.1%\r############################################# 62.9%\r############################################# 63.3%\r############################################# 63.6%\r############################################# 63.8%\r############################################## 64.0%\r############################################## 64.2%\r############################################## 64.4%\r############################################## 64.6%\r############################################## 65.0%\r############################################### 65.3%\r############################################### 66.0%\r############################################### 66.6%\r################################################ 67.8%\r################################################# 68.9%\r################################################## 70.2%\r################################################### 71.4%\r################################################### 71.6%\r################################################### 71.8%\r#################################################### 72.8%\r##################################################### 73.9%\r##################################################### 75.0%\r###################################################### 76.0%\r####################################################### 76.9%\r######################################################## 77.9%\r######################################################## 79.0%\r######################################################### 79.9%\r######################################################### 80.2%\r######################################################### 80.5%\r########################################################## 80.8%\r########################################################## 81.2%\r########################################################## 
81.5%\r########################################################## 81.7%\r########################################################### 82.1%\r########################################################### 82.8%\r############################################################ 83.7%\r############################################################ 84.0%\r############################################################ 84.7%\r############################################################# 85.5%\r############################################################## 86.4%\r############################################################## 87.1%\r############################################################### 87.5%\r############################################################### 88.3%\r################################################################ 89.4%\r################################################################# 90.7%\r################################################################## 92.0%\r################################################################### 93.4%\r#################################################################### 94.8%\r#################################################################### 95.7%\r##################################################################### 97.2%\r###################################################################### 98.4%\r####################################################################### 98.6%\r####################################################################### 98.8%\r####################################################################### 99.1%\r####################################################################### 99.3%\r####################################################################### 99.5%\r####################################################################### 99.8%\r####################################################################### 99.9%\r######################################################################## 100.0%\n",
84
+ ">>> Creating ollama user...\n",
85
+ ">>> Adding ollama user to video group...\n",
86
+ ">>> Adding current user to ollama group...\n",
87
+ ">>> Creating ollama systemd service...\n",
88
+ ">>> NVIDIA GPU installed.\n",
89
+ ">>> The Ollama API is now available at 127.0.0.1:11434.\n",
90
+ ">>> Install complete. Run \"ollama\" from the command line.\n"
91
+ ]
92
+ }
93
+ ]
94
+ },
95
+ {
96
+ "cell_type": "code",
97
+ "source": [
98
+ "import subprocess, os, glob, time, requests\n",
99
+ "\n",
100
+ "print(\"════════════════════════════════════════════\")\n",
101
+ "print(\" CELL 2 — GPU Fix + Ollama GPU Start\")\n",
102
+ "print(\"════════════════════════════════════════════\")\n",
103
+ "\n",
104
+ "subprocess.run([\"pkill\", \"-f\", \"ollama\"], capture_output=True)\n",
105
+ "time.sleep(2)\n",
106
+ "print(\"[OK] Killed any stale Ollama process\")\n",
107
+ "\n",
108
+ "print(\"\\n── Discovering NVIDIA/CUDA libs ──\")\n",
109
+ "r = subprocess.run(\"find /usr/lib64-nvidia /usr/local/cuda* -name 'libcuda.so*' 2>/dev/null\",\n",
110
+ " shell=True, capture_output=True, text=True)\n",
111
+ "print(r.stdout.strip() or \"WARNING: No libcuda found!\")\n",
112
+ "\n",
113
+ "ollama_lib = \"/usr/local/lib/ollama\"\n",
114
+ "print(f\"\\n── Ollama lib dir: {ollama_lib} ──\")\n",
115
+ "r2 = subprocess.run(f\"ls {ollama_lib}/\", shell=True, capture_output=True, text=True)\n",
116
+ "print(r2.stdout.strip())\n",
117
+ "\n",
118
+ "nvidia_dir = \"/usr/lib64-nvidia\"\n",
119
+ "cuda_dir = \"/usr/local/cuda/lib64\"\n",
120
+ "cuda128 = \"/usr/local/cuda-12.8/targets/x86_64-linux/lib\"\n",
121
+ "\n",
122
+ "needed = {\n",
123
+ " \"libcuda.so\": [f\"{nvidia_dir}/libcuda.so\", f\"{nvidia_dir}/libcuda.so.1\"],\n",
124
+ " \"libcuda.so.1\": [f\"{nvidia_dir}/libcuda.so.1\"],\n",
125
+ " \"libnvidia-ml.so.1\": [f\"{nvidia_dir}/libnvidia-ml.so.1\"],\n",
126
+ " \"libnvidia-ml.so\": [f\"{nvidia_dir}/libnvidia-ml.so.1\"],\n",
127
+ " \"libcudart.so.12\": [f\"{cuda_dir}/libcudart.so.12\", f\"{cuda128}/libcudart.so.12\"],\n",
128
+ "}\n",
129
+ "\n",
130
+ "print(\"\\n── Creating symlinks ──\")\n",
131
+ "for dst_name, srcs in needed.items():\n",
132
+ " dst = os.path.join(ollama_lib, dst_name)\n",
133
+ " if os.path.lexists(dst):\n",
134
+ " os.remove(dst)\n",
135
+ " for src in srcs:\n",
136
+ " if os.path.exists(src):\n",
137
+ " os.symlink(src, dst)\n",
138
+ " print(f\" ✅ {src} → {dst}\")\n",
139
+ " break\n",
140
+ " else:\n",
141
+ " print(f\" ⚠️ MISSING: {dst_name} (no source found)\")\n",
142
+ "\n",
143
+ "gpu_env = os.environ.copy()\n",
144
+ "gpu_env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
145
+ "gpu_env[\"LD_LIBRARY_PATH\"] = (\n",
146
+ " f\"{nvidia_dir}:{ollama_lib}:/usr/local/cuda/lib64:\"\n",
147
+ " + os.environ.get(\"LD_LIBRARY_PATH\", \"\")\n",
148
+ ")\n",
149
+ "gpu_env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
150
+ "gpu_env[\"OLLAMA_DEBUG\"] = \"INFO\"\n",
151
+ "\n",
152
+ "print(f\"\\n── Starting Ollama daemon ──\")\n",
153
+ "print(f\"LD_LIBRARY_PATH starts with: {gpu_env['LD_LIBRARY_PATH'][:80]}...\")\n",
154
+ "log = open(\"/content/ollama.log\", \"w\")\n",
155
+ "proc = subprocess.Popen([\"ollama\", \"serve\"], env=gpu_env, stdout=log, stderr=log,\n",
156
+ " preexec_fn=os.setpgrp)\n",
157
+ "print(f\"[OK] Ollama PID: {proc.pid}\")\n",
158
+ "\n",
159
+ "for i in range(25):\n",
160
+ "    try:\n",
161
+ "        requests.get(\"http://localhost:11434/api/tags\", timeout=2)\n",
162
+ "        print(f\"[OK] Ollama API responsive after {i+1}s\")\n",
163
+ "        break\n",
164
+ "    except requests.RequestException:  # daemon not listening yet; keep polling, but let Ctrl-C / SystemExit propagate\n",
165
+ "        time.sleep(1)\n",
166
+ "\n",
167
+ "time.sleep(2)\n",
168
+ "\n",
169
+ "print(\"\\n── Ollama startup log (GPU detection lines) ──\")\n",
170
+ "log_txt = open(\"/content/ollama.log\").read()\n",
171
+ "for line in log_txt.splitlines():\n",
172
+ " kws = [\"gpu\", \"cuda\", \"vram\", \"nvidia\", \"error\", \"warn\", \"discovered\", \"total_vram\"]\n",
173
+ " if any(k in line.lower() for k in kws):\n",
174
+ " print(line)\n",
175
+ "\n",
176
+ "print(\"\\n── Checking model ──\")\n",
177
+ "tags = requests.get(\"http://localhost:11434/api/tags\", timeout=10).json()  # bounded like every other call here, so a wedged daemon cannot hang the cell\n",
178
+ "models = [m[\"name\"] for m in tags.get(\"models\", [])]\n",
179
+ "print(f\"Installed: {models}\")\n",
180
+ "if not any(\"llama3.1:8b\" in m for m in models):\n",
181
+ "    print(\"Pulling llama3.1:8b ...\")\n",
182
+ "    subprocess.run([\"ollama\", \"pull\", \"llama3.1:8b\"], check=True)\n",
183
+ "    print(\"[OK] Model pulled\")\n",
184
+ "\n",
185
+ "print(\"\\n── Warming up model (60-90s first load) ──\")\n",
186
+ "t0 = time.time()\n",
187
+ "r3 = requests.post(\"http://localhost:11434/api/chat\", json={\n",
188
+ " \"model\": \"llama3.1:8b\",\n",
189
+ " \"messages\": [{\"role\": \"user\", \"content\": \"Reply with the word READY only.\"}],\n",
190
+ " \"stream\": False,\n",
191
+ " \"options\": {\"temperature\": 0, \"num_predict\": 5}\n",
192
+ "}, timeout=(10, 300))\n",
193
+ "elapsed = time.time() - t0\n",
194
+ "d = r3.json()\n",
195
+ "load_s = d.get(\"load_duration\", 0) / 1e9\n",
196
+ "eval_s = d.get(\"eval_duration\", 0) / 1e9\n",
197
+ "reply = d[\"message\"][\"content\"].strip()\n",
198
+ "print(f\"Reply : {reply}\")\n",
199
+ "print(f\"Total : {elapsed:.1f}s | Load: {load_s:.1f}s | Eval: {eval_s:.1f}s\")\n",
200
+ "if eval_s > 0:\n",
201
+ " tps = d.get(\"eval_count\", 0) / eval_s\n",
202
+ " print(f\"Speed : {tps:.1f} tok/s {'← GPU (>30 t/s)' if tps > 30 else '← CPU (<10 t/s typical)'}\")\n",
203
+ "\n",
204
+ "print(\"\\n── ollama ps ──\")\n",
205
+ "ps = subprocess.run([\"ollama\", \"ps\"], capture_output=True, text=True)\n",
206
+ "print(ps.stdout)\n",
207
+ "\n",
208
+ "print(\"── nvidia-smi ──\")\n",
209
+ "smi = subprocess.run(\n",
210
+ " [\"nvidia-smi\", \"--query-gpu=name,memory.used,memory.total\", \"--format=csv,noheader\"],\n",
211
+ " capture_output=True, text=True)\n",
212
+ "print(smi.stdout)\n",
213
+ "\n",
214
+ "used_mib = int(smi.stdout.split(\",\")[1].strip().split()[0]) if smi.stdout else 0\n",
215
+ "if used_mib > 4000:\n",
216
+ " print(f\"✅ GPU confirmed — {used_mib} MiB used\")\n",
217
+ "elif \"GPU\" in ps.stdout:\n",
218
+ " print(f\"✅ ollama ps shows GPU\")\n",
219
+ "else:\n",
220
+ " print(f\"⚠️ Still on CPU ({used_mib} MiB). Check log lines above for CUDA errors.\")\n",
221
+ "\n",
222
+ "print(\"\\n[DONE] Cell 2 complete\")"
223
+ ],
224
+ "metadata": {
225
+ "colab": {
226
+ "base_uri": "https://localhost:8080/"
227
+ },
228
+ "id": "nK5UZG3F0k7Q",
229
+ "outputId": "56840451-6c1d-47c5-f024-320f1c777462"
230
+ },
231
+ "execution_count": null,
232
+ "outputs": [
233
+ {
234
+ "output_type": "stream",
235
+ "name": "stdout",
236
+ "text": [
237
+ "════════════════════════════════════════════\n",
238
+ " CELL 2 — GPU Fix + Ollama GPU Start\n",
239
+ "════════════════════════════════════════════\n",
240
+ "[OK] Killed any stale Ollama process\n",
241
+ "\n",
242
+ "── Discovering NVIDIA/CUDA libs ──\n",
243
+ "/usr/lib64-nvidia/libcuda.so.1\n",
244
+ "/usr/lib64-nvidia/libcuda.so.580.82.07\n",
245
+ "/usr/lib64-nvidia/libcuda.so\n",
246
+ "/usr/local/cuda-12.8/compat/libcuda.so.570.124.06\n",
247
+ "/usr/local/cuda-12.8/compat/libcuda.so.1\n",
248
+ "/usr/local/cuda-12.8/compat/libcuda.so\n",
249
+ "/usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs/libcuda.so\n",
250
+ "\n",
251
+ "── Ollama lib dir: /usr/local/lib/ollama ──\n",
252
+ "cuda_v12\n",
253
+ "cuda_v13\n",
254
+ "include\n",
255
+ "libggml-base.so\n",
256
+ "libggml-base.so.0\n",
257
+ "libggml-base.so.0.0.0\n",
258
+ "libggml-cpu-alderlake.so\n",
259
+ "libggml-cpu-haswell.so\n",
260
+ "libggml-cpu-icelake.so\n",
261
+ "libggml-cpu-sandybridge.so\n",
262
+ "libggml-cpu-skylakex.so\n",
263
+ "libggml-cpu-sse42.so\n",
264
+ "libggml-cpu-x64.so\n",
265
+ "mlx_cuda_v13\n",
266
+ "vulkan\n",
267
+ "\n",
268
+ "── Creating symlinks ──\n",
269
+ " ✅ /usr/lib64-nvidia/libcuda.so → /usr/local/lib/ollama/libcuda.so\n",
270
+ " ✅ /usr/lib64-nvidia/libcuda.so.1 → /usr/local/lib/ollama/libcuda.so.1\n",
271
+ " ✅ /usr/lib64-nvidia/libnvidia-ml.so.1 → /usr/local/lib/ollama/libnvidia-ml.so.1\n",
272
+ " ✅ /usr/lib64-nvidia/libnvidia-ml.so.1 → /usr/local/lib/ollama/libnvidia-ml.so\n",
273
+ " ✅ /usr/local/cuda/lib64/libcudart.so.12 → /usr/local/lib/ollama/libcudart.so.12\n",
274
+ "\n",
275
+ "── Starting Ollama daemon ──\n",
276
+ "LD_LIBRARY_PATH starts with: /usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64:/usr/lib64-nvidia...\n",
277
+ "[OK] Ollama PID: 15538\n",
278
+ "[OK] Ollama API responsive after 2s\n",
279
+ "\n",
280
+ "── Ollama startup log (GPU detection lines) ──\n",
281
+ "time=2026-04-25T22:47:53.770Z level=INFO source=routes.go:1752 msg=\"server config\" env=\"map[CUDA_VISIBLE_DEVICES: GGML_VK_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:0 OLLAMA_DEBUG:INFO OLLAMA_DEBUG_LOG_REQUESTS:false OLLAMA_EDITOR: OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_KEEP_ALIVE:2h0m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NO_CLOUD:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false OLLAMA_VULKAN:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:]\"\n",
282
+ "time=2026-04-25T22:47:53.772Z level=INFO source=runner.go:67 msg=\"discovering available GPUs...\"\n",
283
+ "time=2026-04-25T22:47:54.618Z level=INFO source=types.go:42 msg=\"inference compute\" id=GPU-0779169f-d299-340e-42ee-14fb3ed34faf filter_id=\"\" library=CUDA compute=7.5 name=CUDA0 description=\"Tesla T4\" libdirs=ollama,cuda_v13 driver=13.0 pci_id=0000:00:04.0 type=discrete total=\"15.0 GiB\" available=\"14.6 GiB\"\n",
284
+ "time=2026-04-25T22:47:54.618Z level=INFO source=routes.go:1860 msg=\"vram-based default context\" total_vram=\"15.0 GiB\" default_num_ctx=4096\n",
285
+ "\n",
286
+ "── Checking model ──\n",
287
+ "Installed: []\n",
288
+ "Pulling llama3.1:8b ...\n",
289
+ "[OK] Model pulled\n",
290
+ "\n",
291
+ "── Warming up model (60-90s first load) ──\n",
292
+ "Reply : READY\n",
293
+ "Total : 96.8s | Load: 96.7s | Eval: 0.0s\n",
294
+ "Speed : 58.5 tok/s ← GPU (>30 t/s)\n",
295
+ "\n",
296
+ "── ollama ps ──\n",
297
+ "NAME ID SIZE PROCESSOR CONTEXT UNTIL \n",
298
+ "llama3.1:8b 46e0c10c039e 5.5 GB 100% GPU 4096 2 hours from now \n",
299
+ "\n",
300
+ "── nvidia-smi ──\n",
301
+ "Tesla T4, 5367 MiB, 15360 MiB\n",
302
+ "\n",
303
+ "✅ GPU confirmed — 5367 MiB used\n",
304
+ "\n",
305
+ "[DONE] Cell 2 complete\n"
306
+ ]
307
+ }
308
+ ]
309
+ },
310
+ {
311
+ "cell_type": "code",
312
+ "source": [
313
+ "%%bash\n",
314
+ "set -euo pipefail\n",
315
+ "echo '════════════════════════════════════════════'\n",
316
+ "echo ' CELL 3 — Clone + Patch'\n",
317
+ "echo '════════════════════════════════════════════'\n",
318
+ "cd /content\n",
319
+ "rm -rf medintake-ai\n",
320
+ "git clone https://github.com/priyansh-saxena1/medintake-ai.git\n",
321
+ "cd medintake-ai\n",
322
+ "echo \"[OK] commit: $(git rev-parse --short HEAD)\"\n",
323
+ "pip install -r requirements.txt 2>&1 | tail -4\n",
324
+ "echo '[OK] pip done'\n",
325
+ "\n",
326
+ "python3 << 'PYEOF'\n",
327
+ "import pathlib\n",
328
+ "p = pathlib.Path('/content/medintake-ai/app/llm.py')\n",
329
+ "src = p.read_text()\n",
330
+ "changed = False\n",
331
+ "\n",
332
+ "patches = [\n",
333
+ " (\"PATCH 1 timeout\",\n",
334
+ " 'requests.post(self.api_url, json=payload, timeout=60)',\n",
335
+ " 'requests.post(self.api_url, json=payload, timeout=(10, 300))'),\n",
336
+ " (\"PATCH 2 OLLAMA_HOST\",\n",
337
+ " 'self.api_url = \"http://localhost:11434/api/chat\"',\n",
338
+ " 'self.api_url = os.environ.get(\"OLLAMA_HOST\",\"http://localhost:11434\") + \"/api/chat\"'),\n",
339
+ " (\"PATCH 3 MODEL_NAME default\",\n",
340
+ " 'self.model_name = os.environ.get(\"MODEL_NAME\", \"qwen2.5:0.5b\")',\n",
341
+ " 'self.model_name = os.environ.get(\"MODEL_NAME\", \"llama3.1:8b\")'),\n",
342
+ " (\"PATCH 4 response key\",\n",
343
+ " 'raw = data.get(\"response\", \"\")',\n",
344
+ " 'raw = data.get(\"message\", {}).get(\"content\", \"\")'),\n",
345
+ "]\n",
346
+ "\n",
347
+ "for name, old, new in patches:\n",
348
+ " if old in src:\n",
349
+ " src = src.replace(old, new, 1)\n",
350
+ " changed = True\n",
351
+ " print(f\"[APPLIED] {name}\")\n",
352
+ " elif new in src:\n",
353
+ " print(f\"[SKIP] {name}\")\n",
354
+ " else:\n",
355
+ " print(f\"[WARN] {name} target not found\")\n",
356
+ "\n",
357
+ "if changed:\n",
358
+ " p.write_text(src)\n",
359
+ " print(\"[OK] llm.py saved\")\n",
360
+ "\n",
361
+ "import py_compile\n",
362
+ "py_compile.compile(str(p), doraise=True)\n",
363
+ "print(\"[OK] syntax valid\")\n",
364
+ "PYEOF\n",
365
+ "\n",
366
+ "echo ''\n",
367
+ "echo '── Tests (MockLLM) ──'\n",
368
+ "cd /content/medintake-ai\n",
369
+ "MOCK_LLM=true python3 -m pytest tests/ -v --tb=short 2>&1\n",
370
+ "echo '[DONE] Cell 3'\n"
371
+ ],
372
+ "metadata": {
373
+ "colab": {
374
+ "base_uri": "https://localhost:8080/"
375
+ },
376
+ "id": "jXLQSpFj0n8a",
377
+ "outputId": "10292259-f6c0-4b07-a1a5-e3d9c474afc9"
378
+ },
379
+ "execution_count": null,
380
+ "outputs": [
381
+ {
382
+ "output_type": "stream",
383
+ "name": "stdout",
384
+ "text": [
385
+ "════════════════════════════════════════════\n",
386
+ " CELL 3 — Clone + Patch\n",
387
+ "════════════════════════════════════════════\n",
388
+ "[OK] commit: eb1b955\n",
389
+ "Requirement already satisfied: orjson>=3.11.5 in /usr/local/lib/python3.12/dist-packages (from langgraph-sdk<0.4.0,>=0.3.0->langgraph->-r requirements.txt (line 1)) (3.11.8)\n",
390
+ "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.12/dist-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core>=0.1->langgraph->-r requirements.txt (line 1)) (3.1.1)\n",
391
+ "Requirement already satisfied: requests-toolbelt>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from langsmith<1.0.0,>=0.3.45->langchain-core>=0.1->langgraph->-r requirements.txt (line 1)) (1.0.0)\n",
392
+ "Requirement already satisfied: zstandard>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from langsmith<1.0.0,>=0.3.45->langchain-core>=0.1->langgraph->-r requirements.txt (line 1)) (0.25.0)\n",
393
+ "[OK] pip done\n",
394
+ "[APPLIED] PATCH 1 timeout\n",
395
+ "[APPLIED] PATCH 2 OLLAMA_HOST\n",
396
+ "[APPLIED] PATCH 3 MODEL_NAME default\n",
397
+ "[SKIP] PATCH 4 response key\n",
398
+ "[OK] llm.py saved\n",
399
+ "[OK] syntax valid\n",
400
+ "\n",
401
+ "── Tests (MockLLM) ──\n",
402
+ "============================= test session starts ==============================\n",
403
+ "platform linux -- Python 3.12.13, pytest-8.4.2, pluggy-1.6.0 -- /usr/bin/python3\n",
404
+ "cachedir: .pytest_cache\n",
405
+ "rootdir: /content/medintake-ai\n",
406
+ "configfile: pytest.ini\n",
407
+ "plugins: asyncio-1.3.0, langsmith-0.7.30, typeguard-4.5.1, anyio-4.13.0\n",
408
+ "asyncio: mode=Mode.AUTO, debug=False, asyncio_default_fixture_loop_scope=function, asyncio_default_test_loop_scope=function\n",
409
+ "collecting ... collected 11 items\n",
410
+ "\n",
411
+ "tests/test_e2e.py::test_mock_llm_combined_call_basic_extraction PASSED [ 9%]\n",
412
+ "tests/test_e2e.py::test_mock_llm_emergency_detection PASSED [ 18%]\n",
413
+ "tests/test_e2e.py::test_mock_llm_does_not_repeat_filled_questions PASSED [ 27%]\n",
414
+ "tests/test_e2e.py::test_mock_llm_severity_extraction PASSED [ 36%]\n",
415
+ "tests/test_e2e.py::test_mock_llm_ros_extraction PASSED [ 45%]\n",
416
+ "tests/test_e2e.py::test_mock_llm_speed PASSED [ 54%]\n",
417
+ "tests/test_e2e.py::test_combined_output_schema_round_trip PASSED [ 63%]\n",
418
+ "tests/test_e2e.py::test_health_endpoint PASSED [ 72%]\n",
419
+ "tests/test_e2e.py::test_emergency_triage_node PASSED [ 81%]\n",
420
+ "tests/test_e2e.py::test_full_intake_multi_turn_extraction PASSED [ 90%]\n",
421
+ "tests/test_e2e.py::test_api_response_time PASSED [100%]\n",
422
+ "\n",
423
+ "============================== 11 passed in 0.71s ==============================\n",
424
+ "[DONE] Cell 3\n"
425
+ ]
426
+ },
427
+ {
428
+ "output_type": "stream",
429
+ "name": "stderr",
430
+ "text": [
431
+ "Cloning into 'medintake-ai'...\n"
432
+ ]
433
+ }
434
+ ]
435
+ },
436
+ {
437
+ "cell_type": "code",
438
+ "source": [
439
+ "# Auto-heal Ollama before each turn\n",
440
+ "import requests, subprocess, os, time\n",
441
+ "def ensure_ollama():\n",
442
+ "    \"\"\"Restart `ollama serve` when its API on localhost:11434 is unreachable; raise RuntimeError if it stays down.\"\"\"\n",
443
+ "    try:\n",
444
+ "        requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
445
+ "    except requests.RequestException: pass  # unreachable -> restart below; Ctrl-C still propagates\n",
446
+ "    print(\"⚠️ Ollama dead — restarting...\")\n",
447
+ "    env = {k: v for k, v in os.environ.items() if k != \"CUDA_VISIBLE_DEVICES\"}\n",
448
+ "    env[\"LD_LIBRARY_PATH\"] = \":\".join(filter(None, [\"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\", env.get(\"LD_LIBRARY_PATH\")]))\n",
449
+ "    env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
450
+ "    subprocess.Popen([\"ollama\",\"serve\"], env=env, stdout=open(\"/content/ollama.log\",\"a\"),\n",
451
+ "                     stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
452
+ "    for _ in range(30):\n",
453
+ "        try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
454
+ "        except requests.RequestException: time.sleep(1)\n",
455
+ "    raise RuntimeError(\"Ollama did not come back after restart — see /content/ollama.log\")\n",
456
+ "\n",
457
+ "ensure_ollama()\n",
458
+ "\n",
459
+ "import sys, os, json, time, subprocess, requests\n",
460
+ "sys.path.insert(0, \"/content/medintake-ai\")\n",
461
+ "os.environ[\"MOCK_LLM\"] = \"false\"\n",
462
+ "os.environ[\"MODEL_NAME\"] = \"llama3.1:8b\"\n",
463
+ "os.environ[\"OLLAMA_HOST\"] = \"http://localhost:11434\"\n",
464
+ "\n",
465
+ "print(\"════════════════════════════════════════════\")\n",
466
+ "print(\" CELL 4 — Deep LLM Dataflow Debug\")\n",
467
+ "print(\"════════════════════════════════════════════\")\n",
468
+ "\n",
469
+ "# A: Hardware\n",
470
+ "print(\"\\n── A: Hardware status ──\")\n",
471
+ "ps = subprocess.run([\"ollama\",\"ps\"], capture_output=True, text=True)\n",
472
+ "smi = subprocess.run([\"nvidia-smi\",\"--query-gpu=name,memory.used,memory.total\",\n",
473
+ " \"--format=csv,noheader\"], capture_output=True, text=True)\n",
474
+ "print(\"ollama ps :\", ps.stdout.strip())\n",
475
+ "print(\"nvidia-smi:\", smi.stdout.strip())\n",
476
+ "\n",
477
+ "# B: App env + object init\n",
478
+ "print(\"\\n── B: App LLM object ──\")\n",
479
+ "from app.llm import OllamaLLM, CombinedOutput, COMBINED_SYSTEM_PROMPT\n",
480
+ "llm = OllamaLLM()\n",
481
+ "print(f\"model_name : {llm.model_name}\")\n",
482
+ "print(f\"api_url : {llm.api_url}\")\n",
483
+ "\n",
484
+ "# C: Prompt construction\n",
485
+ "print(\"\\n── C: System prompt ──\")\n",
486
+ "print(COMBINED_SYSTEM_PROMPT)\n",
487
+ "\n",
488
+ "transcript = \"Patient: I have chest pain\"\n",
489
+ "currentjson = CombinedOutput().model_dump_json()\n",
490
+ "prompt = (\n",
491
+ " f\"CURRENT CLINICAL STATE:\\n{currentjson}\\n\\n\"\n",
492
+ " f\"FULL CONVERSATION TRANSCRIPT:\\n{transcript}\\n\\n\"\n",
493
+ " \"Instructions: Extract all new clinical facts, merge into state, \"\n",
494
+ " \"generate ONE empathetic follow-up question. Return ONLY JSON.\"\n",
495
+ ")\n",
496
+ "print(\"\\n── D: User prompt ──\")\n",
497
+ "print(prompt)\n",
498
+ "\n",
499
+ "# E: Raw HTTP\n",
500
+ "print(\"\\n── E: Raw Ollama HTTP call ──\")\n",
501
+ "payload = {\n",
502
+ " \"model\": llm.model_name,\n",
503
+ " \"messages\": [\n",
504
+ " {\"role\":\"system\",\"content\": COMBINED_SYSTEM_PROMPT},\n",
505
+ " {\"role\":\"user\", \"content\": prompt}\n",
506
+ " ],\n",
507
+ " \"format\": \"json\",\n",
508
+ " \"stream\": False,\n",
509
+ " \"options\": {\"temperature\": 0.0, \"num_predict\": 300}\n",
510
+ "}\n",
511
+ "\n",
512
+ "t0 = time.time()\n",
513
+ "resp = requests.post(llm.api_url, json=payload, timeout=(10,300))\n",
514
+ "elapsed = time.time() - t0\n",
515
+ "full = resp.json()\n",
516
+ "\n",
517
+ "load_s = full.get(\"load_duration\",0) / 1e9\n",
518
+ "eval_s = full.get(\"eval_duration\",1) / 1e9\n",
519
+ "tps = full.get(\"eval_count\",0) / eval_s\n",
520
+ "\n",
521
+ "print(f\"HTTP status : {resp.status_code}\")\n",
522
+ "print(f\"Total time : {elapsed:.2f}s\")\n",
523
+ "print(f\"Load duration : {load_s:.2f}s {'GPU (fast)' if load_s < 1 else 'CPU (slow)'}\")\n",
524
+ "print(f\"Tokens/sec : {tps:.1f} {'GPU (>30)' if tps > 30 else 'CPU (<15)'}\")\n",
525
+ "raw = full.get(\"message\",{}).get(\"content\",\"\").strip()\n",
526
+ "print(f\"\\nRaw content:\\n{raw}\")\n",
527
+ "\n",
528
+ "# F: Parsing\n",
529
+ "print(\"\\n── F: JSON parse ──\")\n",
530
+ "try:\n",
531
+ " parsed = json.loads(raw)\n",
532
+ " print(\"json.loads() OK\")\n",
533
+ " REQUIRED = {\"chief_complaint\",\"onset\",\"location\",\"duration\",\n",
534
+ " \"character\",\"severity\",\"aggravating\",\"relieving\",\"ros\",\"reply\"}\n",
535
+ " missing_k = REQUIRED - set(parsed.keys())\n",
536
+ " extra_k = set(parsed.keys()) - REQUIRED\n",
537
+ " print(f\"Missing keys : {missing_k or 'none'}\")\n",
538
+ " print(f\"Extra keys : {extra_k or 'none'}\")\n",
539
+ " print(json.dumps(parsed, indent=2))\n",
540
+ "except Exception as e:\n",
541
+ " print(f\"FAILED: {e}\")\n",
542
+ "\n",
543
+ "# G: Full pipeline\n",
544
+ "print(\"\\n── G: Full app pipeline ──\")\n",
545
+ "result = llm.combined_call(transcript, currentjson)\n",
546
+ "print(\"CombinedOutput:\")\n",
547
+ "print(json.dumps(result.model_dump(), indent=2))\n",
548
+ "\n",
549
+ "from app.graph import compute_stage, missing_from\n",
550
+ "stage = compute_stage(result)\n",
551
+ "missing = missing_from(result)\n",
552
+ "print(f\"\\nStage : {stage}\")\n",
553
+ "print(f\"Missing : {missing}\")\n",
554
+ "print(f\"Reply : '{result.reply}'\")\n",
555
+ "\n",
556
+ "FALLBACK = {\"\", \"Could you tell me more?\", \"Could you please repeat that?\"}\n",
557
+ "if result.reply in FALLBACK:\n",
558
+ " print(\"\\nWARNING: FALLBACK REPLY — LLM output failed silently!\")\n",
559
+ " print(\"Check logs below:\")\n",
560
+ " print(open(\"/content/ollama.log\").read()[-2000:])\n",
561
+ "else:\n",
562
+ " print(\"\\nOK: Real LLM reply returned\")\n",
563
+ "\n",
564
+ "print(\"[DONE] Cell 4\")\n"
565
+ ],
566
+ "metadata": {
567
+ "colab": {
568
+ "base_uri": "https://localhost:8080/"
569
+ },
570
+ "id": "6oX-ZDYB0pJ3",
571
+ "outputId": "66521e8e-e829-44ff-e4dd-d836f600ba0e"
572
+ },
573
+ "execution_count": null,
574
+ "outputs": [
575
+ {
576
+ "output_type": "stream",
577
+ "name": "stdout",
578
+ "text": [
579
+ "════════════════════════════════════════════\n",
580
+ " CELL 4 — Deep LLM Dataflow Debug\n",
581
+ "════════════════════════════════════════════\n",
582
+ "\n",
583
+ "── A: Hardware status ──\n",
584
+ "ollama ps : NAME ID SIZE PROCESSOR CONTEXT UNTIL \n",
585
+ "llama3.1:8b 46e0c10c039e 5.5 GB 100% GPU 4096 2 hours from now\n",
586
+ "nvidia-smi: Tesla T4, 5367 MiB, 15360 MiB\n",
587
+ "\n",
588
+ "── B: App LLM object ──\n",
589
+ "model_name : llama3.1:8b\n",
590
+ "api_url : http://localhost:11434/api/chat\n",
591
+ "\n",
592
+ "── C: System prompt ──\n",
593
+ "You are a clinical intake assistant AI. You have two jobs per turn:\n",
594
+ "\n",
595
+ "JOB 1 (EXTRACT): Read the FULL conversation and update the clinical JSON state with any new information the patient provided. Only extract facts explicitly stated.\n",
596
+ "\n",
597
+ "JOB 2 (RESPOND): Based on what is STILL MISSING from the clinical state, ask the patient ONE natural, empathetic question. Do NOT ask about things already filled in.\n",
598
+ "\n",
599
+ "CRITICAL RULES:\n",
600
+ "- Output ONLY valid JSON, nothing else.\n",
601
+ "- Do NOT diagnose or give medical advice.\n",
602
+ "- Do NOT ask more than one question.\n",
603
+ "- If all fields are complete, set reply to \"Thank you — I have everything I need.\"\n",
604
+ "\n",
605
+ "OUTPUT FORMAT (strictly follow this, no extra text):\n",
606
+ "{\n",
607
+ " \"chief_complaint\": \"...\",\n",
608
+ " \"onset\": \"...\",\n",
609
+ " \"location\": \"...\",\n",
610
+ " \"duration\": \"...\",\n",
611
+ " \"character\": \"...\",\n",
612
+ " \"severity\": \"...\",\n",
613
+ " \"aggravating\": \"...\",\n",
614
+ " \"relieving\": \"...\",\n",
615
+ " \"ros\": {\"system_name\": [\"finding1\", \"finding2\"]},\n",
616
+ " \"reply\": \"The single question to ask the patient next\"\n",
617
+ "}\n",
618
+ "\n",
619
+ "Use null for any field not yet known. Keep existing values if the patient didn't add new info.\n",
620
+ "\n",
621
+ "── D: User prompt ──\n",
622
+ "CURRENT CLINICAL STATE:\n",
623
+ "{\"chief_complaint\":null,\"onset\":null,\"location\":null,\"duration\":null,\"character\":null,\"severity\":null,\"aggravating\":null,\"relieving\":null,\"ros\":{},\"emergency\":false,\"reply\":\"\"}\n",
624
+ "\n",
625
+ "FULL CONVERSATION TRANSCRIPT:\n",
626
+ "Patient: I have chest pain\n",
627
+ "\n",
628
+ "Instructions: Extract all new clinical facts, merge into state, generate ONE empathetic follow-up question. Return ONLY JSON.\n",
629
+ "\n",
630
+ "── E: Raw Ollama HTTP call ──\n",
631
+ "HTTP status : 200\n",
632
+ "Total time : 5.61s\n",
633
+ "Load duration : 0.20s GPU (fast)\n",
634
+ "Tokens/sec : 45.2 GPU (>30)\n",
635
+ "\n",
636
+ "Raw content:\n",
637
+ "{\n",
638
+ " \"chief_complaint\": \"chest pain\",\n",
639
+ " \"onset\": null,\n",
640
+ " \"location\": null,\n",
641
+ " \"duration\": null,\n",
642
+ " \"character\": null,\n",
643
+ " \"severity\": null,\n",
644
+ " \"aggravating\": null,\n",
645
+ " \"relieving\": null,\n",
646
+ " \"ros\": {},\n",
647
+ " \"emergency\": false,\n",
648
+ " \"reply\": \"Can you tell me more about when this chest pain started?\"\n",
649
+ "}\n",
650
+ "\n",
651
+ "── F: JSON parse ──\n",
652
+ "json.loads() OK\n",
653
+ "Missing keys : none\n",
654
+ "Extra keys : {'emergency'}\n",
655
+ "{\n",
656
+ " \"chief_complaint\": \"chest pain\",\n",
657
+ " \"onset\": null,\n",
658
+ " \"location\": null,\n",
659
+ " \"duration\": null,\n",
660
+ " \"character\": null,\n",
661
+ " \"severity\": null,\n",
662
+ " \"aggravating\": null,\n",
663
+ " \"relieving\": null,\n",
664
+ " \"ros\": {},\n",
665
+ " \"emergency\": false,\n",
666
+ " \"reply\": \"Can you tell me more about when this chest pain started?\"\n",
667
+ "}\n",
668
+ "\n",
669
+ "── G: Full app pipeline ──\n",
670
+ "[Ollama] Starting inference for model 'llama3.1:8b'...\n",
671
+ "[Ollama] Inference completed in 5.23s total.\n",
672
+ "CombinedOutput:\n",
673
+ "{\n",
674
+ " \"chief_complaint\": \"chest pain\",\n",
675
+ " \"onset\": null,\n",
676
+ " \"location\": null,\n",
677
+ " \"duration\": null,\n",
678
+ " \"character\": null,\n",
679
+ " \"severity\": null,\n",
680
+ " \"aggravating\": null,\n",
681
+ " \"relieving\": null,\n",
682
+ " \"ros\": {},\n",
683
+ " \"emergency\": false,\n",
684
+ " \"reply\": \"Can you tell me more about when this chest pain started?\"\n",
685
+ "}\n",
686
+ "\n",
687
+ "Stage : hpi\n",
688
+ "Missing : ['HPI:onset', 'HPI:location', 'HPI:duration', 'HPI:character', 'HPI:severity', 'HPI:aggravating', 'HPI:relieving', 'ROS (3 more systems needed)']\n",
689
+ "Reply : 'Can you tell me more about when this chest pain started?'\n",
690
+ "\n",
691
+ "OK: Real LLM reply returned\n",
692
+ "[DONE] Cell 4\n"
693
+ ]
694
+ }
695
+ ]
696
+ },
697
+ {
698
+ "cell_type": "code",
699
+ "source": [
700
+ "# Auto-heal Ollama before each turn\n",
701
+ "import requests, subprocess, os, time\n",
702
+ "def ensure_ollama():\n",
703
+ " try:\n",
704
+ " requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
705
+ " except: pass\n",
706
+ " print(\"⚠️ Ollama dead — restarting...\")\n",
707
+ " env = os.environ.copy()\n",
708
+ " env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
709
+ " env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
710
+ " env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
711
+ " subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
712
+ " stdout=open(\"/content/ollama.log\",\"a\"),\n",
713
+ " stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
714
+ " for _ in range(30):\n",
715
+ " try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
716
+ " except: time.sleep(1)\n",
717
+ "\n",
718
+ "ensure_ollama()\n",
719
+ "\n",
720
+ "import subprocess, time, requests, os\n",
721
+ "\n",
722
+ "subprocess.run([\"pkill\", \"-f\", \"uvicorn\"], capture_output=True)\n",
723
+ "time.sleep(2)\n",
724
+ "\n",
725
+ "env = os.environ.copy()\n",
726
+ "env[\"MOCK_LLM\"] = \"false\"\n",
727
+ "env[\"MODEL_NAME\"] = \"llama3.1:8b\"\n",
728
+ "env[\"OLLAMA_HOST\"] = \"http://localhost:11434\"\n",
729
+ "\n",
730
+ "log = open(\"/content/api.log\", \"w\")\n",
731
+ "proc = subprocess.Popen(\n",
732
+ " [\"python\", \"-m\", \"uvicorn\", \"app.main:app\",\n",
733
+ " \"--host\", \"0.0.0.0\", \"--port\", \"7860\", \"--log-level\", \"info\"],\n",
734
+ " cwd=\"/content/medintake-ai\",\n",
735
+ " env=env, stdout=log, stderr=log,\n",
736
+ " preexec_fn=os.setpgrp\n",
737
+ ")\n",
738
+ "print(f\"uvicorn PID: {proc.pid}\")\n",
739
+ "\n",
740
+ "for i in range(20):\n",
741
+ " try:\n",
742
+ " r = requests.get(\"http://localhost:7860/health\", timeout=2)\n",
743
+ " if r.status_code == 200:\n",
744
+ " d = r.json()\n",
745
+ " print(f\"✅ FastAPI ready after {i+1}s\")\n",
746
+ " print(f\" mock_mode = {d.get('mock_mode')} ← must be False\")\n",
747
+ " break\n",
748
+ " except: pass\n",
749
+ " print(f\" ...{i+1}s\")\n",
750
+ " time.sleep(1)\n",
751
+ "else:\n",
752
+ " print(\"❌ Failed — dumping api.log:\")\n",
753
+ " print(open(\"/content/api.log\").read()[-2000:])"
754
+ ],
755
+ "metadata": {
756
+ "colab": {
757
+ "base_uri": "https://localhost:8080/"
758
+ },
759
+ "id": "qhgapTbx0qRi",
760
+ "outputId": "886d3a8c-928e-4051-926a-878534769de1"
761
+ },
762
+ "execution_count": null,
763
+ "outputs": [
764
+ {
765
+ "output_type": "stream",
766
+ "name": "stdout",
767
+ "text": [
768
+ "uvicorn PID: 19612\n",
769
+ " ...1s\n",
770
+ " ...2s\n",
771
+ "✅ FastAPI ready after 3s\n",
772
+ " mock_mode = False ← must be False\n"
773
+ ]
774
+ }
775
+ ]
776
+ },
777
+ {
778
+ "cell_type": "code",
779
+ "source": [
780
+ "# Auto-heal Ollama before each turn\n",
781
+ "import requests, subprocess, os, time\n",
782
+ "def ensure_ollama():\n",
783
+ " try:\n",
784
+ " requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
785
+ " except: pass\n",
786
+ " print(\"⚠️ Ollama dead — restarting...\")\n",
787
+ " env = os.environ.copy()\n",
788
+ " env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
789
+ " env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
790
+ " env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
791
+ " subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
792
+ " stdout=open(\"/content/ollama.log\",\"a\"),\n",
793
+ " stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
794
+ " for _ in range(30):\n",
795
+ " try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
796
+ " except: time.sleep(1)\n",
797
+ "\n",
798
+ "ensure_ollama()\n",
799
+ "\n",
800
+ "import subprocess, time, requests\n",
801
+ "\n",
802
+ "PUBLIC_IP = requests.get(\"https://ipv4.icanhazip.com\", timeout=5).text.strip()\n",
803
+ "print(f\"Tunnel password: {PUBLIC_IP}\")\n",
804
+ "print(\"Starting tunnel...\")\n",
805
+ "\n",
806
+ "# Start localtunnel (lt) as a background process and capture its output to a log file\n",
807
+ "tunnel_log = open(\"/content/tunnel.log\", \"w\")\n",
808
+ "proc = subprocess.Popen(\n",
809
+ " [\"lt\", \"--port\", \"7860\"],\n",
810
+ " stdout=tunnel_log, stderr=tunnel_log,\n",
811
+ " preexec_fn=__import__(\"os\").setpgrp\n",
812
+ ")\n",
813
+ "print(f\"Tunnel PID: {proc.pid}\")\n",
814
+ "\n",
815
+ "# Wait for URL to appear in log\n",
816
+ "for i in range(15):\n",
817
+ " time.sleep(1)\n",
818
+ " try:\n",
819
+ " txt = open(\"/content/tunnel.log\").read()\n",
820
+ " if \"loca.lt\" in txt or \"https://\" in txt:\n",
821
+ " for line in txt.splitlines():\n",
822
+ " if \"https://\" in line:\n",
823
+ " print(f\"\\n🌐 PUBLIC URL: {line.strip()}\")\n",
824
+ " break\n",
825
+ " except: pass\n",
826
+ " print(f\" ...waiting for URL {i+1}s\")\n",
827
+ "else:\n",
828
+ " print(\"⚠️ URL not found yet — run: !cat /content/tunnel.log\")\n",
829
+ "\n",
830
+ "print(\"\\n✅ Cell 5B done — proceed to Cell 6\")"
831
+ ],
832
+ "metadata": {
833
+ "colab": {
834
+ "base_uri": "https://localhost:8080/"
835
+ },
836
+ "id": "ub55lE6d3LMA",
837
+ "outputId": "9aac6f5a-e022-493b-868c-c5fd5e425297"
838
+ },
839
+ "execution_count": null,
840
+ "outputs": [
841
+ {
842
+ "output_type": "stream",
843
+ "name": "stdout",
844
+ "text": [
845
+ "Tunnel password: 34.87.70.249\n",
846
+ "Starting tunnel...\n",
847
+ "Tunnel PID: 19630\n",
848
+ " ...waiting for URL 1s\n",
849
+ "\n",
850
+ "🌐 PUBLIC URL: your url is: https://proud-bears-drum.loca.lt\n",
851
+ "\n",
852
+ "✅ Cell 5B done — proceed to Cell 6\n"
853
+ ]
854
+ }
855
+ ]
856
+ },
857
+ {
858
+ "cell_type": "code",
859
+ "source": [
860
+ "\n",
861
+ "# Auto-heal Ollama before each turn\n",
862
+ "import requests, subprocess, os, time\n",
863
+ "def ensure_ollama():\n",
864
+ " try:\n",
865
+ " requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
866
+ " except: pass\n",
867
+ " print(\"⚠️ Ollama dead — restarting...\")\n",
868
+ " env = os.environ.copy()\n",
869
+ " env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
870
+ " env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
871
+ " env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
872
+ " subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
873
+ " stdout=open(\"/content/ollama.log\",\"a\"),\n",
874
+ " stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
875
+ " for _ in range(30):\n",
876
+ " try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
877
+ " except: time.sleep(1)\n",
878
+ "\n",
879
+ "ensure_ollama()\n",
880
+ "\n",
881
+ "import requests, json, time, subprocess\n",
882
+ "\n",
883
+ "SESSION_ID = \"debug_session_001\" # keep same across all turns\n",
884
+ "USER_MSG = \"I have chest pain\" # ← change each turn\n",
885
+ "\n",
886
+ "print(f\"Session: {SESSION_ID} | Message: {USER_MSG}\")\n",
887
+ "\n",
888
+ "t0 = time.time()\n",
889
+ "r = requests.post(\"http://localhost:7860/chat\",\n",
890
+ " json={\"session_id\": SESSION_ID, \"message\": USER_MSG},\n",
891
+ " timeout=120)\n",
892
+ "elapsed = time.time() - t0\n",
893
+ "\n",
894
+ "d = r.json()\n",
895
+ "print(f\"\\nHTTP {r.status_code} ({elapsed:.1f}s)\")\n",
896
+ "print(json.dumps(d, indent=2))\n",
897
+ "print(f\"\\nStage : {d.get('state')}\")\n",
898
+ "print(f\"Reply : {d.get('reply')}\")\n",
899
+ "\n",
900
+ "if d.get(\"brief\"):\n",
901
+ " print(\"\\n📋 CLINICAL BRIEF:\")\n",
902
+ " print(json.dumps(d[\"brief\"], indent=2))\n",
903
+ "\n",
904
+ "FALLBACK = {\"Could you tell me more?\", \"\", None, \"Could you please repeat that?\"}\n",
905
+ "if d.get(\"reply\") in FALLBACK:\n",
906
+ " print(\"\\n⚠️ FALLBACK reply — dumping api.log:\")\n",
907
+ " print(open(\"/content/api.log\").read()[-2000:])\n",
908
+ "\n",
909
+ "# Quick GPU check\n",
910
+ "smi = subprocess.run([\"nvidia-smi\",\"--query-gpu=memory.used,memory.total\",\n",
911
+ " \"--format=csv,noheader\"], capture_output=True, text=True)\n",
912
+ "ps = subprocess.run([\"ollama\",\"ps\"], capture_output=True, text=True)\n",
913
+ "print(f\"\\nGPU RAM : {smi.stdout.strip()}\")\n",
914
+ "print(f\"ollama ps: {ps.stdout.strip()}\")\n",
915
+ "print(subprocess.run([\"tail\",\"-n\",\"15\",\"/content/api.log\"],\n",
916
+ " capture_output=True, text=True).stdout)"
917
+ ],
918
+ "metadata": {
919
+ "colab": {
920
+ "base_uri": "https://localhost:8080/"
921
+ },
922
+ "id": "4kokp0w50rQQ",
923
+ "outputId": "a0de78f6-fe97-487b-ed0d-f4ebaa4ed770"
924
+ },
925
+ "execution_count": null,
926
+ "outputs": [
927
+ {
928
+ "output_type": "stream",
929
+ "name": "stdout",
930
+ "text": [
931
+ "Session: debug_session_001 | Message: I have chest pain\n",
932
+ "\n",
933
+ "HTTP 200 (6.2s)\n",
934
+ "{\n",
935
+ " \"reply\": \"Can you tell me more about when this chest pain started?\",\n",
936
+ " \"state\": \"hpi\",\n",
937
+ " \"brief\": null\n",
938
+ "}\n",
939
+ "\n",
940
+ "Stage : hpi\n",
941
+ "Reply : Can you tell me more about when this chest pain started?\n",
942
+ "\n",
943
+ "GPU RAM : 5369 MiB, 15360 MiB\n",
944
+ "ollama ps: NAME ID SIZE PROCESSOR CONTEXT UNTIL \n",
945
+ "llama3.1:8b 46e0c10c039e 5.5 GB 100% GPU 4096 2 hours from now\n",
946
+ "INFO: Waiting for application startup.\n",
947
+ "INFO: Application startup complete.\n",
948
+ "INFO: Uvicorn running on http://0.0.0.0:7860 (Press CTRL+C to quit)\n",
949
+ "INFO: 127.0.0.1:49798 - \"GET /health HTTP/1.1\" 200 OK\n",
950
+ "\n",
951
+ "[1777158113.511] [API] -> POST /chat received for debug_session_001\n",
952
+ "[1777158113.512] [API] Read existing state snapshot.\n",
953
+ "[1777158113.512] [API] Starting new graph invoke...\n",
954
+ "[1777158113.521] [Graph Node] Requesting LLM inference...\n",
955
+ "[Ollama] Starting inference for model 'llama3.1:8b'...\n",
956
+ "[Ollama] Inference completed in 6.17s total.\n",
957
+ "[1777158119.696] [Graph Node] LLM returned. Preparing node dictionaries...\n",
958
+ "[1777158119.697] [API] <- Graph invoke returned in 6.19s\n",
959
+ "[1777158119.698] [API] Chat completed in 6.19s total. Reply length: 56\n",
960
+ "INFO: 127.0.0.1:49810 - \"POST /chat HTTP/1.1\" 200 OK\n",
961
+ "\n"
962
+ ]
963
+ }
964
+ ]
965
+ },
966
+ {
967
+ "cell_type": "code",
968
+ "source": [
969
+ "import subprocess\n",
970
+ "for log in [\"/content/api.log\", \"/content/ollama.log\"]:\n",
971
+ " print(f\"\\n{'='*55}\\n {log}\\n{'='*55}\")\n",
972
+ " print(subprocess.run([\"tail\",\"-n\",\"40\",log], capture_output=True, text=True).stdout or \"(empty)\")\n"
973
+ ],
974
+ "metadata": {
975
+ "colab": {
976
+ "base_uri": "https://localhost:8080/"
977
+ },
978
+ "id": "1WZOi3Hn0sff",
979
+ "outputId": "6edc93ea-2f1b-4fb8-c631-bb84032fe1e8"
980
+ },
981
+ "execution_count": null,
982
+ "outputs": [
983
+ {
984
+ "output_type": "stream",
985
+ "name": "stdout",
986
+ "text": [
987
+ "\n",
988
+ "=======================================================\n",
989
+ " /content/api.log\n",
990
+ "=======================================================\n",
991
+ "INFO: Started server process [19612]\n",
992
+ "INFO: Waiting for application startup.\n",
993
+ "INFO: Application startup complete.\n",
994
+ "INFO: Uvicorn running on http://0.0.0.0:7860 (Press CTRL+C to quit)\n",
995
+ "INFO: 127.0.0.1:49798 - \"GET /health HTTP/1.1\" 200 OK\n",
996
+ "\n",
997
+ "[1777158113.511] [API] -> POST /chat received for debug_session_001\n",
998
+ "[1777158113.512] [API] Read existing state snapshot.\n",
999
+ "[1777158113.512] [API] Starting new graph invoke...\n",
1000
+ "[1777158113.521] [Graph Node] Requesting LLM inference...\n",
1001
+ "[Ollama] Starting inference for model 'llama3.1:8b'...\n",
1002
+ "[Ollama] Inference completed in 6.17s total.\n",
1003
+ "[1777158119.696] [Graph Node] LLM returned. Preparing node dictionaries...\n",
1004
+ "[1777158119.697] [API] <- Graph invoke returned in 6.19s\n",
1005
+ "[1777158119.698] [API] Chat completed in 6.19s total. Reply length: 56\n",
1006
+ "INFO: 127.0.0.1:49810 - \"POST /chat HTTP/1.1\" 200 OK\n",
1007
+ "\n",
1008
+ "\n",
1009
+ "=======================================================\n",
1010
+ " /content/ollama.log\n",
1011
+ "=======================================================\n",
1012
+ "load_tensors: CPU_Mapped model buffer size = 281.81 MiB\n",
1013
+ "load_tensors: CUDA0 model buffer size = 4403.49 MiB\n",
1014
+ "llama_context: constructing llama_context\n",
1015
+ "llama_context: n_seq_max = 1\n",
1016
+ "llama_context: n_ctx = 4096\n",
1017
+ "llama_context: n_ctx_seq = 4096\n",
1018
+ "llama_context: n_batch = 512\n",
1019
+ "llama_context: n_ubatch = 512\n",
1020
+ "llama_context: causal_attn = 1\n",
1021
+ "llama_context: flash_attn = auto\n",
1022
+ "llama_context: kv_unified = false\n",
1023
+ "llama_context: freq_base = 500000.0\n",
1024
+ "llama_context: freq_scale = 1\n",
1025
+ "llama_context: n_ctx_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized\n",
1026
+ "llama_context: CUDA_Host output buffer size = 0.50 MiB\n",
1027
+ "llama_kv_cache: CUDA0 KV buffer size = 512.00 MiB\n",
1028
+ "llama_kv_cache: size = 512.00 MiB ( 4096 cells, 32 layers, 1/1 seqs), K (f16): 256.00 MiB, V (f16): 256.00 MiB\n",
1029
+ "llama_context: Flash Attention was auto, set to enabled\n",
1030
+ "llama_context: CUDA0 compute buffer size = 258.50 MiB\n",
1031
+ "llama_context: CUDA_Host compute buffer size = 16.01 MiB\n",
1032
+ "llama_context: graph nodes = 999\n",
1033
+ "llama_context: graph splits = 2\n",
1034
+ "time=2026-04-25T23:01:03.205Z level=INFO source=server.go:1402 msg=\"llama runner started in 2.68 seconds\"\n",
1035
+ "time=2026-04-25T23:01:03.205Z level=INFO source=sched.go:561 msg=\"loaded runners\" count=1\n",
1036
+ "time=2026-04-25T23:01:03.205Z level=INFO source=server.go:1364 msg=\"waiting for llama runner to start responding\"\n",
1037
+ "time=2026-04-25T23:01:03.206Z level=INFO source=server.go:1402 msg=\"llama runner started in 2.68 seconds\"\n",
1038
+ "[GIN] 2026/04/25 - 23:01:09 | 200 | 9.705078074s | 127.0.0.1 | POST \"/api/chat\"\n",
1039
+ "[GIN] 2026/04/25 - 23:01:09 | 200 | 59.798µs | 127.0.0.1 | HEAD \"/\"\n",
1040
+ "[GIN] 2026/04/25 - 23:01:09 | 200 | 129.278µs | 127.0.0.1 | GET \"/api/ps\"\n",
1041
+ "[GIN] 2026/04/25 - 23:01:36 | 200 | 354.357µs | 127.0.0.1 | GET \"/api/tags\"\n",
1042
+ "[GIN] 2026/04/25 - 23:01:36 | 200 | 33.021µs | 127.0.0.1 | HEAD \"/\"\n",
1043
+ "[GIN] 2026/04/25 - 23:01:36 | 200 | 38.107µs | 127.0.0.1 | GET \"/api/ps\"\n",
1044
+ "[GIN] 2026/04/25 - 23:01:42 | 200 | 5.606222729s | 127.0.0.1 | POST \"/api/chat\"\n",
1045
+ "[GIN] 2026/04/25 - 23:01:47 | 200 | 5.227727634s | 127.0.0.1 | POST \"/api/chat\"\n",
1046
+ "[GIN] 2026/04/25 - 23:01:47 | 200 | 510.297µs | 127.0.0.1 | GET \"/api/tags\"\n",
1047
+ "[GIN] 2026/04/25 - 23:01:51 | 200 | 622.673µs | 127.0.0.1 | GET \"/api/tags\"\n",
1048
+ "[GIN] 2026/04/25 - 23:01:53 | 200 | 655.176µs | 127.0.0.1 | GET \"/api/tags\"\n",
1049
+ "[GIN] 2026/04/25 - 23:01:59 | 200 | 6.17069071s | 127.0.0.1 | POST \"/api/chat\"\n",
1050
+ "[GIN] 2026/04/25 - 23:01:59 | 200 | 35.997µs | 127.0.0.1 | HEAD \"/\"\n",
1051
+ "[GIN] 2026/04/25 - 23:01:59 | 200 | 38.825µs | 127.0.0.1 | GET \"/api/ps\"\n",
1052
+ "\n"
1053
+ ]
1054
+ }
1055
+ ]
1056
+ },
1057
+ {
1058
+ "cell_type": "code",
1059
+ "source": [
1060
+ "from google.colab import drive\n",
1061
+ "drive.mount('/content/drive')"
1062
+ ],
1063
+ "metadata": {
1064
+ "colab": {
1065
+ "base_uri": "https://localhost:8080/",
1066
+ "height": 356
1067
+ },
1068
+ "id": "xxtmukiS11_T",
1069
+ "outputId": "016c6964-4288-4647-ae12-b28b067c2552"
1070
+ },
1071
+ "execution_count": null,
1072
+ "outputs": [
1073
+ {
1074
+ "output_type": "error",
1075
+ "ename": "MessageError",
1076
+ "evalue": "Error: credential propagation was unsuccessful",
1077
+ "traceback": [
1078
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
1079
+ "\u001b[0;31mMessageError\u001b[0m Traceback (most recent call last)",
1080
+ "\u001b[0;32m/tmp/ipykernel_14360/1408506528.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mgoogle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolab\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdrive\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdrive\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/content/drive'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
1081
+ "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/drive.py\u001b[0m in \u001b[0;36mmount\u001b[0;34m(mountpoint, force_remount, timeout_ms, readonly)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmountpoint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforce_remount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_ms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m120000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreadonly\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;34m\"\"\"Mount your Google Drive at the specified mountpoint path.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m return _mount(\n\u001b[0m\u001b[1;32m 98\u001b[0m \u001b[0mmountpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mforce_remount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mforce_remount\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1082
+ "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/drive.py\u001b[0m in \u001b[0;36m_mount\u001b[0;34m(mountpoint, force_remount, timeout_ms, ephemeral, readonly)\u001b[0m\n\u001b[1;32m 132\u001b[0m )\n\u001b[1;32m 133\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mephemeral\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 134\u001b[0;31m _message.blocking_request(\n\u001b[0m\u001b[1;32m 135\u001b[0m \u001b[0;34m'request_auth'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'authType'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'dfs_ephemeral'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
1083
+ "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/_message.py\u001b[0m in \u001b[0;36mblocking_request\u001b[0;34m(request_type, request, timeout_sec, parent)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0mrequest_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpect_reply\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 175\u001b[0m )\n\u001b[0;32m--> 176\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mread_reply_from_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_sec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
1084
+ "\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/_message.py\u001b[0m in \u001b[0;36mread_reply_from_input\u001b[0;34m(message_id, timeout_sec)\u001b[0m\n\u001b[1;32m 101\u001b[0m ):\n\u001b[1;32m 102\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'error'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mreply\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 103\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mMessageError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreply\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 104\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mreply\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
1085
+ "\u001b[0;31mMessageError\u001b[0m: Error: credential propagation was unsuccessful"
1086
+ ]
1087
+ }
1088
+ ]
1089
+ },
1090
+ {
1091
+ "cell_type": "code",
1092
+ "source": [
1093
+ "import subprocess, os, time\n",
1094
+ "\n",
1095
+ "subprocess.run([\"pkill\",\"-f\",\"ollama\"])\n",
1096
+ "time.sleep(3)\n",
1097
+ "\n",
1098
+ "env = os.environ.copy()\n",
1099
+ "env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
1100
+ "\n",
1101
+ "subprocess.Popen([\"ollama\",\"serve\"], env=env)\n",
1102
+ "print(\"Restarted Ollama\")"
1103
+ ],
1104
+ "metadata": {
1105
+ "colab": {
1106
+ "base_uri": "https://localhost:8080/"
1107
+ },
1108
+ "id": "SKcQv5Ng0thH",
1109
+ "outputId": "e3b95518-e432-4669-8fd0-50b86fdb216f"
1110
+ },
1111
+ "execution_count": null,
1112
+ "outputs": [
1113
+ {
1114
+ "output_type": "stream",
1115
+ "name": "stdout",
1116
+ "text": [
1117
+ "Restarted Ollama\n"
1118
+ ]
1119
+ }
1120
+ ]
1121
+ },
1122
+ {
1123
+ "cell_type": "code",
1124
+ "source": [],
1125
+ "metadata": {
1126
+ "id": "T51mIyDV1wiD"
1127
+ },
1128
+ "execution_count": null,
1129
+ "outputs": []
1130
+ }
1131
+ ]
1132
+ }