Spaces:
Sleeping
Sleeping
priyansh-saxena1 committed on
Commit ·
f538014
1
Parent(s): c9ecd03
fix: ROS hallucination guard + debug logging
Browse files
- clinical_ai_agent_fixed.ipynb +1132 -0
clinical_ai_agent_fixed.ipynb
ADDED
|
@@ -0,0 +1,1132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"nbformat": 4,
|
| 3 |
+
"nbformat_minor": 0,
|
| 4 |
+
"metadata": {
|
| 5 |
+
"colab": {
|
| 6 |
+
"provenance": [],
|
| 7 |
+
"gpuType": "T4"
|
| 8 |
+
},
|
| 9 |
+
"kernelspec": {
|
| 10 |
+
"name": "python3",
|
| 11 |
+
"display_name": "Python 3"
|
| 12 |
+
},
|
| 13 |
+
"language_info": {
|
| 14 |
+
"name": "python"
|
| 15 |
+
},
|
| 16 |
+
"accelerator": "GPU"
|
| 17 |
+
},
|
| 18 |
+
"cells": [
|
| 19 |
+
{
|
| 20 |
+
"cell_type": "code",
|
| 21 |
+
"source": [
|
| 22 |
+
"%%bash\n",
|
| 23 |
+
"set -euo pipefail\n",
|
| 24 |
+
"\n",
|
| 25 |
+
"echo '════════════════════════════════════════════'\n",
|
| 26 |
+
"echo ' CELL 1 — System tools + Install Ollama'\n",
|
| 27 |
+
"echo '════════════════════════════════════════════'\n",
|
| 28 |
+
"\n",
|
| 29 |
+
"apt-get update -qq\n",
|
| 30 |
+
"apt-get install -y zstd pciutils curl 2>&1 | tail -3\n",
|
| 31 |
+
"echo '[OK] apt packages installed'\n",
|
| 32 |
+
"\n",
|
| 33 |
+
"npm install -g localtunnel 2>&1 | tail -2\n",
|
| 34 |
+
"echo \"[OK] localtunnel $(lt --version)\"\n",
|
| 35 |
+
"\n",
|
| 36 |
+
"echo ''\n",
|
| 37 |
+
"echo '── Installing Ollama ──'\n",
|
| 38 |
+
"curl -fsSL https://ollama.com/install.sh | sh\n",
|
| 39 |
+
"ollama --version\n",
|
| 40 |
+
"\n",
|
| 41 |
+
"echo ''\n",
|
| 42 |
+
"echo '[DONE] Cell 1 complete'"
|
| 43 |
+
],
|
| 44 |
+
"metadata": {
|
| 45 |
+
"colab": {
|
| 46 |
+
"base_uri": "https://localhost:8080/"
|
| 47 |
+
},
|
| 48 |
+
"id": "uxsL2dxR0kMA",
|
| 49 |
+
"outputId": "d17a4773-e25a-48a3-9336-781913fc1f6c"
|
| 50 |
+
},
|
| 51 |
+
"execution_count": null,
|
| 52 |
+
"outputs": [
|
| 53 |
+
{
|
| 54 |
+
"output_type": "stream",
|
| 55 |
+
"name": "stdout",
|
| 56 |
+
"text": [
|
| 57 |
+
"════════════════════════════════════════════\n",
|
| 58 |
+
" CELL 1 — System tools + Install Ollama\n",
|
| 59 |
+
"════════════════════════════════════════════\n",
|
| 60 |
+
"\r\n",
|
| 61 |
+
"/sbin/ldconfig.real: /usr/local/lib/libur_adapter_level_zero.so.0 is not a symbolic link\r\n",
|
| 62 |
+
"\r\n",
|
| 63 |
+
"[OK] apt packages installed\n",
|
| 64 |
+
"npm notice To update run: npm install -g npm@11.13.0\n",
|
| 65 |
+
"npm notice\n",
|
| 66 |
+
"[OK] localtunnel 2.0.2\n",
|
| 67 |
+
"\n",
|
| 68 |
+
"── Installing Ollama ──\n",
|
| 69 |
+
"\u001b[1m\u001b[31mWARNING:\u001b[m systemd is not running\n",
|
| 70 |
+
"Warning: could not connect to a running Ollama instance\n",
|
| 71 |
+
"Warning: client version is 0.21.2\n",
|
| 72 |
+
"\n",
|
| 73 |
+
"[DONE] Cell 1 complete\n"
|
| 74 |
+
]
|
| 75 |
+
},
|
| 76 |
+
{
|
| 77 |
+
"output_type": "stream",
|
| 78 |
+
"name": "stderr",
|
| 79 |
+
"text": [
|
| 80 |
+
"W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)\n",
|
| 81 |
+
">>> Installing ollama to /usr/local\n",
|
| 82 |
+
">>> Downloading ollama-linux-amd64.tar.zst\n",
|
| 83 |
+
"#=#=# \r##O#-# \r##O=# # \r#=#=-# # \r\r 0.1%\r 0.3%\r 0.6%\r 1.0%\r# 1.4%\r# 2.0%\r# 2.6%\r## 3.2%\r## 3.7%\r## 3.9%\r## 4.1%\r### 4.2%\r### 4.4%\r### 4.6%\r### 4.7%\r### 4.9%\r### 5.1%\r### 5.3%\r### 5.5%\r#### 5.6%\r#### 5.8%\r#### 6.0%\r#### 6.2%\r#### 6.4%\r#### 6.6%\r#### 6.7%\r#### 6.9%\r##### 7.0%\r##### 7.3%\r##### 7.5%\r##### 7.7%\r##### 7.9%\r##### 8.1%\r##### 8.2%\r###### 8.4%\r###### 8.6%\r###### 8.8%\r###### 9.0%\r###### 9.2%\r###### 9.4%\r###### 9.6%\r####### 9.8%\r####### 10.1%\r####### 10.3%\r####### 10.5%\r####### 10.7%\r####### 10.9%\r####### 11.1%\r######## 11.3%\r######## 11.5%\r######## 11.7%\r######## 11.9%\r######## 12.1%\r######## 12.3%\r######## 12.4%\r######### 12.5%\r######### 12.6%\r######### 12.8%\r######### 13.0%\r######### 13.2%\r######### 13.4%\r######### 13.5%\r######### 13.9%\r########## 14.1%\r########## 14.4%\r########## 14.7%\r########## 15.0%\r########### 15.5%\r########### 15.9%\r########### 16.2%\r########### 16.6%\r############ 16.9%\r############ 17.5%\r############ 17.9%\r############# 18.5%\r############# 19.2%\r############## 19.6%\r############## 19.8%\r############## 20.1%\r############## 20.5%\r############### 20.9%\r############### 21.6%\r############### 21.8%\r############### 21.9%\r############### 21.9%\r############### 22.0%\r############### 22.1%\r################ 23.2%\r################# 24.6%\r################## 25.8%\r################### 26.5%\r################### 26.8%\r################### 27.1%\r#################### 27.9%\r#################### 29.0%\r##################### 30.2%\r###################### 31.3%\r###################### 31.5%\r###################### 31.7%\r###################### 31.8%\r####################### 32.0%\r####################### 32.5%\r####################### 32.9%\r####################### 33.3%\r######################## 34.5%\r######################### 35.6%\r########################## 36.4%\r########################## 36.7%\r########################## 
37.5%\r########################### 38.5%\r############################ 39.5%\r############################# 40.6%\r############################# 41.5%\r############################## 42.2%\r############################## 42.6%\r############################## 42.8%\r############################### 43.3%\r############################### 43.5%\r############################### 43.8%\r############################### 44.2%\r################################ 45.1%\r################################# 46.1%\r################################# 47.0%\r################################## 48.0%\r################################## 48.4%\r################################### 48.8%\r################################### 49.3%\r################################### 49.8%\r#################################### 50.6%\r#################################### 51.3%\r##################################### 51.8%\r##################################### 52.4%\r###################################### 52.9%\r###################################### 53.4%\r###################################### 53.9%\r####################################### 54.6%\r####################################### 55.5%\r######################################## 56.4%\r######################################### 57.0%\r######################################### 57.2%\r######################################### 57.3%\r######################################### 57.5%\r######################################### 57.7%\r######################################### 57.9%\r######################################### 58.2%\r########################################## 58.4%\r########################################## 58.5%\r########################################## 58.6%\r########################################## 58.7%\r########################################## 58.8%\r########################################## 59.2%\r########################################## 59.5%\r########################################### 
59.8%\r########################################### 60.3%\r############################################ 61.1%\r############################################ 62.1%\r############################################# 62.9%\r############################################# 63.3%\r############################################# 63.6%\r############################################# 63.8%\r############################################## 64.0%\r############################################## 64.2%\r############################################## 64.4%\r############################################## 64.6%\r############################################## 65.0%\r############################################### 65.3%\r############################################### 66.0%\r############################################### 66.6%\r################################################ 67.8%\r################################################# 68.9%\r################################################## 70.2%\r################################################### 71.4%\r################################################### 71.6%\r################################################### 71.8%\r#################################################### 72.8%\r##################################################### 73.9%\r##################################################### 75.0%\r###################################################### 76.0%\r####################################################### 76.9%\r######################################################## 77.9%\r######################################################## 79.0%\r######################################################### 79.9%\r######################################################### 80.2%\r######################################################### 80.5%\r########################################################## 80.8%\r########################################################## 81.2%\r########################################################## 
81.5%\r########################################################## 81.7%\r########################################################### 82.1%\r########################################################### 82.8%\r############################################################ 83.7%\r############################################################ 84.0%\r############################################################ 84.7%\r############################################################# 85.5%\r############################################################## 86.4%\r############################################################## 87.1%\r############################################################### 87.5%\r############################################################### 88.3%\r################################################################ 89.4%\r################################################################# 90.7%\r################################################################## 92.0%\r################################################################### 93.4%\r#################################################################### 94.8%\r#################################################################### 95.7%\r##################################################################### 97.2%\r###################################################################### 98.4%\r####################################################################### 98.6%\r####################################################################### 98.8%\r####################################################################### 99.1%\r####################################################################### 99.3%\r####################################################################### 99.5%\r####################################################################### 99.8%\r####################################################################### 99.9%\r######################################################################## 100.0%\n",
|
| 84 |
+
">>> Creating ollama user...\n",
|
| 85 |
+
">>> Adding ollama user to video group...\n",
|
| 86 |
+
">>> Adding current user to ollama group...\n",
|
| 87 |
+
">>> Creating ollama systemd service...\n",
|
| 88 |
+
">>> NVIDIA GPU installed.\n",
|
| 89 |
+
">>> The Ollama API is now available at 127.0.0.1:11434.\n",
|
| 90 |
+
">>> Install complete. Run \"ollama\" from the command line.\n"
|
| 91 |
+
]
|
| 92 |
+
}
|
| 93 |
+
]
|
| 94 |
+
},
|
| 95 |
+
{
|
| 96 |
+
"cell_type": "code",
|
| 97 |
+
"source": [
|
| 98 |
+
"import subprocess, os, glob, time, requests\n",
|
| 99 |
+
"\n",
|
| 100 |
+
"print(\"════════════════════════════════════════════\")\n",
|
| 101 |
+
"print(\" CELL 2 — GPU Fix + Ollama GPU Start\")\n",
|
| 102 |
+
"print(\"════════════════════════════════════════════\")\n",
|
| 103 |
+
"\n",
|
| 104 |
+
"subprocess.run([\"pkill\", \"-f\", \"ollama\"], capture_output=True)\n",
|
| 105 |
+
"time.sleep(2)\n",
|
| 106 |
+
"print(\"[OK] Killed any stale Ollama process\")\n",
|
| 107 |
+
"\n",
|
| 108 |
+
"print(\"\\n── Discovering NVIDIA/CUDA libs ──\")\n",
|
| 109 |
+
"r = subprocess.run(\"find /usr/lib64-nvidia /usr/local/cuda* -name 'libcuda.so*' 2>/dev/null\",\n",
|
| 110 |
+
" shell=True, capture_output=True, text=True)\n",
|
| 111 |
+
"print(r.stdout.strip() or \"WARNING: No libcuda found!\")\n",
|
| 112 |
+
"\n",
|
| 113 |
+
"ollama_lib = \"/usr/local/lib/ollama\"\n",
|
| 114 |
+
"print(f\"\\n── Ollama lib dir: {ollama_lib} ──\")\n",
|
| 115 |
+
"r2 = subprocess.run(f\"ls {ollama_lib}/\", shell=True, capture_output=True, text=True)\n",
|
| 116 |
+
"print(r2.stdout.strip())\n",
|
| 117 |
+
"\n",
|
| 118 |
+
"nvidia_dir = \"/usr/lib64-nvidia\"\n",
|
| 119 |
+
"cuda_dir = \"/usr/local/cuda/lib64\"\n",
|
| 120 |
+
"cuda128 = \"/usr/local/cuda-12.8/targets/x86_64-linux/lib\"\n",
|
| 121 |
+
"\n",
|
| 122 |
+
"needed = {\n",
|
| 123 |
+
" \"libcuda.so\": [f\"{nvidia_dir}/libcuda.so\", f\"{nvidia_dir}/libcuda.so.1\"],\n",
|
| 124 |
+
" \"libcuda.so.1\": [f\"{nvidia_dir}/libcuda.so.1\"],\n",
|
| 125 |
+
" \"libnvidia-ml.so.1\": [f\"{nvidia_dir}/libnvidia-ml.so.1\"],\n",
|
| 126 |
+
" \"libnvidia-ml.so\": [f\"{nvidia_dir}/libnvidia-ml.so.1\"],\n",
|
| 127 |
+
" \"libcudart.so.12\": [f\"{cuda_dir}/libcudart.so.12\", f\"{cuda128}/libcudart.so.12\"],\n",
|
| 128 |
+
"}\n",
|
| 129 |
+
"\n",
|
| 130 |
+
"print(\"\\n── Creating symlinks ──\")\n",
|
| 131 |
+
"for dst_name, srcs in needed.items():\n",
|
| 132 |
+
" dst = os.path.join(ollama_lib, dst_name)\n",
|
| 133 |
+
" if os.path.lexists(dst):\n",
|
| 134 |
+
" os.remove(dst)\n",
|
| 135 |
+
" for src in srcs:\n",
|
| 136 |
+
" if os.path.exists(src):\n",
|
| 137 |
+
" os.symlink(src, dst)\n",
|
| 138 |
+
" print(f\" ✅ {src} → {dst}\")\n",
|
| 139 |
+
" break\n",
|
| 140 |
+
" else:\n",
|
| 141 |
+
" print(f\" ⚠️ MISSING: {dst_name} (no source found)\")\n",
|
| 142 |
+
"\n",
|
| 143 |
+
"gpu_env = os.environ.copy()\n",
|
| 144 |
+
"gpu_env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
|
| 145 |
+
"gpu_env[\"LD_LIBRARY_PATH\"] = (\n",
|
| 146 |
+
" f\"{nvidia_dir}:{ollama_lib}:/usr/local/cuda/lib64:\"\n",
|
| 147 |
+
" + os.environ.get(\"LD_LIBRARY_PATH\", \"\")\n",
|
| 148 |
+
")\n",
|
| 149 |
+
"gpu_env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
|
| 150 |
+
"gpu_env[\"OLLAMA_DEBUG\"] = \"INFO\"\n",
|
| 151 |
+
"\n",
|
| 152 |
+
"print(f\"\\n── Starting Ollama daemon ──\")\n",
|
| 153 |
+
"print(f\"LD_LIBRARY_PATH starts with: {gpu_env['LD_LIBRARY_PATH'][:80]}...\")\n",
|
| 154 |
+
"log = open(\"/content/ollama.log\", \"w\")\n",
|
| 155 |
+
"proc = subprocess.Popen([\"ollama\", \"serve\"], env=gpu_env, stdout=log, stderr=log,\n",
|
| 156 |
+
" preexec_fn=os.setpgrp)\n",
|
| 157 |
+
"print(f\"[OK] Ollama PID: {proc.pid}\")\n",
|
| 158 |
+
"\n",
|
| 159 |
+
"for i in range(25):\n",
|
| 160 |
+
" try:\n",
|
| 161 |
+
" requests.get(\"http://localhost:11434/api/tags\", timeout=2)\n",
|
| 162 |
+
" print(f\"[OK] Ollama API responsive after {i+1}s\")\n",
|
| 163 |
+
" break\n",
|
| 164 |
+
" except:\n",
|
| 165 |
+
" time.sleep(1)\n",
|
| 166 |
+
"\n",
|
| 167 |
+
"time.sleep(2)\n",
|
| 168 |
+
"\n",
|
| 169 |
+
"print(\"\\n── Ollama startup log (GPU detection lines) ──\")\n",
|
| 170 |
+
"log_txt = open(\"/content/ollama.log\").read()\n",
|
| 171 |
+
"for line in log_txt.splitlines():\n",
|
| 172 |
+
" kws = [\"gpu\", \"cuda\", \"vram\", \"nvidia\", \"error\", \"warn\", \"discovered\", \"total_vram\"]\n",
|
| 173 |
+
" if any(k in line.lower() for k in kws):\n",
|
| 174 |
+
" print(line)\n",
|
| 175 |
+
"\n",
|
| 176 |
+
"print(\"\\n── Checking model ──\")\n",
|
| 177 |
+
"tags = requests.get(\"http://localhost:11434/api/tags\").json()\n",
|
| 178 |
+
"models = [m[\"name\"] for m in tags.get(\"models\", [])]\n",
|
| 179 |
+
"print(f\"Installed: {models}\")\n",
|
| 180 |
+
"if not any(\"llama3.1:8b\" in m for m in models):\n",
|
| 181 |
+
" print(\"Pulling llama3.1:8b ...\")\n",
|
| 182 |
+
" subprocess.run([\"ollama\", \"pull\", \"llama3.1:8b\"], check=True)\n",
|
| 183 |
+
" print(\"[OK] Model pulled\")\n",
|
| 184 |
+
"\n",
|
| 185 |
+
"print(\"\\n── Warming up model (60-90s first load) ──\")\n",
|
| 186 |
+
"t0 = time.time()\n",
|
| 187 |
+
"r3 = requests.post(\"http://localhost:11434/api/chat\", json={\n",
|
| 188 |
+
" \"model\": \"llama3.1:8b\",\n",
|
| 189 |
+
" \"messages\": [{\"role\": \"user\", \"content\": \"Reply with the word READY only.\"}],\n",
|
| 190 |
+
" \"stream\": False,\n",
|
| 191 |
+
" \"options\": {\"temperature\": 0, \"num_predict\": 5}\n",
|
| 192 |
+
"}, timeout=(10, 300))\n",
|
| 193 |
+
"elapsed = time.time() - t0\n",
|
| 194 |
+
"d = r3.json()\n",
|
| 195 |
+
"load_s = d.get(\"load_duration\", 0) / 1e9\n",
|
| 196 |
+
"eval_s = d.get(\"eval_duration\", 0) / 1e9\n",
|
| 197 |
+
"reply = d[\"message\"][\"content\"].strip()\n",
|
| 198 |
+
"print(f\"Reply : {reply}\")\n",
|
| 199 |
+
"print(f\"Total : {elapsed:.1f}s | Load: {load_s:.1f}s | Eval: {eval_s:.1f}s\")\n",
|
| 200 |
+
"if eval_s > 0:\n",
|
| 201 |
+
" tps = d.get(\"eval_count\", 0) / eval_s\n",
|
| 202 |
+
" print(f\"Speed : {tps:.1f} tok/s {'← GPU (>30 t/s)' if tps > 30 else '← CPU (<10 t/s typical)'}\")\n",
|
| 203 |
+
"\n",
|
| 204 |
+
"print(\"\\n── ollama ps ──\")\n",
|
| 205 |
+
"ps = subprocess.run([\"ollama\", \"ps\"], capture_output=True, text=True)\n",
|
| 206 |
+
"print(ps.stdout)\n",
|
| 207 |
+
"\n",
|
| 208 |
+
"print(\"── nvidia-smi ──\")\n",
|
| 209 |
+
"smi = subprocess.run(\n",
|
| 210 |
+
" [\"nvidia-smi\", \"--query-gpu=name,memory.used,memory.total\", \"--format=csv,noheader\"],\n",
|
| 211 |
+
" capture_output=True, text=True)\n",
|
| 212 |
+
"print(smi.stdout)\n",
|
| 213 |
+
"\n",
|
| 214 |
+
"used_mib = int(smi.stdout.split(\",\")[1].strip().split()[0]) if smi.stdout else 0\n",
|
| 215 |
+
"if used_mib > 4000:\n",
|
| 216 |
+
" print(f\"✅ GPU confirmed — {used_mib} MiB used\")\n",
|
| 217 |
+
"elif \"GPU\" in ps.stdout:\n",
|
| 218 |
+
" print(f\"✅ ollama ps shows GPU\")\n",
|
| 219 |
+
"else:\n",
|
| 220 |
+
" print(f\"⚠️ Still on CPU ({used_mib} MiB). Check log lines above for CUDA errors.\")\n",
|
| 221 |
+
"\n",
|
| 222 |
+
"print(\"\\n[DONE] Cell 2 complete\")"
|
| 223 |
+
],
|
| 224 |
+
"metadata": {
|
| 225 |
+
"colab": {
|
| 226 |
+
"base_uri": "https://localhost:8080/"
|
| 227 |
+
},
|
| 228 |
+
"id": "nK5UZG3F0k7Q",
|
| 229 |
+
"outputId": "56840451-6c1d-47c5-f024-320f1c777462"
|
| 230 |
+
},
|
| 231 |
+
"execution_count": null,
|
| 232 |
+
"outputs": [
|
| 233 |
+
{
|
| 234 |
+
"output_type": "stream",
|
| 235 |
+
"name": "stdout",
|
| 236 |
+
"text": [
|
| 237 |
+
"════════════════════════════════════════════\n",
|
| 238 |
+
" CELL 2 — GPU Fix + Ollama GPU Start\n",
|
| 239 |
+
"════════════════════════════════════════════\n",
|
| 240 |
+
"[OK] Killed any stale Ollama process\n",
|
| 241 |
+
"\n",
|
| 242 |
+
"── Discovering NVIDIA/CUDA libs ──\n",
|
| 243 |
+
"/usr/lib64-nvidia/libcuda.so.1\n",
|
| 244 |
+
"/usr/lib64-nvidia/libcuda.so.580.82.07\n",
|
| 245 |
+
"/usr/lib64-nvidia/libcuda.so\n",
|
| 246 |
+
"/usr/local/cuda-12.8/compat/libcuda.so.570.124.06\n",
|
| 247 |
+
"/usr/local/cuda-12.8/compat/libcuda.so.1\n",
|
| 248 |
+
"/usr/local/cuda-12.8/compat/libcuda.so\n",
|
| 249 |
+
"/usr/local/cuda-12.8/targets/x86_64-linux/lib/stubs/libcuda.so\n",
|
| 250 |
+
"\n",
|
| 251 |
+
"── Ollama lib dir: /usr/local/lib/ollama ──\n",
|
| 252 |
+
"cuda_v12\n",
|
| 253 |
+
"cuda_v13\n",
|
| 254 |
+
"include\n",
|
| 255 |
+
"libggml-base.so\n",
|
| 256 |
+
"libggml-base.so.0\n",
|
| 257 |
+
"libggml-base.so.0.0.0\n",
|
| 258 |
+
"libggml-cpu-alderlake.so\n",
|
| 259 |
+
"libggml-cpu-haswell.so\n",
|
| 260 |
+
"libggml-cpu-icelake.so\n",
|
| 261 |
+
"libggml-cpu-sandybridge.so\n",
|
| 262 |
+
"libggml-cpu-skylakex.so\n",
|
| 263 |
+
"libggml-cpu-sse42.so\n",
|
| 264 |
+
"libggml-cpu-x64.so\n",
|
| 265 |
+
"mlx_cuda_v13\n",
|
| 266 |
+
"vulkan\n",
|
| 267 |
+
"\n",
|
| 268 |
+
"── Creating symlinks ──\n",
|
| 269 |
+
" ✅ /usr/lib64-nvidia/libcuda.so → /usr/local/lib/ollama/libcuda.so\n",
|
| 270 |
+
" ✅ /usr/lib64-nvidia/libcuda.so.1 → /usr/local/lib/ollama/libcuda.so.1\n",
|
| 271 |
+
" ✅ /usr/lib64-nvidia/libnvidia-ml.so.1 → /usr/local/lib/ollama/libnvidia-ml.so.1\n",
|
| 272 |
+
" ✅ /usr/lib64-nvidia/libnvidia-ml.so.1 → /usr/local/lib/ollama/libnvidia-ml.so\n",
|
| 273 |
+
" ✅ /usr/local/cuda/lib64/libcudart.so.12 → /usr/local/lib/ollama/libcudart.so.12\n",
|
| 274 |
+
"\n",
|
| 275 |
+
"── Starting Ollama daemon ──\n",
|
| 276 |
+
"LD_LIBRARY_PATH starts with: /usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64:/usr/lib64-nvidia...\n",
|
| 277 |
+
"[OK] Ollama PID: 15538\n",
|
| 278 |
+
"[OK] Ollama API responsive after 2s\n",
|
| 279 |
+
"\n",
|
| 280 |
+
"── Ollama startup log (GPU detection lines) ──\n",
|
| 281 |
+
"time=2026-04-25T22:47:53.770Z level=INFO source=routes.go:1752 msg=\"server config\" env=\"map[CUDA_VISIBLE_DEVICES: GGML_VK_VISIBLE_DEVICES: GPU_DEVICE_ORDINAL: HIP_VISIBLE_DEVICES: HSA_OVERRIDE_GFX_VERSION: HTTPS_PROXY: HTTP_PROXY: NO_PROXY: OLLAMA_CONTEXT_LENGTH:0 OLLAMA_DEBUG:INFO OLLAMA_DEBUG_LOG_REQUESTS:false OLLAMA_EDITOR: OLLAMA_FLASH_ATTENTION:false OLLAMA_GPU_OVERHEAD:0 OLLAMA_HOST:http://127.0.0.1:11434 OLLAMA_KEEP_ALIVE:2h0m0s OLLAMA_KV_CACHE_TYPE: OLLAMA_LLM_LIBRARY: OLLAMA_LOAD_TIMEOUT:5m0s OLLAMA_MAX_LOADED_MODELS:0 OLLAMA_MAX_QUEUE:512 OLLAMA_MODELS:/root/.ollama/models OLLAMA_MULTIUSER_CACHE:false OLLAMA_NEW_ENGINE:false OLLAMA_NOHISTORY:false OLLAMA_NOPRUNE:false OLLAMA_NO_CLOUD:false OLLAMA_NUM_PARALLEL:1 OLLAMA_ORIGINS:[http://localhost https://localhost http://localhost:* https://localhost:* http://127.0.0.1 https://127.0.0.1 http://127.0.0.1:* https://127.0.0.1:* http://0.0.0.0 https://0.0.0.0 http://0.0.0.0:* https://0.0.0.0:* app://* file://* tauri://* vscode-webview://* vscode-file://*] OLLAMA_REMOTES:[ollama.com] OLLAMA_SCHED_SPREAD:false OLLAMA_VULKAN:false ROCR_VISIBLE_DEVICES: http_proxy: https_proxy: no_proxy:]\"\n",
|
| 282 |
+
"time=2026-04-25T22:47:53.772Z level=INFO source=runner.go:67 msg=\"discovering available GPUs...\"\n",
|
| 283 |
+
"time=2026-04-25T22:47:54.618Z level=INFO source=types.go:42 msg=\"inference compute\" id=GPU-0779169f-d299-340e-42ee-14fb3ed34faf filter_id=\"\" library=CUDA compute=7.5 name=CUDA0 description=\"Tesla T4\" libdirs=ollama,cuda_v13 driver=13.0 pci_id=0000:00:04.0 type=discrete total=\"15.0 GiB\" available=\"14.6 GiB\"\n",
|
| 284 |
+
"time=2026-04-25T22:47:54.618Z level=INFO source=routes.go:1860 msg=\"vram-based default context\" total_vram=\"15.0 GiB\" default_num_ctx=4096\n",
|
| 285 |
+
"\n",
|
| 286 |
+
"── Checking model ──\n",
|
| 287 |
+
"Installed: []\n",
|
| 288 |
+
"Pulling llama3.1:8b ...\n",
|
| 289 |
+
"[OK] Model pulled\n",
|
| 290 |
+
"\n",
|
| 291 |
+
"── Warming up model (60-90s first load) ──\n",
|
| 292 |
+
"Reply : READY\n",
|
| 293 |
+
"Total : 96.8s | Load: 96.7s | Eval: 0.0s\n",
|
| 294 |
+
"Speed : 58.5 tok/s ← GPU (>30 t/s)\n",
|
| 295 |
+
"\n",
|
| 296 |
+
"── ollama ps ──\n",
|
| 297 |
+
"NAME ID SIZE PROCESSOR CONTEXT UNTIL \n",
|
| 298 |
+
"llama3.1:8b 46e0c10c039e 5.5 GB 100% GPU 4096 2 hours from now \n",
|
| 299 |
+
"\n",
|
| 300 |
+
"── nvidia-smi ──\n",
|
| 301 |
+
"Tesla T4, 5367 MiB, 15360 MiB\n",
|
| 302 |
+
"\n",
|
| 303 |
+
"✅ GPU confirmed — 5367 MiB used\n",
|
| 304 |
+
"\n",
|
| 305 |
+
"[DONE] Cell 2 complete\n"
|
| 306 |
+
]
|
| 307 |
+
}
|
| 308 |
+
]
|
| 309 |
+
},
|
| 310 |
+
{
|
| 311 |
+
"cell_type": "code",
|
| 312 |
+
"source": [
|
| 313 |
+
"%%bash\n",
|
| 314 |
+
"set -euo pipefail\n",
|
| 315 |
+
"echo '════════════════════════════════════════════'\n",
|
| 316 |
+
"echo ' CELL 3 — Clone + Patch'\n",
|
| 317 |
+
"echo '════════════════════════════════════════════'\n",
|
| 318 |
+
"cd /content\n",
|
| 319 |
+
"rm -rf medintake-ai\n",
|
| 320 |
+
"git clone https://github.com/priyansh-saxena1/medintake-ai.git\n",
|
| 321 |
+
"cd medintake-ai\n",
|
| 322 |
+
"echo \"[OK] commit: $(git rev-parse --short HEAD)\"\n",
|
| 323 |
+
"pip install -r requirements.txt 2>&1 | tail -4\n",
|
| 324 |
+
"echo '[OK] pip done'\n",
|
| 325 |
+
"\n",
|
| 326 |
+
"python3 << 'PYEOF'\n",
|
| 327 |
+
"import pathlib\n",
|
| 328 |
+
"p = pathlib.Path('/content/medintake-ai/app/llm.py')\n",
|
| 329 |
+
"src = p.read_text()\n",
|
| 330 |
+
"changed = False\n",
|
| 331 |
+
"\n",
|
| 332 |
+
"patches = [\n",
|
| 333 |
+
" (\"PATCH 1 timeout\",\n",
|
| 334 |
+
" 'requests.post(self.api_url, json=payload, timeout=60)',\n",
|
| 335 |
+
" 'requests.post(self.api_url, json=payload, timeout=(10, 300))'),\n",
|
| 336 |
+
" (\"PATCH 2 OLLAMA_HOST\",\n",
|
| 337 |
+
" 'self.api_url = \"http://localhost:11434/api/chat\"',\n",
|
| 338 |
+
" 'self.api_url = os.environ.get(\"OLLAMA_HOST\",\"http://localhost:11434\") + \"/api/chat\"'),\n",
|
| 339 |
+
" (\"PATCH 3 MODEL_NAME default\",\n",
|
| 340 |
+
" 'self.model_name = os.environ.get(\"MODEL_NAME\", \"qwen2.5:0.5b\")',\n",
|
| 341 |
+
" 'self.model_name = os.environ.get(\"MODEL_NAME\", \"llama3.1:8b\")'),\n",
|
| 342 |
+
" (\"PATCH 4 response key\",\n",
|
| 343 |
+
" 'raw = data.get(\"response\", \"\")',\n",
|
| 344 |
+
" 'raw = data.get(\"message\", {}).get(\"content\", \"\")'),\n",
|
| 345 |
+
"]\n",
|
| 346 |
+
"\n",
|
| 347 |
+
"for name, old, new in patches:\n",
|
| 348 |
+
" if old in src:\n",
|
| 349 |
+
" src = src.replace(old, new, 1)\n",
|
| 350 |
+
" changed = True\n",
|
| 351 |
+
" print(f\"[APPLIED] {name}\")\n",
|
| 352 |
+
" elif new in src:\n",
|
| 353 |
+
" print(f\"[SKIP] {name}\")\n",
|
| 354 |
+
" else:\n",
|
| 355 |
+
" print(f\"[WARN] {name} target not found\")\n",
|
| 356 |
+
"\n",
|
| 357 |
+
"if changed:\n",
|
| 358 |
+
" p.write_text(src)\n",
|
| 359 |
+
" print(\"[OK] llm.py saved\")\n",
|
| 360 |
+
"\n",
|
| 361 |
+
"import py_compile\n",
|
| 362 |
+
"py_compile.compile(str(p), doraise=True)\n",
|
| 363 |
+
"print(\"[OK] syntax valid\")\n",
|
| 364 |
+
"PYEOF\n",
|
| 365 |
+
"\n",
|
| 366 |
+
"echo ''\n",
|
| 367 |
+
"echo '── Tests (MockLLM) ──'\n",
|
| 368 |
+
"cd /content/medintake-ai\n",
|
| 369 |
+
"MOCK_LLM=true python3 -m pytest tests/ -v --tb=short 2>&1\n",
|
| 370 |
+
"echo '[DONE] Cell 3'\n"
|
| 371 |
+
],
|
| 372 |
+
"metadata": {
|
| 373 |
+
"colab": {
|
| 374 |
+
"base_uri": "https://localhost:8080/"
|
| 375 |
+
},
|
| 376 |
+
"id": "jXLQSpFj0n8a",
|
| 377 |
+
"outputId": "10292259-f6c0-4b07-a1a5-e3d9c474afc9"
|
| 378 |
+
},
|
| 379 |
+
"execution_count": null,
|
| 380 |
+
"outputs": [
|
| 381 |
+
{
|
| 382 |
+
"output_type": "stream",
|
| 383 |
+
"name": "stdout",
|
| 384 |
+
"text": [
|
| 385 |
+
"════════════════════════════════════════════\n",
|
| 386 |
+
" CELL 3 — Clone + Patch\n",
|
| 387 |
+
"════════════════════════════════════════════\n",
|
| 388 |
+
"[OK] commit: eb1b955\n",
|
| 389 |
+
"Requirement already satisfied: orjson>=3.11.5 in /usr/local/lib/python3.12/dist-packages (from langgraph-sdk<0.4.0,>=0.3.0->langgraph->-r requirements.txt (line 1)) (3.11.8)\n",
|
| 390 |
+
"Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.12/dist-packages (from jsonpatch<2.0.0,>=1.33.0->langchain-core>=0.1->langgraph->-r requirements.txt (line 1)) (3.1.1)\n",
|
| 391 |
+
"Requirement already satisfied: requests-toolbelt>=1.0.0 in /usr/local/lib/python3.12/dist-packages (from langsmith<1.0.0,>=0.3.45->langchain-core>=0.1->langgraph->-r requirements.txt (line 1)) (1.0.0)\n",
|
| 392 |
+
"Requirement already satisfied: zstandard>=0.23.0 in /usr/local/lib/python3.12/dist-packages (from langsmith<1.0.0,>=0.3.45->langchain-core>=0.1->langgraph->-r requirements.txt (line 1)) (0.25.0)\n",
|
| 393 |
+
"[OK] pip done\n",
|
| 394 |
+
"[APPLIED] PATCH 1 timeout\n",
|
| 395 |
+
"[APPLIED] PATCH 2 OLLAMA_HOST\n",
|
| 396 |
+
"[APPLIED] PATCH 3 MODEL_NAME default\n",
|
| 397 |
+
"[SKIP] PATCH 4 response key\n",
|
| 398 |
+
"[OK] llm.py saved\n",
|
| 399 |
+
"[OK] syntax valid\n",
|
| 400 |
+
"\n",
|
| 401 |
+
"── Tests (MockLLM) ──\n",
|
| 402 |
+
"============================= test session starts ==============================\n",
|
| 403 |
+
"platform linux -- Python 3.12.13, pytest-8.4.2, pluggy-1.6.0 -- /usr/bin/python3\n",
|
| 404 |
+
"cachedir: .pytest_cache\n",
|
| 405 |
+
"rootdir: /content/medintake-ai\n",
|
| 406 |
+
"configfile: pytest.ini\n",
|
| 407 |
+
"plugins: asyncio-1.3.0, langsmith-0.7.30, typeguard-4.5.1, anyio-4.13.0\n",
|
| 408 |
+
"asyncio: mode=Mode.AUTO, debug=False, asyncio_default_fixture_loop_scope=function, asyncio_default_test_loop_scope=function\n",
|
| 409 |
+
"collecting ... collected 11 items\n",
|
| 410 |
+
"\n",
|
| 411 |
+
"tests/test_e2e.py::test_mock_llm_combined_call_basic_extraction PASSED [ 9%]\n",
|
| 412 |
+
"tests/test_e2e.py::test_mock_llm_emergency_detection PASSED [ 18%]\n",
|
| 413 |
+
"tests/test_e2e.py::test_mock_llm_does_not_repeat_filled_questions PASSED [ 27%]\n",
|
| 414 |
+
"tests/test_e2e.py::test_mock_llm_severity_extraction PASSED [ 36%]\n",
|
| 415 |
+
"tests/test_e2e.py::test_mock_llm_ros_extraction PASSED [ 45%]\n",
|
| 416 |
+
"tests/test_e2e.py::test_mock_llm_speed PASSED [ 54%]\n",
|
| 417 |
+
"tests/test_e2e.py::test_combined_output_schema_round_trip PASSED [ 63%]\n",
|
| 418 |
+
"tests/test_e2e.py::test_health_endpoint PASSED [ 72%]\n",
|
| 419 |
+
"tests/test_e2e.py::test_emergency_triage_node PASSED [ 81%]\n",
|
| 420 |
+
"tests/test_e2e.py::test_full_intake_multi_turn_extraction PASSED [ 90%]\n",
|
| 421 |
+
"tests/test_e2e.py::test_api_response_time PASSED [100%]\n",
|
| 422 |
+
"\n",
|
| 423 |
+
"============================== 11 passed in 0.71s ==============================\n",
|
| 424 |
+
"[DONE] Cell 3\n"
|
| 425 |
+
]
|
| 426 |
+
},
|
| 427 |
+
{
|
| 428 |
+
"output_type": "stream",
|
| 429 |
+
"name": "stderr",
|
| 430 |
+
"text": [
|
| 431 |
+
"Cloning into 'medintake-ai'...\n"
|
| 432 |
+
]
|
| 433 |
+
}
|
| 434 |
+
]
|
| 435 |
+
},
|
| 436 |
+
{
|
| 437 |
+
"cell_type": "code",
|
| 438 |
+
"source": [
|
| 439 |
+
"# Auto-heal Ollama before each turn\n",
|
| 440 |
+
"import requests, subprocess, os, time\n",
|
| 441 |
+
"def ensure_ollama():\n",
|
| 442 |
+
" try:\n",
|
| 443 |
+
" requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
|
| 444 |
+
" except: pass\n",
|
| 445 |
+
" print(\"⚠️ Ollama dead — restarting...\")\n",
|
| 446 |
+
" env = os.environ.copy()\n",
|
| 447 |
+
" env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
|
| 448 |
+
" env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
|
| 449 |
+
" env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
|
| 450 |
+
" subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
|
| 451 |
+
" stdout=open(\"/content/ollama.log\",\"a\"),\n",
|
| 452 |
+
" stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
|
| 453 |
+
" for _ in range(30):\n",
|
| 454 |
+
" try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
|
| 455 |
+
" except: time.sleep(1)\n",
|
| 456 |
+
"\n",
|
| 457 |
+
"ensure_ollama()\n",
|
| 458 |
+
"\n",
|
| 459 |
+
"import sys, os, json, time, subprocess, requests\n",
|
| 460 |
+
"sys.path.insert(0, \"/content/medintake-ai\")\n",
|
| 461 |
+
"os.environ[\"MOCK_LLM\"] = \"false\"\n",
|
| 462 |
+
"os.environ[\"MODEL_NAME\"] = \"llama3.1:8b\"\n",
|
| 463 |
+
"os.environ[\"OLLAMA_HOST\"] = \"http://localhost:11434\"\n",
|
| 464 |
+
"\n",
|
| 465 |
+
"print(\"════════════════════════════════════════════\")\n",
|
| 466 |
+
"print(\" CELL 4 — Deep LLM Dataflow Debug\")\n",
|
| 467 |
+
"print(\"════════════════════════════════════════════\")\n",
|
| 468 |
+
"\n",
|
| 469 |
+
"# A: Hardware\n",
|
| 470 |
+
"print(\"\\n── A: Hardware status ──\")\n",
|
| 471 |
+
"ps = subprocess.run([\"ollama\",\"ps\"], capture_output=True, text=True)\n",
|
| 472 |
+
"smi = subprocess.run([\"nvidia-smi\",\"--query-gpu=name,memory.used,memory.total\",\n",
|
| 473 |
+
" \"--format=csv,noheader\"], capture_output=True, text=True)\n",
|
| 474 |
+
"print(\"ollama ps :\", ps.stdout.strip())\n",
|
| 475 |
+
"print(\"nvidia-smi:\", smi.stdout.strip())\n",
|
| 476 |
+
"\n",
|
| 477 |
+
"# B: App env + object init\n",
|
| 478 |
+
"print(\"\\n── B: App LLM object ──\")\n",
|
| 479 |
+
"from app.llm import OllamaLLM, CombinedOutput, COMBINED_SYSTEM_PROMPT\n",
|
| 480 |
+
"llm = OllamaLLM()\n",
|
| 481 |
+
"print(f\"model_name : {llm.model_name}\")\n",
|
| 482 |
+
"print(f\"api_url : {llm.api_url}\")\n",
|
| 483 |
+
"\n",
|
| 484 |
+
"# C + D: Prompt construction (system prompt, then user prompt)\n",
|
| 485 |
+
"print(\"\\n── C: System prompt ──\")\n",
|
| 486 |
+
"print(COMBINED_SYSTEM_PROMPT)\n",
|
| 487 |
+
"\n",
|
| 488 |
+
"transcript = \"Patient: I have chest pain\"\n",
|
| 489 |
+
"currentjson = CombinedOutput().model_dump_json()\n",
|
| 490 |
+
"prompt = (\n",
|
| 491 |
+
" f\"CURRENT CLINICAL STATE:\\n{currentjson}\\n\\n\"\n",
|
| 492 |
+
" f\"FULL CONVERSATION TRANSCRIPT:\\n{transcript}\\n\\n\"\n",
|
| 493 |
+
" \"Instructions: Extract all new clinical facts, merge into state, \"\n",
|
| 494 |
+
" \"generate ONE empathetic follow-up question. Return ONLY JSON.\"\n",
|
| 495 |
+
")\n",
|
| 496 |
+
"print(\"\\n── D: User prompt ──\")\n",
|
| 497 |
+
"print(prompt)\n",
|
| 498 |
+
"\n",
|
| 499 |
+
"# E: Raw HTTP\n",
|
| 500 |
+
"print(\"\\n── E: Raw Ollama HTTP call ──\")\n",
|
| 501 |
+
"payload = {\n",
|
| 502 |
+
" \"model\": llm.model_name,\n",
|
| 503 |
+
" \"messages\": [\n",
|
| 504 |
+
" {\"role\":\"system\",\"content\": COMBINED_SYSTEM_PROMPT},\n",
|
| 505 |
+
" {\"role\":\"user\", \"content\": prompt}\n",
|
| 506 |
+
" ],\n",
|
| 507 |
+
" \"format\": \"json\",\n",
|
| 508 |
+
" \"stream\": False,\n",
|
| 509 |
+
" \"options\": {\"temperature\": 0.0, \"num_predict\": 300}\n",
|
| 510 |
+
"}\n",
|
| 511 |
+
"\n",
|
| 512 |
+
"t0 = time.time()\n",
|
| 513 |
+
"resp = requests.post(llm.api_url, json=payload, timeout=(10,300))\n",
|
| 514 |
+
"elapsed = time.time() - t0\n",
|
| 515 |
+
"full = resp.json()\n",
|
| 516 |
+
"\n",
|
| 517 |
+
"load_s = full.get(\"load_duration\",0) / 1e9\n",
|
| 518 |
+
"eval_s = full.get(\"eval_duration\",1) / 1e9\n",
|
| 519 |
+
"tps = full.get(\"eval_count\",0) / eval_s\n",
|
| 520 |
+
"\n",
|
| 521 |
+
"print(f\"HTTP status : {resp.status_code}\")\n",
|
| 522 |
+
"print(f\"Total time : {elapsed:.2f}s\")\n",
|
| 523 |
+
"print(f\"Load duration : {load_s:.2f}s {'GPU (fast)' if load_s < 1 else 'CPU (slow)'}\")\n",
|
| 524 |
+
"print(f\"Tokens/sec : {tps:.1f} {'GPU (>30)' if tps > 30 else 'CPU (<15)'}\")\n",
|
| 525 |
+
"raw = full.get(\"message\",{}).get(\"content\",\"\").strip()\n",
|
| 526 |
+
"print(f\"\\nRaw content:\\n{raw}\")\n",
|
| 527 |
+
"\n",
|
| 528 |
+
"# F: Parsing\n",
|
| 529 |
+
"print(\"\\n── F: JSON parse ──\")\n",
|
| 530 |
+
"try:\n",
|
| 531 |
+
" parsed = json.loads(raw)\n",
|
| 532 |
+
" print(\"json.loads() OK\")\n",
|
| 533 |
+
" REQUIRED = {\"chief_complaint\",\"onset\",\"location\",\"duration\",\n",
|
| 534 |
+
" \"character\",\"severity\",\"aggravating\",\"relieving\",\"ros\",\"reply\"}\n",
|
| 535 |
+
" missing_k = REQUIRED - set(parsed.keys())\n",
|
| 536 |
+
" extra_k = set(parsed.keys()) - REQUIRED\n",
|
| 537 |
+
" print(f\"Missing keys : {missing_k or 'none'}\")\n",
|
| 538 |
+
" print(f\"Extra keys : {extra_k or 'none'}\")\n",
|
| 539 |
+
" print(json.dumps(parsed, indent=2))\n",
|
| 540 |
+
"except Exception as e:\n",
|
| 541 |
+
" print(f\"FAILED: {e}\")\n",
|
| 542 |
+
"\n",
|
| 543 |
+
"# G: Full pipeline\n",
|
| 544 |
+
"print(\"\\n── G: Full app pipeline ──\")\n",
|
| 545 |
+
"result = llm.combined_call(transcript, currentjson)\n",
|
| 546 |
+
"print(\"CombinedOutput:\")\n",
|
| 547 |
+
"print(json.dumps(result.model_dump(), indent=2))\n",
|
| 548 |
+
"\n",
|
| 549 |
+
"from app.graph import compute_stage, missing_from\n",
|
| 550 |
+
"stage = compute_stage(result)\n",
|
| 551 |
+
"missing = missing_from(result)\n",
|
| 552 |
+
"print(f\"\\nStage : {stage}\")\n",
|
| 553 |
+
"print(f\"Missing : {missing}\")\n",
|
| 554 |
+
"print(f\"Reply : '{result.reply}'\")\n",
|
| 555 |
+
"\n",
|
| 556 |
+
"FALLBACK = {\"\", \"Could you tell me more?\", \"Could you please repeat that?\"}\n",
|
| 557 |
+
"if result.reply in FALLBACK:\n",
|
| 558 |
+
" print(\"\\nWARNING: FALLBACK REPLY — LLM output failed silently!\")\n",
|
| 559 |
+
" print(\"Check logs below:\")\n",
|
| 560 |
+
" print(open(\"/content/ollama.log\").read()[-2000:])\n",
|
| 561 |
+
"else:\n",
|
| 562 |
+
" print(\"\\nOK: Real LLM reply returned\")\n",
|
| 563 |
+
"\n",
|
| 564 |
+
"print(\"[DONE] Cell 4\")\n"
|
| 565 |
+
],
|
| 566 |
+
"metadata": {
|
| 567 |
+
"colab": {
|
| 568 |
+
"base_uri": "https://localhost:8080/"
|
| 569 |
+
},
|
| 570 |
+
"id": "6oX-ZDYB0pJ3",
|
| 571 |
+
"outputId": "66521e8e-e829-44ff-e4dd-d836f600ba0e"
|
| 572 |
+
},
|
| 573 |
+
"execution_count": null,
|
| 574 |
+
"outputs": [
|
| 575 |
+
{
|
| 576 |
+
"output_type": "stream",
|
| 577 |
+
"name": "stdout",
|
| 578 |
+
"text": [
|
| 579 |
+
"════════════════════════════════════════════\n",
|
| 580 |
+
" CELL 4 — Deep LLM Dataflow Debug\n",
|
| 581 |
+
"════════════════════════════════════════════\n",
|
| 582 |
+
"\n",
|
| 583 |
+
"── A: Hardware status ──\n",
|
| 584 |
+
"ollama ps : NAME ID SIZE PROCESSOR CONTEXT UNTIL \n",
|
| 585 |
+
"llama3.1:8b 46e0c10c039e 5.5 GB 100% GPU 4096 2 hours from now\n",
|
| 586 |
+
"nvidia-smi: Tesla T4, 5367 MiB, 15360 MiB\n",
|
| 587 |
+
"\n",
|
| 588 |
+
"── B: App LLM object ──\n",
|
| 589 |
+
"model_name : llama3.1:8b\n",
|
| 590 |
+
"api_url : http://localhost:11434/api/chat\n",
|
| 591 |
+
"\n",
|
| 592 |
+
"── C: System prompt ──\n",
|
| 593 |
+
"You are a clinical intake assistant AI. You have two jobs per turn:\n",
|
| 594 |
+
"\n",
|
| 595 |
+
"JOB 1 (EXTRACT): Read the FULL conversation and update the clinical JSON state with any new information the patient provided. Only extract facts explicitly stated.\n",
|
| 596 |
+
"\n",
|
| 597 |
+
"JOB 2 (RESPOND): Based on what is STILL MISSING from the clinical state, ask the patient ONE natural, empathetic question. Do NOT ask about things already filled in.\n",
|
| 598 |
+
"\n",
|
| 599 |
+
"CRITICAL RULES:\n",
|
| 600 |
+
"- Output ONLY valid JSON, nothing else.\n",
|
| 601 |
+
"- Do NOT diagnose or give medical advice.\n",
|
| 602 |
+
"- Do NOT ask more than one question.\n",
|
| 603 |
+
"- If all fields are complete, set reply to \"Thank you — I have everything I need.\"\n",
|
| 604 |
+
"\n",
|
| 605 |
+
"OUTPUT FORMAT (strictly follow this, no extra text):\n",
|
| 606 |
+
"{\n",
|
| 607 |
+
" \"chief_complaint\": \"...\",\n",
|
| 608 |
+
" \"onset\": \"...\",\n",
|
| 609 |
+
" \"location\": \"...\",\n",
|
| 610 |
+
" \"duration\": \"...\",\n",
|
| 611 |
+
" \"character\": \"...\",\n",
|
| 612 |
+
" \"severity\": \"...\",\n",
|
| 613 |
+
" \"aggravating\": \"...\",\n",
|
| 614 |
+
" \"relieving\": \"...\",\n",
|
| 615 |
+
" \"ros\": {\"system_name\": [\"finding1\", \"finding2\"]},\n",
|
| 616 |
+
" \"reply\": \"The single question to ask the patient next\"\n",
|
| 617 |
+
"}\n",
|
| 618 |
+
"\n",
|
| 619 |
+
"Use null for any field not yet known. Keep existing values if the patient didn't add new info.\n",
|
| 620 |
+
"\n",
|
| 621 |
+
"── D: User prompt ──\n",
|
| 622 |
+
"CURRENT CLINICAL STATE:\n",
|
| 623 |
+
"{\"chief_complaint\":null,\"onset\":null,\"location\":null,\"duration\":null,\"character\":null,\"severity\":null,\"aggravating\":null,\"relieving\":null,\"ros\":{},\"emergency\":false,\"reply\":\"\"}\n",
|
| 624 |
+
"\n",
|
| 625 |
+
"FULL CONVERSATION TRANSCRIPT:\n",
|
| 626 |
+
"Patient: I have chest pain\n",
|
| 627 |
+
"\n",
|
| 628 |
+
"Instructions: Extract all new clinical facts, merge into state, generate ONE empathetic follow-up question. Return ONLY JSON.\n",
|
| 629 |
+
"\n",
|
| 630 |
+
"── E: Raw Ollama HTTP call ──\n",
|
| 631 |
+
"HTTP status : 200\n",
|
| 632 |
+
"Total time : 5.61s\n",
|
| 633 |
+
"Load duration : 0.20s GPU (fast)\n",
|
| 634 |
+
"Tokens/sec : 45.2 GPU (>30)\n",
|
| 635 |
+
"\n",
|
| 636 |
+
"Raw content:\n",
|
| 637 |
+
"{\n",
|
| 638 |
+
" \"chief_complaint\": \"chest pain\",\n",
|
| 639 |
+
" \"onset\": null,\n",
|
| 640 |
+
" \"location\": null,\n",
|
| 641 |
+
" \"duration\": null,\n",
|
| 642 |
+
" \"character\": null,\n",
|
| 643 |
+
" \"severity\": null,\n",
|
| 644 |
+
" \"aggravating\": null,\n",
|
| 645 |
+
" \"relieving\": null,\n",
|
| 646 |
+
" \"ros\": {},\n",
|
| 647 |
+
" \"emergency\": false,\n",
|
| 648 |
+
" \"reply\": \"Can you tell me more about when this chest pain started?\"\n",
|
| 649 |
+
"}\n",
|
| 650 |
+
"\n",
|
| 651 |
+
"── F: JSON parse ──\n",
|
| 652 |
+
"json.loads() OK\n",
|
| 653 |
+
"Missing keys : none\n",
|
| 654 |
+
"Extra keys : {'emergency'}\n",
|
| 655 |
+
"{\n",
|
| 656 |
+
" \"chief_complaint\": \"chest pain\",\n",
|
| 657 |
+
" \"onset\": null,\n",
|
| 658 |
+
" \"location\": null,\n",
|
| 659 |
+
" \"duration\": null,\n",
|
| 660 |
+
" \"character\": null,\n",
|
| 661 |
+
" \"severity\": null,\n",
|
| 662 |
+
" \"aggravating\": null,\n",
|
| 663 |
+
" \"relieving\": null,\n",
|
| 664 |
+
" \"ros\": {},\n",
|
| 665 |
+
" \"emergency\": false,\n",
|
| 666 |
+
" \"reply\": \"Can you tell me more about when this chest pain started?\"\n",
|
| 667 |
+
"}\n",
|
| 668 |
+
"\n",
|
| 669 |
+
"── G: Full app pipeline ──\n",
|
| 670 |
+
"[Ollama] Starting inference for model 'llama3.1:8b'...\n",
|
| 671 |
+
"[Ollama] Inference completed in 5.23s total.\n",
|
| 672 |
+
"CombinedOutput:\n",
|
| 673 |
+
"{\n",
|
| 674 |
+
" \"chief_complaint\": \"chest pain\",\n",
|
| 675 |
+
" \"onset\": null,\n",
|
| 676 |
+
" \"location\": null,\n",
|
| 677 |
+
" \"duration\": null,\n",
|
| 678 |
+
" \"character\": null,\n",
|
| 679 |
+
" \"severity\": null,\n",
|
| 680 |
+
" \"aggravating\": null,\n",
|
| 681 |
+
" \"relieving\": null,\n",
|
| 682 |
+
" \"ros\": {},\n",
|
| 683 |
+
" \"emergency\": false,\n",
|
| 684 |
+
" \"reply\": \"Can you tell me more about when this chest pain started?\"\n",
|
| 685 |
+
"}\n",
|
| 686 |
+
"\n",
|
| 687 |
+
"Stage : hpi\n",
|
| 688 |
+
"Missing : ['HPI:onset', 'HPI:location', 'HPI:duration', 'HPI:character', 'HPI:severity', 'HPI:aggravating', 'HPI:relieving', 'ROS (3 more systems needed)']\n",
|
| 689 |
+
"Reply : 'Can you tell me more about when this chest pain started?'\n",
|
| 690 |
+
"\n",
|
| 691 |
+
"OK: Real LLM reply returned\n",
|
| 692 |
+
"[DONE] Cell 4\n"
|
| 693 |
+
]
|
| 694 |
+
}
|
| 695 |
+
]
|
| 696 |
+
},
|
| 697 |
+
{
|
| 698 |
+
"cell_type": "code",
|
| 699 |
+
"source": [
|
| 700 |
+
"# Auto-heal Ollama before each turn\n",
|
| 701 |
+
"import requests, subprocess, os, time\n",
|
| 702 |
+
"def ensure_ollama():\n",
|
| 703 |
+
" try:\n",
|
| 704 |
+
" requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
|
| 705 |
+
" except: pass\n",
|
| 706 |
+
" print(\"⚠️ Ollama dead — restarting...\")\n",
|
| 707 |
+
" env = os.environ.copy()\n",
|
| 708 |
+
" env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
|
| 709 |
+
" env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
|
| 710 |
+
" env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
|
| 711 |
+
" subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
|
| 712 |
+
" stdout=open(\"/content/ollama.log\",\"a\"),\n",
|
| 713 |
+
" stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
|
| 714 |
+
" for _ in range(30):\n",
|
| 715 |
+
" try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
|
| 716 |
+
" except: time.sleep(1)\n",
|
| 717 |
+
"\n",
|
| 718 |
+
"ensure_ollama()\n",
|
| 719 |
+
"\n",
|
| 720 |
+
"import subprocess, time, requests, os\n",
|
| 721 |
+
"\n",
|
| 722 |
+
"subprocess.run([\"pkill\", \"-f\", \"uvicorn\"], capture_output=True)\n",
|
| 723 |
+
"time.sleep(2)\n",
|
| 724 |
+
"\n",
|
| 725 |
+
"env = os.environ.copy()\n",
|
| 726 |
+
"env[\"MOCK_LLM\"] = \"false\"\n",
|
| 727 |
+
"env[\"MODEL_NAME\"] = \"llama3.1:8b\"\n",
|
| 728 |
+
"env[\"OLLAMA_HOST\"] = \"http://localhost:11434\"\n",
|
| 729 |
+
"\n",
|
| 730 |
+
"log = open(\"/content/api.log\", \"w\")\n",
|
| 731 |
+
"proc = subprocess.Popen(\n",
|
| 732 |
+
" [\"python\", \"-m\", \"uvicorn\", \"app.main:app\",\n",
|
| 733 |
+
" \"--host\", \"0.0.0.0\", \"--port\", \"7860\", \"--log-level\", \"info\"],\n",
|
| 734 |
+
" cwd=\"/content/medintake-ai\",\n",
|
| 735 |
+
" env=env, stdout=log, stderr=log,\n",
|
| 736 |
+
" preexec_fn=os.setpgrp\n",
|
| 737 |
+
")\n",
|
| 738 |
+
"print(f\"uvicorn PID: {proc.pid}\")\n",
|
| 739 |
+
"\n",
|
| 740 |
+
"for i in range(20):\n",
|
| 741 |
+
" try:\n",
|
| 742 |
+
" r = requests.get(\"http://localhost:7860/health\", timeout=2)\n",
|
| 743 |
+
" if r.status_code == 200:\n",
|
| 744 |
+
" d = r.json()\n",
|
| 745 |
+
" print(f\"✅ FastAPI ready after {i+1}s\")\n",
|
| 746 |
+
" print(f\" mock_mode = {d.get('mock_mode')} ← must be False\")\n",
|
| 747 |
+
" break\n",
|
| 748 |
+
" except: pass\n",
|
| 749 |
+
" print(f\" ...{i+1}s\")\n",
|
| 750 |
+
" time.sleep(1)\n",
|
| 751 |
+
"else:\n",
|
| 752 |
+
" print(\"❌ Failed — dumping api.log:\")\n",
|
| 753 |
+
" print(open(\"/content/api.log\").read()[-2000:])"
|
| 754 |
+
],
|
| 755 |
+
"metadata": {
|
| 756 |
+
"colab": {
|
| 757 |
+
"base_uri": "https://localhost:8080/"
|
| 758 |
+
},
|
| 759 |
+
"id": "qhgapTbx0qRi",
|
| 760 |
+
"outputId": "886d3a8c-928e-4051-926a-878534769de1"
|
| 761 |
+
},
|
| 762 |
+
"execution_count": null,
|
| 763 |
+
"outputs": [
|
| 764 |
+
{
|
| 765 |
+
"output_type": "stream",
|
| 766 |
+
"name": "stdout",
|
| 767 |
+
"text": [
|
| 768 |
+
"uvicorn PID: 19612\n",
|
| 769 |
+
" ...1s\n",
|
| 770 |
+
" ...2s\n",
|
| 771 |
+
"✅ FastAPI ready after 3s\n",
|
| 772 |
+
" mock_mode = False ← must be False\n"
|
| 773 |
+
]
|
| 774 |
+
}
|
| 775 |
+
]
|
| 776 |
+
},
|
| 777 |
+
{
|
| 778 |
+
"cell_type": "code",
|
| 779 |
+
"source": [
|
| 780 |
+
"# Auto-heal Ollama before each turn\n",
|
| 781 |
+
"import requests, subprocess, os, time\n",
|
| 782 |
+
"def ensure_ollama():\n",
|
| 783 |
+
" try:\n",
|
| 784 |
+
" requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
|
| 785 |
+
" except: pass\n",
|
| 786 |
+
" print(\"⚠️ Ollama dead — restarting...\")\n",
|
| 787 |
+
" env = os.environ.copy()\n",
|
| 788 |
+
" env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
|
| 789 |
+
" env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
|
| 790 |
+
" env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
|
| 791 |
+
" subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
|
| 792 |
+
" stdout=open(\"/content/ollama.log\",\"a\"),\n",
|
| 793 |
+
" stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
|
| 794 |
+
" for _ in range(30):\n",
|
| 795 |
+
" try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
|
| 796 |
+
" except: time.sleep(1)\n",
|
| 797 |
+
"\n",
|
| 798 |
+
"ensure_ollama()\n",
|
| 799 |
+
"\n",
|
| 800 |
+
"import subprocess, time, requests\n",
|
| 801 |
+
"\n",
|
| 802 |
+
"PUBLIC_IP = requests.get(\"https://ipv4.icanhazip.com\", timeout=5).text.strip()\n",
|
| 803 |
+
"print(f\"Tunnel password: {PUBLIC_IP}\")\n",
|
| 804 |
+
"print(\"Starting tunnel...\")\n",
|
| 805 |
+
"\n",
|
| 806 |
+
"# Start lt as background process, capture output to file\n",
|
| 807 |
+
"tunnel_log = open(\"/content/tunnel.log\", \"w\")\n",
|
| 808 |
+
"proc = subprocess.Popen(\n",
|
| 809 |
+
" [\"lt\", \"--port\", \"7860\"],\n",
|
| 810 |
+
" stdout=tunnel_log, stderr=tunnel_log,\n",
|
| 811 |
+
" preexec_fn=__import__(\"os\").setpgrp\n",
|
| 812 |
+
")\n",
|
| 813 |
+
"print(f\"Tunnel PID: {proc.pid}\")\n",
|
| 814 |
+
"\n",
|
| 815 |
+
"# Wait for URL to appear in log\n",
|
| 816 |
+
"for i in range(15):\n",
|
| 817 |
+
" time.sleep(1)\n",
|
| 818 |
+
" try:\n",
|
| 819 |
+
" txt = open(\"/content/tunnel.log\").read()\n",
|
| 820 |
+
" if \"loca.lt\" in txt or \"https://\" in txt:\n",
|
| 821 |
+
" for line in txt.splitlines():\n",
|
| 822 |
+
" if \"https://\" in line:\n",
|
| 823 |
+
" print(f\"\\n🌐 PUBLIC URL: {line.strip()}\")\n",
|
| 824 |
+
" break\n",
|
| 825 |
+
" except: pass\n",
|
| 826 |
+
" print(f\" ...waiting for URL {i+1}s\")\n",
|
| 827 |
+
"else:\n",
|
| 828 |
+
" print(\"⚠️ URL not found yet — run: !cat /content/tunnel.log\")\n",
|
| 829 |
+
"\n",
|
| 830 |
+
"print(\"\\n✅ Cell 5B done — proceed to Cell 6\")"
|
| 831 |
+
],
|
| 832 |
+
"metadata": {
|
| 833 |
+
"colab": {
|
| 834 |
+
"base_uri": "https://localhost:8080/"
|
| 835 |
+
},
|
| 836 |
+
"id": "ub55lE6d3LMA",
|
| 837 |
+
"outputId": "9aac6f5a-e022-493b-868c-c5fd5e425297"
|
| 838 |
+
},
|
| 839 |
+
"execution_count": null,
|
| 840 |
+
"outputs": [
|
| 841 |
+
{
|
| 842 |
+
"output_type": "stream",
|
| 843 |
+
"name": "stdout",
|
| 844 |
+
"text": [
|
| 845 |
+
"Tunnel password: 34.87.70.249\n",
|
| 846 |
+
"Starting tunnel...\n",
|
| 847 |
+
"Tunnel PID: 19630\n",
|
| 848 |
+
" ...waiting for URL 1s\n",
|
| 849 |
+
"\n",
|
| 850 |
+
"🌐 PUBLIC URL: your url is: https://proud-bears-drum.loca.lt\n",
|
| 851 |
+
"\n",
|
| 852 |
+
"✅ Cell 5B done — proceed to Cell 6\n"
|
| 853 |
+
]
|
| 854 |
+
}
|
| 855 |
+
]
|
| 856 |
+
},
|
| 857 |
+
{
|
| 858 |
+
"cell_type": "code",
|
| 859 |
+
"source": [
|
| 860 |
+
"\n",
|
| 861 |
+
"# Auto-heal Ollama before each turn\n",
|
| 862 |
+
"import requests, subprocess, os, time\n",
|
| 863 |
+
"def ensure_ollama():\n",
|
| 864 |
+
" try:\n",
|
| 865 |
+
" requests.get(\"http://localhost:11434/api/tags\", timeout=2); return\n",
|
| 866 |
+
" except: pass\n",
|
| 867 |
+
" print(\"⚠️ Ollama dead — restarting...\")\n",
|
| 868 |
+
" env = os.environ.copy()\n",
|
| 869 |
+
" env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
|
| 870 |
+
" env[\"LD_LIBRARY_PATH\"] = \"/usr/lib64-nvidia:/usr/local/lib/ollama:/usr/local/cuda/lib64\"\n",
|
| 871 |
+
" env[\"OLLAMA_KEEP_ALIVE\"] = \"2h\"\n",
|
| 872 |
+
" subprocess.Popen([\"ollama\",\"serve\"], env=env,\n",
|
| 873 |
+
" stdout=open(\"/content/ollama.log\",\"a\"),\n",
|
| 874 |
+
" stderr=subprocess.STDOUT, preexec_fn=os.setpgrp)\n",
|
| 875 |
+
" for _ in range(30):\n",
|
| 876 |
+
" try: requests.get(\"http://localhost:11434/api/tags\",timeout=2); print(\"✅ Ollama back\"); return\n",
|
| 877 |
+
" except: time.sleep(1)\n",
|
| 878 |
+
"\n",
|
| 879 |
+
"ensure_ollama()\n",
|
| 880 |
+
"\n",
|
| 881 |
+
"import requests, json, time, subprocess\n",
|
| 882 |
+
"\n",
|
| 883 |
+
"SESSION_ID = \"debug_session_001\" # keep same across all turns\n",
|
| 884 |
+
"USER_MSG = \"I have chest pain\" # ← change each turn\n",
|
| 885 |
+
"\n",
|
| 886 |
+
"print(f\"Session: {SESSION_ID} | Message: {USER_MSG}\")\n",
|
| 887 |
+
"\n",
|
| 888 |
+
"t0 = time.time()\n",
|
| 889 |
+
"r = requests.post(\"http://localhost:7860/chat\",\n",
|
| 890 |
+
" json={\"session_id\": SESSION_ID, \"message\": USER_MSG},\n",
|
| 891 |
+
" timeout=120)\n",
|
| 892 |
+
"elapsed = time.time() - t0\n",
|
| 893 |
+
"\n",
|
| 894 |
+
"d = r.json()\n",
|
| 895 |
+
"print(f\"\\nHTTP {r.status_code} ({elapsed:.1f}s)\")\n",
|
| 896 |
+
"print(json.dumps(d, indent=2))\n",
|
| 897 |
+
"print(f\"\\nStage : {d.get('state')}\")\n",
|
| 898 |
+
"print(f\"Reply : {d.get('reply')}\")\n",
|
| 899 |
+
"\n",
|
| 900 |
+
"if d.get(\"brief\"):\n",
|
| 901 |
+
" print(\"\\n📋 CLINICAL BRIEF:\")\n",
|
| 902 |
+
" print(json.dumps(d[\"brief\"], indent=2))\n",
|
| 903 |
+
"\n",
|
| 904 |
+
"FALLBACK = {\"Could you tell me more?\", \"\", None, \"Could you please repeat that?\"}\n",
|
| 905 |
+
"if d.get(\"reply\") in FALLBACK:\n",
|
| 906 |
+
" print(\"\\n⚠️ FALLBACK reply — dumping api.log:\")\n",
|
| 907 |
+
" print(open(\"/content/api.log\").read()[-2000:])\n",
|
| 908 |
+
"\n",
|
| 909 |
+
"# Quick GPU check\n",
|
| 910 |
+
"smi = subprocess.run([\"nvidia-smi\",\"--query-gpu=memory.used,memory.total\",\n",
|
| 911 |
+
" \"--format=csv,noheader\"], capture_output=True, text=True)\n",
|
| 912 |
+
"ps = subprocess.run([\"ollama\",\"ps\"], capture_output=True, text=True)\n",
|
| 913 |
+
"print(f\"\\nGPU RAM : {smi.stdout.strip()}\")\n",
|
| 914 |
+
"print(f\"ollama ps: {ps.stdout.strip()}\")\n",
|
| 915 |
+
"print(subprocess.run([\"tail\",\"-n\",\"15\",\"/content/api.log\"],\n",
|
| 916 |
+
" capture_output=True, text=True).stdout)"
|
| 917 |
+
],
|
| 918 |
+
"metadata": {
|
| 919 |
+
"colab": {
|
| 920 |
+
"base_uri": "https://localhost:8080/"
|
| 921 |
+
},
|
| 922 |
+
"id": "4kokp0w50rQQ",
|
| 923 |
+
"outputId": "a0de78f6-fe97-487b-ed0d-f4ebaa4ed770"
|
| 924 |
+
},
|
| 925 |
+
"execution_count": null,
|
| 926 |
+
"outputs": [
|
| 927 |
+
{
|
| 928 |
+
"output_type": "stream",
|
| 929 |
+
"name": "stdout",
|
| 930 |
+
"text": [
|
| 931 |
+
"Session: debug_session_001 | Message: I have chest pain\n",
|
| 932 |
+
"\n",
|
| 933 |
+
"HTTP 200 (6.2s)\n",
|
| 934 |
+
"{\n",
|
| 935 |
+
" \"reply\": \"Can you tell me more about when this chest pain started?\",\n",
|
| 936 |
+
" \"state\": \"hpi\",\n",
|
| 937 |
+
" \"brief\": null\n",
|
| 938 |
+
"}\n",
|
| 939 |
+
"\n",
|
| 940 |
+
"Stage : hpi\n",
|
| 941 |
+
"Reply : Can you tell me more about when this chest pain started?\n",
|
| 942 |
+
"\n",
|
| 943 |
+
"GPU RAM : 5369 MiB, 15360 MiB\n",
|
| 944 |
+
"ollama ps: NAME ID SIZE PROCESSOR CONTEXT UNTIL \n",
|
| 945 |
+
"llama3.1:8b 46e0c10c039e 5.5 GB 100% GPU 4096 2 hours from now\n",
|
| 946 |
+
"INFO: Waiting for application startup.\n",
|
| 947 |
+
"INFO: Application startup complete.\n",
|
| 948 |
+
"INFO: Uvicorn running on http://0.0.0.0:7860 (Press CTRL+C to quit)\n",
|
| 949 |
+
"INFO: 127.0.0.1:49798 - \"GET /health HTTP/1.1\" 200 OK\n",
|
| 950 |
+
"\n",
|
| 951 |
+
"[1777158113.511] [API] -> POST /chat received for debug_session_001\n",
|
| 952 |
+
"[1777158113.512] [API] Read existing state snapshot.\n",
|
| 953 |
+
"[1777158113.512] [API] Starting new graph invoke...\n",
|
| 954 |
+
"[1777158113.521] [Graph Node] Requesting LLM inference...\n",
|
| 955 |
+
"[Ollama] Starting inference for model 'llama3.1:8b'...\n",
|
| 956 |
+
"[Ollama] Inference completed in 6.17s total.\n",
|
| 957 |
+
"[1777158119.696] [Graph Node] LLM returned. Preparing node dictionaries...\n",
|
| 958 |
+
"[1777158119.697] [API] <- Graph invoke returned in 6.19s\n",
|
| 959 |
+
"[1777158119.698] [API] Chat completed in 6.19s total. Reply length: 56\n",
|
| 960 |
+
"INFO: 127.0.0.1:49810 - \"POST /chat HTTP/1.1\" 200 OK\n",
|
| 961 |
+
"\n"
|
| 962 |
+
]
|
| 963 |
+
}
|
| 964 |
+
]
|
| 965 |
+
},
|
| 966 |
+
{
|
| 967 |
+
"cell_type": "code",
|
| 968 |
+
"source": [
|
| 969 |
+
"import subprocess\n",
|
| 970 |
+
"for log in [\"/content/api.log\", \"/content/ollama.log\"]:\n",
|
| 971 |
+
" print(f\"\\n{'='*55}\\n {log}\\n{'='*55}\")\n",
|
| 972 |
+
" print(subprocess.run([\"tail\",\"-n\",\"40\",log], capture_output=True, text=True).stdout or \"(empty)\")\n"
|
| 973 |
+
],
|
| 974 |
+
"metadata": {
|
| 975 |
+
"colab": {
|
| 976 |
+
"base_uri": "https://localhost:8080/"
|
| 977 |
+
},
|
| 978 |
+
"id": "1WZOi3Hn0sff",
|
| 979 |
+
"outputId": "6edc93ea-2f1b-4fb8-c631-bb84032fe1e8"
|
| 980 |
+
},
|
| 981 |
+
"execution_count": null,
|
| 982 |
+
"outputs": [
|
| 983 |
+
{
|
| 984 |
+
"output_type": "stream",
|
| 985 |
+
"name": "stdout",
|
| 986 |
+
"text": [
|
| 987 |
+
"\n",
|
| 988 |
+
"=======================================================\n",
|
| 989 |
+
" /content/api.log\n",
|
| 990 |
+
"=======================================================\n",
|
| 991 |
+
"INFO: Started server process [19612]\n",
|
| 992 |
+
"INFO: Waiting for application startup.\n",
|
| 993 |
+
"INFO: Application startup complete.\n",
|
| 994 |
+
"INFO: Uvicorn running on http://0.0.0.0:7860 (Press CTRL+C to quit)\n",
|
| 995 |
+
"INFO: 127.0.0.1:49798 - \"GET /health HTTP/1.1\" 200 OK\n",
|
| 996 |
+
"\n",
|
| 997 |
+
"[1777158113.511] [API] -> POST /chat received for debug_session_001\n",
|
| 998 |
+
"[1777158113.512] [API] Read existing state snapshot.\n",
|
| 999 |
+
"[1777158113.512] [API] Starting new graph invoke...\n",
|
| 1000 |
+
"[1777158113.521] [Graph Node] Requesting LLM inference...\n",
|
| 1001 |
+
"[Ollama] Starting inference for model 'llama3.1:8b'...\n",
|
| 1002 |
+
"[Ollama] Inference completed in 6.17s total.\n",
|
| 1003 |
+
"[1777158119.696] [Graph Node] LLM returned. Preparing node dictionaries...\n",
|
| 1004 |
+
"[1777158119.697] [API] <- Graph invoke returned in 6.19s\n",
|
| 1005 |
+
"[1777158119.698] [API] Chat completed in 6.19s total. Reply length: 56\n",
|
| 1006 |
+
"INFO: 127.0.0.1:49810 - \"POST /chat HTTP/1.1\" 200 OK\n",
|
| 1007 |
+
"\n",
|
| 1008 |
+
"\n",
|
| 1009 |
+
"=======================================================\n",
|
| 1010 |
+
" /content/ollama.log\n",
|
| 1011 |
+
"=======================================================\n",
|
| 1012 |
+
"load_tensors: CPU_Mapped model buffer size = 281.81 MiB\n",
|
| 1013 |
+
"load_tensors: CUDA0 model buffer size = 4403.49 MiB\n",
|
| 1014 |
+
"llama_context: constructing llama_context\n",
|
| 1015 |
+
"llama_context: n_seq_max = 1\n",
|
| 1016 |
+
"llama_context: n_ctx = 4096\n",
|
| 1017 |
+
"llama_context: n_ctx_seq = 4096\n",
|
| 1018 |
+
"llama_context: n_batch = 512\n",
|
| 1019 |
+
"llama_context: n_ubatch = 512\n",
|
| 1020 |
+
"llama_context: causal_attn = 1\n",
|
| 1021 |
+
"llama_context: flash_attn = auto\n",
|
| 1022 |
+
"llama_context: kv_unified = false\n",
|
| 1023 |
+
"llama_context: freq_base = 500000.0\n",
|
| 1024 |
+
"llama_context: freq_scale = 1\n",
|
| 1025 |
+
"llama_context: n_ctx_seq (4096) < n_ctx_train (131072) -- the full capacity of the model will not be utilized\n",
|
| 1026 |
+
"llama_context: CUDA_Host output buffer size = 0.50 MiB\n",
|
| 1027 |
+
"llama_kv_cache: CUDA0 KV buffer size = 512.00 MiB\n",
|
| 1028 |
+
"llama_kv_cache: size = 512.00 MiB ( 4096 cells, 32 layers, 1/1 seqs), K (f16): 256.00 MiB, V (f16): 256.00 MiB\n",
|
| 1029 |
+
"llama_context: Flash Attention was auto, set to enabled\n",
|
| 1030 |
+
"llama_context: CUDA0 compute buffer size = 258.50 MiB\n",
|
| 1031 |
+
"llama_context: CUDA_Host compute buffer size = 16.01 MiB\n",
|
| 1032 |
+
"llama_context: graph nodes = 999\n",
|
| 1033 |
+
"llama_context: graph splits = 2\n",
|
| 1034 |
+
"time=2026-04-25T23:01:03.205Z level=INFO source=server.go:1402 msg=\"llama runner started in 2.68 seconds\"\n",
|
| 1035 |
+
"time=2026-04-25T23:01:03.205Z level=INFO source=sched.go:561 msg=\"loaded runners\" count=1\n",
|
| 1036 |
+
"time=2026-04-25T23:01:03.205Z level=INFO source=server.go:1364 msg=\"waiting for llama runner to start responding\"\n",
|
| 1037 |
+
"time=2026-04-25T23:01:03.206Z level=INFO source=server.go:1402 msg=\"llama runner started in 2.68 seconds\"\n",
|
| 1038 |
+
"[GIN] 2026/04/25 - 23:01:09 | 200 | 9.705078074s | 127.0.0.1 | POST \"/api/chat\"\n",
|
| 1039 |
+
"[GIN] 2026/04/25 - 23:01:09 | 200 | 59.798µs | 127.0.0.1 | HEAD \"/\"\n",
|
| 1040 |
+
"[GIN] 2026/04/25 - 23:01:09 | 200 | 129.278µs | 127.0.0.1 | GET \"/api/ps\"\n",
|
| 1041 |
+
"[GIN] 2026/04/25 - 23:01:36 | 200 | 354.357µs | 127.0.0.1 | GET \"/api/tags\"\n",
|
| 1042 |
+
"[GIN] 2026/04/25 - 23:01:36 | 200 | 33.021µs | 127.0.0.1 | HEAD \"/\"\n",
|
| 1043 |
+
"[GIN] 2026/04/25 - 23:01:36 | 200 | 38.107µs | 127.0.0.1 | GET \"/api/ps\"\n",
|
| 1044 |
+
"[GIN] 2026/04/25 - 23:01:42 | 200 | 5.606222729s | 127.0.0.1 | POST \"/api/chat\"\n",
|
| 1045 |
+
"[GIN] 2026/04/25 - 23:01:47 | 200 | 5.227727634s | 127.0.0.1 | POST \"/api/chat\"\n",
|
| 1046 |
+
"[GIN] 2026/04/25 - 23:01:47 | 200 | 510.297µs | 127.0.0.1 | GET \"/api/tags\"\n",
|
| 1047 |
+
"[GIN] 2026/04/25 - 23:01:51 | 200 | 622.673µs | 127.0.0.1 | GET \"/api/tags\"\n",
|
| 1048 |
+
"[GIN] 2026/04/25 - 23:01:53 | 200 | 655.176µs | 127.0.0.1 | GET \"/api/tags\"\n",
|
| 1049 |
+
"[GIN] 2026/04/25 - 23:01:59 | 200 | 6.17069071s | 127.0.0.1 | POST \"/api/chat\"\n",
|
| 1050 |
+
"[GIN] 2026/04/25 - 23:01:59 | 200 | 35.997µs | 127.0.0.1 | HEAD \"/\"\n",
|
| 1051 |
+
"[GIN] 2026/04/25 - 23:01:59 | 200 | 38.825µs | 127.0.0.1 | GET \"/api/ps\"\n",
|
| 1052 |
+
"\n"
|
| 1053 |
+
]
|
| 1054 |
+
}
|
| 1055 |
+
]
|
| 1056 |
+
},
|
| 1057 |
+
{
|
| 1058 |
+
"cell_type": "code",
|
| 1059 |
+
"source": [
|
| 1060 |
+
"from google.colab import drive\n",
|
| 1061 |
+
"drive.mount('/content/drive')"
|
| 1062 |
+
],
|
| 1063 |
+
"metadata": {
|
| 1064 |
+
"colab": {
|
| 1065 |
+
"base_uri": "https://localhost:8080/",
|
| 1066 |
+
"height": 356
|
| 1067 |
+
},
|
| 1068 |
+
"id": "xxtmukiS11_T",
|
| 1069 |
+
"outputId": "016c6964-4288-4647-ae12-b28b067c2552"
|
| 1070 |
+
},
|
| 1071 |
+
"execution_count": null,
|
| 1072 |
+
"outputs": [
|
| 1073 |
+
{
|
| 1074 |
+
"output_type": "error",
|
| 1075 |
+
"ename": "MessageError",
|
| 1076 |
+
"evalue": "Error: credential propagation was unsuccessful",
|
| 1077 |
+
"traceback": [
|
| 1078 |
+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
| 1079 |
+
"\u001b[0;31mMessageError\u001b[0m Traceback (most recent call last)",
|
| 1080 |
+
"\u001b[0;32m/tmp/ipykernel_14360/1408506528.py\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mgoogle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcolab\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mdrive\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdrive\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/content/drive'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
| 1081 |
+
"\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/drive.py\u001b[0m in \u001b[0;36mmount\u001b[0;34m(mountpoint, force_remount, timeout_ms, readonly)\u001b[0m\n\u001b[1;32m 95\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mmount\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmountpoint\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mforce_remount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_ms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m120000\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreadonly\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 96\u001b[0m \u001b[0;34m\"\"\"Mount your Google Drive at the specified mountpoint path.\"\"\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 97\u001b[0;31m return _mount(\n\u001b[0m\u001b[1;32m 98\u001b[0m \u001b[0mmountpoint\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 99\u001b[0m \u001b[0mforce_remount\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mforce_remount\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
| 1082 |
+
"\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/drive.py\u001b[0m in \u001b[0;36m_mount\u001b[0;34m(mountpoint, force_remount, timeout_ms, ephemeral, readonly)\u001b[0m\n\u001b[1;32m 132\u001b[0m )\n\u001b[1;32m 133\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mephemeral\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 134\u001b[0;31m _message.blocking_request(\n\u001b[0m\u001b[1;32m 135\u001b[0m \u001b[0;34m'request_auth'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'authType'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'dfs_ephemeral'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
|
| 1083 |
+
"\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/_message.py\u001b[0m in \u001b[0;36mblocking_request\u001b[0;34m(request_type, request, timeout_sec, parent)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0mrequest_type\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparent\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mparent\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mexpect_reply\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 175\u001b[0m )\n\u001b[0;32m--> 176\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mread_reply_from_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mrequest_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtimeout_sec\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
|
| 1084 |
+
"\u001b[0;32m/usr/local/lib/python3.12/dist-packages/google/colab/_message.py\u001b[0m in \u001b[0;36mread_reply_from_input\u001b[0;34m(message_id, timeout_sec)\u001b[0m\n\u001b[1;32m 101\u001b[0m ):\n\u001b[1;32m 102\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m'error'\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mreply\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 103\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mMessageError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreply\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'error'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 104\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mreply\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'data'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 105\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
|
| 1085 |
+
"\u001b[0;31mMessageError\u001b[0m: Error: credential propagation was unsuccessful"
|
| 1086 |
+
]
|
| 1087 |
+
}
|
| 1088 |
+
]
|
| 1089 |
+
},
|
| 1090 |
+
{
|
| 1091 |
+
"cell_type": "code",
|
| 1092 |
+
"source": [
|
| 1093 |
+
"import subprocess, os, time\n",
|
| 1094 |
+
"\n",
|
| 1095 |
+
"subprocess.run([\"pkill\",\"-f\",\"ollama\"])\n",
|
| 1096 |
+
"time.sleep(3)\n",
|
| 1097 |
+
"\n",
|
| 1098 |
+
"env = os.environ.copy()\n",
|
| 1099 |
+
"env.pop(\"CUDA_VISIBLE_DEVICES\", None)\n",
|
| 1100 |
+
"\n",
|
| 1101 |
+
"subprocess.Popen([\"ollama\",\"serve\"], env=env)\n",
|
| 1102 |
+
"print(\"Restarted Ollama\")"
|
| 1103 |
+
],
|
| 1104 |
+
"metadata": {
|
| 1105 |
+
"colab": {
|
| 1106 |
+
"base_uri": "https://localhost:8080/"
|
| 1107 |
+
},
|
| 1108 |
+
"id": "SKcQv5Ng0thH",
|
| 1109 |
+
"outputId": "e3b95518-e432-4669-8fd0-50b86fdb216f"
|
| 1110 |
+
},
|
| 1111 |
+
"execution_count": null,
|
| 1112 |
+
"outputs": [
|
| 1113 |
+
{
|
| 1114 |
+
"output_type": "stream",
|
| 1115 |
+
"name": "stdout",
|
| 1116 |
+
"text": [
|
| 1117 |
+
"Restarted Ollama\n"
|
| 1118 |
+
]
|
| 1119 |
+
}
|
| 1120 |
+
]
|
| 1121 |
+
},
|
| 1122 |
+
{
|
| 1123 |
+
"cell_type": "code",
|
| 1124 |
+
"source": [],
|
| 1125 |
+
"metadata": {
|
| 1126 |
+
"id": "T51mIyDV1wiD"
|
| 1127 |
+
},
|
| 1128 |
+
"execution_count": null,
|
| 1129 |
+
"outputs": []
|
| 1130 |
+
}
|
| 1131 |
+
]
|
| 1132 |
+
}
|