Spaces:
Sleeping
Sleeping
Commit ·
c32bf13
0
Parent(s):
all done
Browse files- .env.example +13 -0
- .gitignore +12 -0
- DEMO_SCRIPT.md +238 -0
- DEVLOG.md +256 -0
- INTRO_VIDEO_SCRIPT.md +239 -0
- README.md +73 -0
- SUBMISSION_WRITEUP.md +139 -0
- agents/__init__.py +0 -0
- agents/cloud_agents.py +274 -0
- agents/symptom_agent.py +112 -0
- app.py +490 -0
- database/__init__.py +0 -0
- database/db.py +150 -0
- rag/__init__.py +0 -0
- rag/data/essential_medicines.json +416 -0
- rag/data/icd10_common.json +103 -0
- rag/retriever.py +149 -0
- requirements.txt +11 -0
- transcription/__init__.py +0 -0
- transcription/transcriber.py +125 -0
.env.example
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Google AI Studio API key for cloud Gemma 4
|
| 2 |
+
# Get yours at https://aistudio.google.com
|
| 3 |
+
GEMINI_API_KEY=your_api_key_here
|
| 4 |
+
|
| 5 |
+
# Whisper model size: tiny, base, small, medium, large-v3
|
| 6 |
+
WHISPER_MODEL=base
|
| 7 |
+
|
| 8 |
+
# Ollama local model for symptom extraction
|
| 9 |
+
OLLAMA_MODEL=gemma4:e2b
|
| 10 |
+
|
| 11 |
+
# Cloud Gemma model for SOAP notes, summary, translation
|
| 12 |
+
# Available: gemma-4-26b-a4b-it (faster MoE) or gemma-4-31b-it (most capable)
|
| 13 |
+
CLOUD_MODEL=gemma-4-26b-a4b-it
|
.gitignore
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.pyc
|
| 4 |
+
*.pyo
|
| 5 |
+
*.db
|
| 6 |
+
*.sqlite
|
| 7 |
+
.DS_Store
|
| 8 |
+
venv/
|
| 9 |
+
.venv/
|
| 10 |
+
dist/
|
| 11 |
+
*.egg-info/
|
| 12 |
+
chroma_db/
|
DEMO_SCRIPT.md
ADDED
|
@@ -0,0 +1,238 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hospital Copilot — Demo Video Script
|
| 2 |
+
## "The Last Patient"
|
| 3 |
+
|
| 4 |
+
**Hackathon:** Gemma 4 for Good
|
| 5 |
+
**Video target runtime:** 3 minutes 30 seconds
|
| 6 |
+
**Tone:** Emotional, human, hopeful
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
## Logline
|
| 11 |
+
|
| 12 |
+
> *"In Ghana, one doctor serves an average of 10,000 patients.
|
| 13 |
+
> After seeing 40 patients, the last one should not get less care than the first."*
|
| 14 |
+
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
## Characters
|
| 18 |
+
|
| 19 |
+
| Role | Description |
|
| 20 |
+
|---|---|
|
| 21 |
+
| **Dr. Kwame Mensah** | Young district hospital doctor, mid-30s, visibly exhausted at end of shift |
|
| 22 |
+
| **Maame Akosua** | Elderly woman, 68, from a rural village, brought in by her grandson |
|
| 23 |
+
| **Kofi** | Grandson, 20s, worried, occasionally translates for his grandmother |
|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
## Setting
|
| 28 |
+
|
| 29 |
+
**Location:** Small district hospital consultation room
|
| 30 |
+
**Time of day:** Late afternoon — warm golden light, end of a long day
|
| 31 |
+
**Props needed:**
|
| 32 |
+
- Desk with laptop showing Hospital Copilot
|
| 33 |
+
- Stethoscope, BP cuff
|
| 34 |
+
- Stack of paper files (the visual symbol of burnout)
|
| 35 |
+
- Small fan, pen holder, basic clinic decor
|
| 36 |
+
- Waiting room shot (even 5 people reads as "busy" on camera)
|
| 37 |
+
|
| 38 |
+
---
|
| 39 |
+
|
| 40 |
+
## Scene-by-Scene Breakdown
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
### SCENE 1 — The Weight
|
| 45 |
+
**Duration:** 30 seconds | **No dialogue**
|
| 46 |
+
|
| 47 |
+
> Camera opens on a wall clock: **4:47 PM**
|
| 48 |
+
>
|
| 49 |
+
> Dr. Mensah is hunched over his desk, hand-writing notes from a thick stack of paper files.
|
| 50 |
+
> His eyes are tired. His hand moves slowly.
|
| 51 |
+
>
|
| 52 |
+
> A nurse opens the door:
|
| 53 |
+
> **Nurse:** *"Doctor, one more patient."*
|
| 54 |
+
>
|
| 55 |
+
> He pauses. Looks at the unfinished stack. Then nods.
|
| 56 |
+
> **Dr. Mensah:** *"Send them in."*
|
| 57 |
+
|
| 58 |
+
**Director note:** Linger on the paper stack. That stack = the problem Hospital Copilot solves.
|
| 59 |
+
|
| 60 |
+
---
|
| 61 |
+
|
| 62 |
+
### SCENE 2 — The Patient Arrives
|
| 63 |
+
**Duration:** 20 seconds
|
| 64 |
+
|
| 65 |
+
> Maame Akosua shuffles in slowly, supported by Kofi.
|
| 66 |
+
> She looks unwell. A little frightened. Out of place.
|
| 67 |
+
>
|
| 68 |
+
> Dr. Mensah stands to greet her warmly — despite his exhaustion.
|
| 69 |
+
> He quietly opens Hospital Copilot on his laptop.
|
| 70 |
+
> He clicks **▶ Start Consultation.**
|
| 71 |
+
>
|
| 72 |
+
> The mic activates. He turns his full attention to her.
|
| 73 |
+
> The laptop screen is visible in the background — words beginning to appear.
|
| 74 |
+
|
| 75 |
+
**Director note:** The app should be visible but not the focus. The focus is the human connection.
|
| 76 |
+
|
| 77 |
+
---
|
| 78 |
+
|
| 79 |
+
### SCENE 3 — The Consultation
|
| 80 |
+
**Duration:** 90 seconds
|
| 81 |
+
|
| 82 |
+
> Dr. Mensah and Maame Akosua speak. Kofi occasionally translates.
|
| 83 |
+
> The conversation flows naturally — unhurried, warm.
|
| 84 |
+
|
| 85 |
+
**CONSULTATION SCRIPT:**
|
| 86 |
+
|
| 87 |
+
> **Doctor:** Good afternoon Maame. How have you been since your last visit?
|
| 88 |
+
>
|
| 89 |
+
> **Patient (Kofi translating):** She says not too well. She has been having headaches every morning for two weeks. Especially at the back of her head.
|
| 90 |
+
>
|
| 91 |
+
> **Doctor:** Has she been taking her blood pressure medication?
|
| 92 |
+
>
|
| 93 |
+
> **Kofi:** She finished her Amlodipine two weeks ago. She could not afford to buy more.
|
| 94 |
+
>
|
| 95 |
+
> **Doctor:** I understand. Let me check her pressure now.
|
| 96 |
+
> *(places BP cuff, reads monitor)*
|
| 97 |
+
> It is 162 over 98 — that is quite high. The headaches are from the blood pressure.
|
| 98 |
+
> Any dizziness? Blurred vision?
|
| 99 |
+
>
|
| 100 |
+
> **Kofi:** Sometimes dizzy when she stands up. No problem with vision.
|
| 101 |
+
>
|
| 102 |
+
> **Doctor:** Any chest pain or shortness of breath?
|
| 103 |
+
>
|
| 104 |
+
> **Kofi:** No.
|
| 105 |
+
>
|
| 106 |
+
> **Doctor:** Good. She needs to restart her Amlodipine — five milligrams every morning.
|
| 107 |
+
> I am also adding Lisinopril, ten milligrams once a day.
|
| 108 |
+
> She must reduce salt, avoid alcohol, walk thirty minutes daily.
|
| 109 |
+
> Come back in two weeks. If she gets severe headache or chest pain — come immediately.
|
| 110 |
+
>
|
| 111 |
+
> **Kofi (to grandmother in Twi):** *...translates...*
|
| 112 |
+
>
|
| 113 |
+
> **Maame Akosua (nodding slowly):** *"Yoo, medaase."* (Okay, thank you.)
|
| 114 |
+
|
| 115 |
+
**Camera cuts during this scene:**
|
| 116 |
+
- Close-up on Maame Akosua's face — worried but trusting
|
| 117 |
+
- Laptop screen in background: words streaming into transcript in real time
|
| 118 |
+
- Dr. Mensah's hands on the BP cuff — competent, caring
|
| 119 |
+
- The paper stack on the desk — untouched, waiting
|
| 120 |
+
|
| 121 |
+
---
|
| 122 |
+
|
| 123 |
+
### SCENE 4 — The Wow Moment
|
| 124 |
+
**Duration:** 60 seconds
|
| 125 |
+
|
| 126 |
+
> Consultation ends. Dr. Mensah clicks **⏹ End Consultation.**
|
| 127 |
+
>
|
| 128 |
+
> He looks at Maame Akosua kindly while the screen processes.
|
| 129 |
+
> Then clicks **⚡ Generate Notes.**
|
| 130 |
+
>
|
| 131 |
+
> **Camera slowly pushes in on the laptop screen:**
|
| 132 |
+
>
|
| 133 |
+
> — Transcript appears with speaker labels:
|
| 134 |
+
> *"Doctor: Her pressure is 162 over 98..."*
|
| 135 |
+
> *"Patient: Sometimes dizzy when she stands up..."*
|
| 136 |
+
>
|
| 137 |
+
> — ICD-10 panel populates:
|
| 138 |
+
> *"I10 — Essential (primary) hypertension (confidence: 0.94)"*
|
| 139 |
+
>
|
| 140 |
+
> — Drug reference appears:
|
| 141 |
+
> *"Amlodipine (Calcium Channel Blocker) — 5mg once daily..."*
|
| 142 |
+
> *"Lisinopril (ACE Inhibitor) — 10mg once daily..."*
|
| 143 |
+
>
|
| 144 |
+
> — SOAP note renders, fully formatted with bold section headers
|
| 145 |
+
>
|
| 146 |
+
> — Patient Summary appears in plain English
|
| 147 |
+
>
|
| 148 |
+
> **Dr. Mensah reads the patient summary aloud, slowly, to Kofi:**
|
| 149 |
+
> *"Your grandmother has high blood pressure. She needs to take her two tablets every day.
|
| 150 |
+
> Reduce salt in her food. Walk a little every day.
|
| 151 |
+
> Come back in two weeks. If she gets a very bad headache or chest pain, come immediately."*
|
| 152 |
+
>
|
| 153 |
+
> Kofi translates quietly into Twi.
|
| 154 |
+
>
|
| 155 |
+
> Maame Akosua looks up. For the first time — she understands her own diagnosis.
|
| 156 |
+
> She nods. A small, relieved smile.
|
| 157 |
+
>
|
| 158 |
+
> **Kofi (softly, to Dr. Mensah):** *"She says... thank you for explaining. The other doctors never explained."*
|
| 159 |
+
>
|
| 160 |
+
> Dr. Mensah nods quietly. No words needed.
|
| 161 |
+
|
| 162 |
+
**Director note:** Hold on Maame Akosua's face when she smiles. This is the emotional peak of the video.
|
| 163 |
+
|
| 164 |
+
---
|
| 165 |
+
|
| 166 |
+
### SCENE 5 — The Contrast
|
| 167 |
+
**Duration:** 20 seconds | **No dialogue**
|
| 168 |
+
|
| 169 |
+
> Dr. Mensah closes the consultation on his laptop. The notes are saved.
|
| 170 |
+
> He looks at the paper stack. Picks up one file — and closes it.
|
| 171 |
+
> The work is done.
|
| 172 |
+
>
|
| 173 |
+
> He looks at the clock: **5:03 PM.**
|
| 174 |
+
>
|
| 175 |
+
> He stands. Puts on his jacket. Turns off the desk lamp.
|
| 176 |
+
> He walks out — on time, for once.
|
| 177 |
+
>
|
| 178 |
+
> Final shot: the empty chair where Maame Akosua sat.
|
| 179 |
+
> The laptop screen still glowing softly.
|
| 180 |
+
|
| 181 |
+
---
|
| 182 |
+
|
| 183 |
+
### SCENE 6 — Title Card
|
| 184 |
+
**Duration:** 10 seconds | **Music swells**
|
| 185 |
+
|
| 186 |
+
```
|
| 187 |
+
Hospital Copilot
|
| 188 |
+
|
| 189 |
+
Built for doctors who see 40 patients a day.
|
| 190 |
+
So the last patient gets the same care as the first.
|
| 191 |
+
|
| 192 |
+
─────────────────────────────────────────
|
| 193 |
+
Powered by Gemma 4 · Built for Ghana
|
| 194 |
+
Gemma 4 for Good Hackathon 2026
|
| 195 |
+
```
|
| 196 |
+
|
| 197 |
+
---
|
| 198 |
+
|
| 199 |
+
## Why This Lands With Judges
|
| 200 |
+
|
| 201 |
+
| Story element | What it communicates |
|
| 202 |
+
|---|---|
|
| 203 |
+
| Clock at 4:47 PM + paper stack | The problem is real, visible, universal |
|
| 204 |
+
| Doctor still standing up to greet her | He is a good doctor being let down by a broken system |
|
| 205 |
+
| AI invisible while doctor talks | Correct human-AI relationship — AI serves, human cares |
|
| 206 |
+
| ICD codes + dosages appearing | Clinical credibility — this is not a chatbot, it's a medical tool |
|
| 207 |
+
| Patient finally understanding her diagnosis | The mission of the whole project in one moment |
|
| 208 |
+
| "The other doctors never explained" | Indicts the old system without saying a word about AI |
|
| 209 |
+
| Doctor going home on time | The promise: better system = better life for everyone |
|
| 210 |
+
|
| 211 |
+
---
|
| 212 |
+
|
| 213 |
+
## Filming Checklist
|
| 214 |
+
|
| 215 |
+
- [ ] Clinic room location secured
|
| 216 |
+
- [ ] Laptop positioned so screen is visible in background shots
|
| 217 |
+
- [ ] Hospital Copilot running and tested before filming
|
| 218 |
+
- [ ] BP cuff and stethoscope as props
|
| 219 |
+
- [ ] Paper file stack prepared
|
| 220 |
+
- [ ] Scene 4 filmed with real app — let notes generate live, do not fake it
|
| 221 |
+
- [ ] Maame Akosua's smile — hold for 3 seconds minimum
|
| 222 |
+
- [ ] Background music: soft, warm, instrumental — swell on Scene 6
|
| 223 |
+
|
| 224 |
+
---
|
| 225 |
+
|
| 226 |
+
## Consultation Script (Standalone — for practising)
|
| 227 |
+
|
| 228 |
+
> Doctor: Good afternoon Maame. How have you been since your last visit?
|
| 229 |
+
> Patient: Not too well. I have been having headaches every morning. At the back of my head.
|
| 230 |
+
> Doctor: Have you been taking your blood pressure medication?
|
| 231 |
+
> Patient: I finished my Amlodipine two weeks ago. I could not afford to buy more.
|
| 232 |
+
> Doctor: I see. Let me check your pressure. It is 162 over 98 — quite high. Any dizziness or blurred vision?
|
| 233 |
+
> Patient: Sometimes dizzy when I stand up quickly. No problem with vision.
|
| 234 |
+
> Doctor: Any chest pain or shortness of breath?
|
| 235 |
+
> Patient: No, nothing like that.
|
| 236 |
+
> Doctor: Good. You need to restart your Amlodipine — five milligrams every morning. I am also adding Lisinopril, ten milligrams once a day. Reduce salt in your food, avoid alcohol, and walk thirty minutes daily. Come back in two weeks. If you get a severe headache or chest pain — come in immediately.
|
| 237 |
+
> Patient: Okay doctor. I will take the medication every day this time.
|
| 238 |
+
> Doctor: Good. And please do not stop without telling me first.
|
DEVLOG.md
ADDED
|
@@ -0,0 +1,256 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hospital Copilot — Development Log
|
| 2 |
+
|
| 3 |
+
**Hackathon:** Gemma 4 for Good
|
| 4 |
+
**Team:** Ricky (fredrickandoh17@gmail.com)
|
| 5 |
+
**Stack:** Python · Gradio · Gemma 4 · faster-whisper · ChromaDB · SQLite
|
| 6 |
+
**Started:** 2026-05-16
|
| 7 |
+
|
| 8 |
+
---
|
| 9 |
+
|
| 10 |
+
## Project Goal
|
| 11 |
+
|
| 12 |
+
Build an AI clinical assistant that listens to doctor-patient consultations and automatically produces:
|
| 13 |
+
- Live transcription of the conversation
|
| 14 |
+
- Structured symptom extraction (symptoms, medications, duration, allergies, follow-up actions)
|
| 15 |
+
- SOAP notes grounded with real ICD-10 codes and drug dosages
|
| 16 |
+
- Plain-language patient summary
|
| 17 |
+
- Structured patient records saved to a local database
|
| 18 |
+
|
| 19 |
+
**Why:** Reduce doctor burnout from paperwork, improve care quality, and support healthcare workers in low-resource settings like Ghana.
|
| 20 |
+
|
| 21 |
+
---
|
| 22 |
+
|
| 23 |
+
## Architecture Overview
|
| 24 |
+
|
| 25 |
+
```
|
| 26 |
+
Microphone
|
| 27 |
+
└─► faster-whisper (STT, local CPU) → raw transcript
|
| 28 |
+
└─► Gemma 4 26B cloud (speaker labelling) → Doctor:/Patient: transcript
|
| 29 |
+
├─► Gemma 4 E2B via Ollama (symptom JSON) → local CPU
|
| 30 |
+
└─► ChromaDB + MiniLM (RAG retrieval) → ICD-10 codes + drug info
|
| 31 |
+
└─► Gemma 4 26B cloud (SOAP note, patient summary)
|
| 32 |
+
└─► SQLite (patients, sessions, notes, symptoms)
|
| 33 |
+
└─► Gradio UI
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
---
|
| 37 |
+
|
| 38 |
+
## Features Implemented
|
| 39 |
+
|
| 40 |
+
### Core Pipeline
|
| 41 |
+
| Feature | Status | Implementation |
|
| 42 |
+
|---|---|---|
|
| 43 |
+
| Live mic transcription | ✅ | faster-whisper `small` model, 3s chunks, VAD filter |
|
| 44 |
+
| Speaker diarization | ✅ | Gemma 4 post-hoc Doctor:/Patient: labelling |
|
| 45 |
+
| Symptom extraction | ✅ | Gemma 4 E2B via Ollama — JSON: chief complaint, symptoms, duration, severity, medications, allergies, vitals, history, follow-up actions |
|
| 46 |
+
| RAG ICD-10 retrieval | ✅ | ChromaDB + all-MiniLM-L6-v2, 90+ Ghana-relevant codes |
|
| 47 |
+
| RAG drug grounding | ✅ | ChromaDB, 40+ WHO Essential Medicines with dosages |
|
| 48 |
+
| SOAP note generation | ✅ | Gemma 4 26B cloud, RAG context injected into prompt |
|
| 49 |
+
| Patient summary | ✅ | Gemma 4 26B cloud, plain English |
|
| 50 |
+
| Patient records (SQLite) | ✅ | patients, sessions, notes, symptoms tables |
|
| 51 |
+
| Patient registration | ✅ | Name, DOB, gender, phone |
|
| 52 |
+
| Records viewer | ✅ | Load any patient's most recent session |
|
| 53 |
+
|
| 54 |
+
### Translation (Twi/Akan)
|
| 55 |
+
| Status | Note |
|
| 56 |
+
|---|---|
|
| 57 |
+
| ⏸️ Paused | Gemma 4 returned 500 INTERNAL errors on Twi translation. Identified root cause: Twi is a low-resource language and Gemma 4 is not purpose-built for it. Decision: implement NLLB-200 (Meta's No Language Left Behind model) which was specifically trained on Akan/Twi. Deferred until core pipeline is stable. |
|
| 58 |
+
|
| 59 |
+
### Gemma 4 Advanced Features (Added 2026-05-18)
|
| 60 |
+
| Feature | Status | Implementation |
|
| 61 |
+
|---|---|---|
|
| 62 |
+
| **Reasoning mode (thinking)** | ✅ | `ThinkingConfig(thinking_budget=2048, include_thoughts=False)` on SOAP generation — Gemma 4 reasons step-by-step internally before writing the note |
|
| 63 |
+
| **Function calling (symptom extraction)** | ✅ | `FunctionDeclaration` schema with `FunctionCallingMode.ANY` — guaranteed valid structured output, no JSON parsing |
|
| 64 |
+
| **Multimodal image/document analysis** | ✅ | `Part.from_bytes()` with lab result / prescription images — extracted findings injected into SOAP context |
|
| 65 |
+
|
| 66 |
+
---
|
| 67 |
+
|
| 68 |
+
## Technical Decisions
|
| 69 |
+
|
| 70 |
+
### 1. Multi-agent Gemma 4 architecture
|
| 71 |
+
**Decision:** Use multiple specialised Gemma 4 instances rather than one large model for everything.
|
| 72 |
+
**Reasoning:** Different tasks have different speed/accuracy requirements:
|
| 73 |
+
- Symptom extraction: needs to be fast, structured JSON → small local model (E2B)
|
| 74 |
+
- SOAP notes: needs medical reasoning and long output → large cloud model (26B)
|
| 75 |
+
- Speaker labelling: needs language understanding → cloud model
|
| 76 |
+
- Embeddings: needs speed, runs every session → lightweight MiniLM locally
|
| 77 |
+
|
| 78 |
+
### 2. Local vs cloud split
|
| 79 |
+
**Decision:** Run small models locally (Ollama E2B, Whisper, MiniLM, ChromaDB), large inference on cloud API.
|
| 80 |
+
**Reasoning:** User has no GPU. CPU-only local inference is viable for small quantised models (Q4_K_M gemma4:e2b runs at ~5-10 tok/s). Large models (26B+) are impractical on CPU — cloud API provides them at acceptable latency.
|
| 81 |
+
|
| 82 |
+
### 3. RAG with ChromaDB + MiniLM
|
| 83 |
+
**Decision:** Use local vector store over calling the cloud model with full knowledge base in prompt.
|
| 84 |
+
**Reasoning:**
|
| 85 |
+
- Injecting 70k ICD-10 codes into every prompt would exceed context limits and cost tokens
|
| 86 |
+
- Local ChromaDB persists to disk, zero latency after first build
|
| 87 |
+
- MiniLM-L6-v2 (~80MB) gives good semantic similarity for medical terms on CPU
|
| 88 |
+
- Retrieves top-5 most relevant codes per consultation — keeps prompt tight and accurate
|
| 89 |
+
|
| 90 |
+
### 4. Gradio over Streamlit
|
| 91 |
+
**Decision:** Use Gradio for the UI.
|
| 92 |
+
**Reasoning:** Gradio has better support for streaming, audio, and timer-based polling. Streamlit's re-run model makes real-time transcript updates difficult. Gradio's `gr.Timer` makes 2-second polling trivial.
|
| 93 |
+
|
| 94 |
+
### 5. Gemma 4 reasoning mode — temperature requirement
|
| 95 |
+
**Decision:** Set `temperature=1.0` when `thinking_config` is enabled, not `0.3`.
|
| 96 |
+
**Reasoning:** Google's API requires temperature=1.0 when using ThinkingConfig — lower values raise an error. The thinking process itself introduces determinism so output quality is not degraded. Added graceful fallback: if the model doesn't support thinking (e.g. older model version), retry without `thinking_config`.
|
| 97 |
+
|
| 98 |
+
### 6. Function calling mode = ANY
|
| 99 |
+
**Decision:** Use `FunctionCallingMode.ANY` (force the model to always call the function) rather than `AUTO`.
|
| 100 |
+
**Reasoning:** `AUTO` mode allows the model to optionally use the function or just return text — unreliable for extraction tasks. `ANY` mode guarantees the model returns a structured function call every time, eliminating the JSON parse errors we had with the prompt-based approach.
|
| 101 |
+
|
| 102 |
+
### 7. Symptom extraction: local first, cloud fallback
|
| 103 |
+
**Decision:** Keep Gemma 4 E2B (Ollama, local) as primary for symptom extraction, cloud function calling as fallback.
|
| 104 |
+
**Reasoning:** Preserves the "local AI, privacy-preserving" story for the hackathon. Cloud fallback ensures reliability when Ollama returns malformed JSON or fails. Both paths return the same dict structure.
|
| 105 |
+
|
| 106 |
+
### 8. Transcript repair before downstream processing
|
| 107 |
+
**Problem:** faster-whisper `small` on CPU makes errors — misheard medical terms, missing punctuation, and run-on sentences. Downstream models (symptom extraction, SOAP generation) produce lower-quality output when given a garbled transcript.
|
| 108 |
+
**Decision:** Add a `clean_and_label_transcript()` step using Gemma 4 cloud that simultaneously repairs ASR errors AND labels speakers in one API call. This runs after `stop_consultation()` before any downstream processing.
|
| 109 |
+
**What it fixes:** Incorrect drug names, missing punctuation, filler words (um/uh), run-on sentences, garbled medical terminology.
|
| 110 |
+
**What it preserves:** All clinical facts — symptoms, medications, durations, dosages. Never adds or invents information.
|
| 111 |
+
**Why one call:** Combining repair + labelling saves one API round-trip and is cheaper than two separate calls.
|
| 112 |
+
|
| 113 |
+
### 9. Speaker diarization: Gemma 4 post-hoc vs pyannote-audio
|
| 114 |
+
**Decision:** Use Gemma 4 cloud to infer Doctor/Patient labels from transcript text.
|
| 115 |
+
**Reasoning:**
|
| 116 |
+
- `pyannote-audio` requires HuggingFace account, model license acceptance, and token setup
|
| 117 |
+
- For a hackathon demo, Gemma 4 inference from linguistic context is good enough
|
| 118 |
+
- Doctors and patients have very different speech patterns (questions vs symptom descriptions) that Gemma 4 reliably distinguishes
|
| 119 |
+
- Can always upgrade to pyannote later
|
| 120 |
+
|
| 121 |
+
### 10. SQLite for storage
|
| 122 |
+
**Decision:** Local SQLite over PostgreSQL or cloud database.
|
| 123 |
+
**Reasoning:** Desktop app, no server, no network dependency. SQLite is reliable, zero-config, and sufficient for demo-scale data. Schema: patients → sessions → notes + symptoms.
|
| 124 |
+
|
| 125 |
+
### 11. Whisper model: small over base
|
| 126 |
+
**Decision:** Upgrade from `base` to `small` Whisper model.
|
| 127 |
+
**Reasoning:** `base` had poor accuracy on real speech, especially medical terminology. `small` is ~4x more accurate on medical vocabulary and still runs acceptably on CPU (~2-3x slower than base but real-time viable with 3-second chunking). `medium` was considered but too slow for live demo.
|
| 128 |
+
|
| 129 |
+
---
|
| 130 |
+
|
| 131 |
+
## Issues Encountered & Resolutions
|
| 132 |
+
|
| 133 |
+
### Issue 1: `google-generativeai` deprecated
|
| 134 |
+
**Error:** `FutureWarning: All support for the google.generativeai package has ended`
|
| 135 |
+
**Root cause:** Google deprecated the old `google-generativeai` SDK in favour of `google-genai`
|
| 136 |
+
**Resolution:** Replaced `google-generativeai` with `google-genai>=1.0.0` in requirements. Updated `cloud_agents.py` to use `from google import genai` and `genai.Client()` pattern.
|
| 137 |
+
|
| 138 |
+
### Issue 2: Wrong Gemma 4 cloud model name
|
| 139 |
+
**Error:** `404 NOT_FOUND: models/gemma-4-27b-it is not found`
|
| 140 |
+
**Root cause:** Model name `gemma-4-27b-it` does not exist on Google AI Studio API.
|
| 141 |
+
**Resolution:** Listed available models via API (`client.models.list()`). Correct names are:
|
| 142 |
+
- `gemma-4-26b-a4b-it` (26B MoE, faster)
|
| 143 |
+
- `gemma-4-31b-it` (31B dense, most capable)
|
| 144 |
+
Updated default in `cloud_agents.py` and `.env`.
|
| 145 |
+
|
| 146 |
+
### Issue 3: Twi translation 500 INTERNAL error
|
| 147 |
+
**Error:** `500 INTERNAL: Internal error encountered` on `translate_to_twi()`
|
| 148 |
+
**Root cause:** Gemma 4 struggles with Twi (Akan) — a low-resource language with limited training data. The model likely has insufficient Twi coverage to translate medical content reliably, causing server-side failures.
|
| 149 |
+
**Resolution (temporary):** Removed Twi translation from the pipeline. Added try/except guards around all cloud agent calls so one failure doesn't break the entire `generate_notes()` flow.
|
| 150 |
+
**Planned fix:** Integrate NLLB-200 (`facebook/nllb-200-distilled-600M`) — Meta's purpose-built model for 200 low-resource languages including Akan/Twi.
|
| 151 |
+
|
| 152 |
+
### Issue 4: Ollama version too old for Gemma 4
|
| 153 |
+
**Error:** `Error: pull model manifest: 412: The model you are attempting to pull requires a newer version of Ollama`
|
| 154 |
+
**Root cause:** System Ollama was v0.19.0. Gemma 4 requires a newer version.
|
| 155 |
+
**Resolution:** Reinstalled Ollama via the official install script (`curl -fsSL https://ollama.com/install.sh | sh`), then restarted the service with `sudo systemctl restart ollama`. Note: Linux package managers (snap, apt) can ship outdated Ollama versions — prefer the official install script.
|
| 156 |
+
|
| 157 |
+
### Issue 5: `chromadb.PersistentClient | None` TypeError
|
| 158 |
+
**Error:** `TypeError: unsupported operand type(s) for |: 'function' and 'NoneType'`
|
| 159 |
+
**Root cause:** `chromadb.PersistentClient` is a factory function, not a class. Using it in a `X | None` type annotation evaluates at runtime and fails.
|
| 160 |
+
**Resolution:** Added `from __future__ import annotations` to `rag/retriever.py` — this makes all annotations lazy (strings at runtime), bypassing the evaluation issue.
|
| 161 |
+
|
| 162 |
+
### Issue 6: White empty boxes in UI (RAG panels)
|
| 163 |
+
**Issue:** `gr.Markdown` components rendered as white boxes on dark Gradio theme, even when empty.
|
| 164 |
+
**Root cause:** Gradio's default light background on Markdown components clashes with the dark theme. Empty panels had no content but still showed as white rectangles.
|
| 165 |
+
**Resolution:** Moved RAG panels (ICD-10, Drug Reference, Symptoms) into `gr.Accordion` components. Accordions collapse when not needed and have theme-consistent styling. Also added CSS `background: transparent` for markdown panels.
|
| 166 |
+
|
| 167 |
+
### Issue 9: Gemma 4 image input — wrong contents structure
|
| 168 |
+
**Error:** `500 INTERNAL` then `Part.from_text() takes 1 positional argument but 2 were given`
|
| 169 |
+
**Root cause:** Two sequential mistakes in the multimodal contents format:
|
| 170 |
+
1. First attempt wrapped parts in `types.Content(role="user", parts=[...])` — not needed
|
| 171 |
+
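2. Used `types.Part.from_text(IMAGE_PROMPT)` with a positional argument — in the `google-genai` SDK, `Part.from_text()` accepts `text` only as a keyword argument, so the positional call raises the `TypeError` shown above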
|
| 172 |
+
**Resolution:** Per official Gemma 4 docs (philschmid.de/gemma-4-gemini-api), the correct format is a plain list mixing `Part.from_bytes()` and a raw string:
|
| 173 |
+
```python
|
| 174 |
+
contents=[
|
| 175 |
+
types.Part.from_bytes(data=file_bytes, mime_type=mime_type),
|
| 176 |
+
IMAGE_PROMPT, # plain string, not Part.from_text()
|
| 177 |
+
]
|
| 178 |
+
```
|
| 179 |
+
All Gemma 4 models (including 26B and 31B) are fully multimodal. The initial 500 error was caused by the wrong content structure, not a model limitation.
|
| 180 |
+
|
| 181 |
+
### Issue 10: pyannote-audio abandoned in favour of Gemma 4
|
| 182 |
+
**Decision made:** Started implementing pyannote-audio for speaker diarization, then stopped.
|
| 183 |
+
**Reason:** User confirmed Gemma 4 post-hoc labelling is sufficient for the demo. pyannote requires HuggingFace account, model license acceptance, and heavy torch dependency. Gemma 4 language-based inference is actually more reliable for medical conversations because it uses *context* (doctors ask questions, patients describe symptoms) rather than raw audio signal (which can fail when two speakers have similar voices).
|
| 184 |
+
|
| 185 |
+
### Issue 11: Gradio CSS parameter deprecation warning
|
| 186 |
+
**Warning:** `UserWarning: The parameters have been moved from the Blocks constructor to the launch() method`
|
| 187 |
+
**Root cause:** Gradio 6.0 moved `css` parameter from `gr.Blocks(css=...)` to `demo.launch(css=...)`.
|
| 188 |
+
**Resolution:** Moved `css=CSS` to `demo.launch(...)`.
|
| 189 |
+
|
| 190 |
+
### Issue 8: uv installing to wrong Python version
|
| 191 |
+
**Issue:** `chromadb` and `sentence-transformers` installed but not importable from venv.
|
| 192 |
+
**Root cause:** The venv was created with Python 3.11 (via uv) but system also has Python 3.12. Running `uv pip install` without specifying the environment installed to the wrong location.
|
| 193 |
+
**Resolution:** Used `VIRTUAL_ENV=/path/to/.venv uv pip install ...` to target the correct venv, or used `/path/to/.venv/bin/python -m pip install ...`.
|
| 194 |
+
|
| 195 |
+
---
|
| 196 |
+
|
| 197 |
+
## What Was Considered and Rejected
|
| 198 |
+
|
| 199 |
+
| Option | Rejected because |
|
| 200 |
+
|---|---|
|
| 201 |
+
| Streamlit UI | Real-time transcript polling is awkward in Streamlit's re-run model |
|
| 202 |
+
| PostgreSQL storage | Overkill for desktop demo; SQLite is zero-config |
|
| 203 |
+
| pyannote-audio diarization | Requires HF account + model license; too much setup for hackathon timeline |
|
| 204 |
+
| Full 70k ICD-10 dataset | Too large to embed in demo time; curated Ghana-relevant subset is more impactful |
|
| 205 |
+
| Running everything on cloud API | Wanted to demonstrate hybrid local+cloud multi-agent architecture |
|
| 206 |
+
| Whisper `large-v3` | Too slow on CPU for real-time; `small` is the sweet spot |
|
| 207 |
+
| Gemma 4 for Twi translation | Low-resource language; model returned 500 errors. NLLB-200 is the right tool |
|
| 208 |
+
|
| 209 |
+
---
|
| 210 |
+
|
| 211 |
+
## Remaining Work / Roadmap
|
| 212 |
+
|
| 213 |
+
- [ ] **Twi translation via NLLB-200** — integrate `facebook/nllb-200-distilled-600M` locally
|
| 214 |
+
- [ ] **PDF export** — export SOAP note + patient summary as printable PDF (fpdf2 already in deps)
|
| 215 |
+
- [ ] **Multi-session history** — view all past sessions for a patient, not just the most recent
|
| 216 |
+
- [ ] **Upgrade to Whisper `medium`** if demo machine is fast enough
|
| 217 |
+
- [ ] **ICD-10 code expansion** — add full 70k code dataset for production use
|
| 218 |
+
- [ ] **MedGemma** — self-host `medgemma-4b-it` or `medgemma-27b-it` for higher-accuracy medical image analysis
|
| 219 |
+
- [ ] **Long-context patient history** — load all previous session notes into SOAP prompt for longitudinal care reasoning
|
| 220 |
+
|
| 221 |
+
---
|
| 222 |
+
|
| 223 |
+
## File Structure
|
| 224 |
+
|
| 225 |
+
```
|
| 226 |
+
hosptial_copilot/
|
| 227 |
+
├── app.py Main Gradio app + UI
|
| 228 |
+
├── agents/
|
| 229 |
+
│ ├── cloud_agents.py Gemma 4 cloud: SOAP, summary, speaker labelling
|
| 230 |
+
│ └── symptom_agent.py Gemma 4 E2B local: symptom JSON extraction
|
| 231 |
+
├── transcription/
|
| 232 |
+
│ └── transcriber.py faster-whisper live mic streaming
|
| 233 |
+
├── rag/
|
| 234 |
+
│ ├── retriever.py ChromaDB + MiniLM embedding + retrieval
|
| 235 |
+
│ └── data/
|
| 236 |
+
│ ├── icd10_common.json 90+ ICD-10 codes (Ghana-relevant)
|
| 237 |
+
│ └── essential_medicines.json 40+ WHO Essential Medicines
|
| 238 |
+
├── database/
|
| 239 |
+
│ └── db.py SQLite schema + helpers
|
| 240 |
+
├── requirements.txt
|
| 241 |
+
├── .env.example
|
| 242 |
+
├── .gitignore
|
| 243 |
+
├── README.md
|
| 244 |
+
└── DEVLOG.md This file
|
| 245 |
+
```
|
| 246 |
+
|
| 247 |
+
---
|
| 248 |
+
|
| 249 |
+
## Environment Variables
|
| 250 |
+
|
| 251 |
+
| Variable | Default | Description |
|
| 252 |
+
|---|---|---|
|
| 253 |
+
| `GEMINI_API_KEY` | — | Google AI Studio API key (required) |
|
| 254 |
+
| `WHISPER_MODEL` | `small` | Whisper model size: tiny/base/small/medium/large-v3 |
|
| 255 |
+
| `OLLAMA_MODEL` | `gemma4:e2b` | Local Ollama model for symptom extraction |
|
| 256 |
+
| `CLOUD_MODEL` | `gemma-4-26b-a4b-it` | Google AI Studio model name |
|
INTRO_VIDEO_SCRIPT.md
ADDED
|
@@ -0,0 +1,239 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hospital Copilot — Introductory Video Script
|
| 2 |
+
### "Before the Last Patient"
|
| 3 |
+
|
| 4 |
+
**Hackathon:** Gemma 4 for Good · 2026
|
| 5 |
+
**Format:** Narrated short film with text overlays
|
| 6 |
+
**Runtime:** 90–120 seconds
|
| 7 |
+
**Tone:** Honest, grounded, emotional
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## PURPOSE OF THIS VIDEO
|
| 12 |
+
|
| 13 |
+
This is the introductory video that plays BEFORE the demo.
|
| 14 |
+
It establishes the problem emotionally, introduces the solution, and honestly
|
| 15 |
+
frames what the viewer is about to see as a proof of concept — not a live
|
| 16 |
+
hospital deployment, but a fully functional system that would work identically
|
| 17 |
+
in a real clinical setting.
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## SCENE-BY-SCENE SCRIPT
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
### SCENE 1 — THE PROBLEM
|
| 26 |
+
**Timestamp:** 0:00 – 0:35
|
| 27 |
+
**Visual:** Black screen → single stat → doctor's desk at end of day
|
| 28 |
+
|
| 29 |
+
**[Black screen. Silence. Then a single line of text fades in:]**
|
| 30 |
+
|
| 31 |
+
> *"In Ghana, there is 1 doctor for every 10,000 people."*
|
| 32 |
+
|
| 33 |
+
**[Fade to: a doctor's desk late in the day. Stack of paper files.
|
| 34 |
+
A tired hand writing notes. A clock on the wall.]**
|
| 35 |
+
|
| 36 |
+
**NARRATOR (Voice-over):**
|
| 37 |
+
|
| 38 |
+
> "Every day, doctors in Ghana see between 30 and 50 patients.
|
| 39 |
+
> They diagnose. They treat. They care.
|
| 40 |
+
>
|
| 41 |
+
> But when the last patient leaves — the work is not done.
|
| 42 |
+
>
|
| 43 |
+
> The notes still need to be written.
|
| 44 |
+
> The records updated.
|
| 45 |
+
> The paperwork filed.
|
| 46 |
+
>
|
| 47 |
+
> For every hour spent with a patient —
|
| 48 |
+
> another hour is spent on documentation.
|
| 49 |
+
>
|
| 50 |
+
> That is an hour stolen from the next patient.
|
| 51 |
+
> From rest. From family.
|
| 52 |
+
>
|
| 53 |
+
> This is the hidden cost of healthcare —
|
| 54 |
+
> and it is burning doctors out."
|
| 55 |
+
|
| 56 |
+
**[Text overlay appears on screen:]**
|
| 57 |
+
|
| 58 |
+
> *"Medical burnout affects 70% of doctors in sub-Saharan Africa."*
|
| 59 |
+
> *— WHO, 2024*
|
| 60 |
+
|
| 61 |
+
---
|
| 62 |
+
|
| 63 |
+
### SCENE 2 — THE HUMAN COST
|
| 64 |
+
**Timestamp:** 0:35 – 0:55
|
| 65 |
+
**Visual:** Empty waiting room chairs. Then a full one. A patient waiting alone.
|
| 66 |
+
|
| 67 |
+
**NARRATOR (Voice-over):**
|
| 68 |
+
|
| 69 |
+
> "When a doctor is exhausted, the patient feels it.
|
| 70 |
+
>
|
| 71 |
+
> Shorter consultations.
|
| 72 |
+
> Less explanation.
|
| 73 |
+
> Less time to listen.
|
| 74 |
+
>
|
| 75 |
+
> The patient who comes in last
|
| 76 |
+
> gets less than the patient who came in first.
|
| 77 |
+
>
|
| 78 |
+
> That is not a failure of the doctor.
|
| 79 |
+
> That is a failure of the system.
|
| 80 |
+
>
|
| 81 |
+
> We built something to fix it."
|
| 82 |
+
|
| 83 |
+
---
|
| 84 |
+
|
| 85 |
+
### SCENE 3 — THE SOLUTION
|
| 86 |
+
**Timestamp:** 0:55 – 1:15
|
| 87 |
+
**Visual:** Laptop screen showing Hospital Copilot. Live transcript streaming.
|
| 88 |
+
ICD codes appearing. SOAP note generating.
|
| 89 |
+
|
| 90 |
+
**NARRATOR (Voice-over):**
|
| 91 |
+
|
| 92 |
+
> "Hospital Copilot is an AI clinical assistant powered by Gemma 4.
|
| 93 |
+
>
|
| 94 |
+
> It listens to the consultation — with the doctor's permission —
|
| 95 |
+
> and handles the documentation automatically.
|
| 96 |
+
>
|
| 97 |
+
> Live transcription.
|
| 98 |
+
> Symptom extraction.
|
| 99 |
+
> SOAP notes.
|
| 100 |
+
> Patient summaries.
|
| 101 |
+
>
|
| 102 |
+
> Grounded in real ICD-10 codes
|
| 103 |
+
> and WHO-approved drug dosages.
|
| 104 |
+
>
|
| 105 |
+
> The doctor talks to the patient.
|
| 106 |
+
> The AI handles the rest."
|
| 107 |
+
|
| 108 |
+
---
|
| 109 |
+
|
| 110 |
+
### SCENE 4 — THE HONEST DISCLAIMER
|
| 111 |
+
**Timestamp:** 1:15 – 1:30
|
| 112 |
+
**Visual:** Plain background. No music. Just voice. Calm and direct.
|
| 113 |
+
|
| 114 |
+
**[Music fades out completely here. Silence under the voice.]**
|
| 115 |
+
|
| 116 |
+
**NARRATOR (Voice-over):**
|
| 117 |
+
|
| 118 |
+
> "What you are about to see is a proof of concept.
|
| 119 |
+
>
|
| 120 |
+
> We are not in a hospital.
|
| 121 |
+
> The patient is not real.
|
| 122 |
+
> The setting is simulated.
|
| 123 |
+
>
|
| 124 |
+
> But the AI is real.
|
| 125 |
+
> The technology is real.
|
| 126 |
+
> The output is real.
|
| 127 |
+
>
|
| 128 |
+
> Everything you will see —
|
| 129 |
+
> the transcription, the clinical notes, the intelligence —
|
| 130 |
+
> is exactly what would happen
|
| 131 |
+
> in an actual hospital consultation.
|
| 132 |
+
>
|
| 133 |
+
> This is what it could look like.
|
| 134 |
+
> This is what it should look like.
|
| 135 |
+
> Starting today."
|
| 136 |
+
|
| 137 |
+
---
|
| 138 |
+
|
| 139 |
+
### SCENE 5 — CLOSING TITLE CARD
|
| 140 |
+
**Timestamp:** 1:30 – 1:45
|
| 141 |
+
**Visual:** Clean dark screen. Text appears line by line with gentle fade.
|
| 142 |
+
|
| 143 |
+
```
|
| 144 |
+
Hospital Copilot
|
| 145 |
+
|
| 146 |
+
Powered by Gemma 4
|
| 147 |
+
Built for Ghana. Built for Good.
|
| 148 |
+
|
| 149 |
+
─────────────────────────────────────
|
| 150 |
+
Gemma 4 for Good Hackathon · 2026
|
| 151 |
+
```
|
| 152 |
+
|
| 153 |
+
**[Soft transition into the demo video]**
|
| 154 |
+
|
| 155 |
+
---
|
| 156 |
+
|
| 157 |
+
## FILMING GUIDE
|
| 158 |
+
|
| 159 |
+
### Scenes and What You Need
|
| 160 |
+
|
| 161 |
+
| Scene | Location | Props |
|
| 162 |
+
|---|---|---|
|
| 163 |
+
| Opening desk shot | Any desk — dim warm light | Paper files, pen, clock |
|
| 164 |
+
| Waiting room | Any corridor or room with chairs | Chairs, natural light |
|
| 165 |
+
| Laptop screen | Anywhere with the app running | Laptop showing Hospital Copilot |
|
| 166 |
+
| Title card | No filming needed | Post-production text overlay |
|
| 167 |
+
|
| 168 |
+
### Narrator
|
| 169 |
+
- Does not need to be on camera — voice-over recorded separately works perfectly
|
| 170 |
+
- Speak slowly and deliberately — pause between each short line
|
| 171 |
+
- Tone: calm, serious, hopeful — not dramatic or over-performed
|
| 172 |
+
|
| 173 |
+
### Music
|
| 174 |
+
- Use soft, minimal instrumental music — piano or ambient
|
| 175 |
+
- Volume: low throughout Scenes 1–3
|
| 176 |
+
- **Fade to silence completely** at Scene 4 (the disclaimer)
|
| 177 |
+
- Silence makes the disclaimer land harder than any music would
|
| 178 |
+
- Bring music back softly under the title card in Scene 5
|
| 179 |
+
|
| 180 |
+
---
|
| 181 |
+
|
| 182 |
+
## FULL NARRATION — CLEAN READ-THROUGH
|
| 183 |
+
|
| 184 |
+
*(Use this for recording the voice-over in one take)*
|
| 185 |
+
|
| 186 |
+
---
|
| 187 |
+
|
| 188 |
+
In Ghana, there is one doctor for every ten thousand people.
|
| 189 |
+
|
| 190 |
+
Every day, doctors see between thirty and fifty patients.
|
| 191 |
+
They diagnose. They treat. They care.
|
| 192 |
+
|
| 193 |
+
But when the last patient leaves — the work is not done.
|
| 194 |
+
The notes still need to be written. The records updated. The paperwork filed.
|
| 195 |
+
|
| 196 |
+
For every hour spent with a patient, another hour is spent on documentation.
|
| 197 |
+
That is an hour stolen from the next patient. From rest. From family.
|
| 198 |
+
This is the hidden cost of healthcare — and it is burning doctors out.
|
| 199 |
+
|
| 200 |
+
When a doctor is exhausted, the patient feels it.
|
| 201 |
+
Shorter consultations. Less explanation. Less time to listen.
|
| 202 |
+
The patient who comes in last gets less than the patient who came in first.
|
| 203 |
+
|
| 204 |
+
That is not a failure of the doctor. That is a failure of the system.
|
| 205 |
+
We built something to fix it.
|
| 206 |
+
|
| 207 |
+
Hospital Copilot is an AI clinical assistant powered by Gemma 4.
|
| 208 |
+
It listens to the consultation — with the doctor's permission —
|
| 209 |
+
and handles the documentation automatically.
|
| 210 |
+
|
| 211 |
+
Live transcription. Symptom extraction. SOAP notes. Patient summaries.
|
| 212 |
+
Grounded in real ICD-10 codes and WHO-approved drug dosages.
|
| 213 |
+
The doctor talks to the patient. The AI handles the rest.
|
| 214 |
+
|
| 215 |
+
---
|
| 216 |
+
|
| 217 |
+
What you are about to see is a proof of concept.
|
| 218 |
+
We are not in a hospital. The patient is not real. The setting is simulated.
|
| 219 |
+
|
| 220 |
+
But the AI is real. The technology is real. The output is real.
|
| 221 |
+
|
| 222 |
+
Everything you will see — the transcription, the clinical notes, the intelligence —
|
| 223 |
+
is exactly what would happen in an actual hospital consultation.
|
| 224 |
+
|
| 225 |
+
This is what it could look like.
|
| 226 |
+
This is what it should look like.
|
| 227 |
+
Starting today.
|
| 228 |
+
|
| 229 |
+
---
|
| 230 |
+
|
| 231 |
+
## KEY CREATIVE DECISIONS
|
| 232 |
+
|
| 233 |
+
| Decision | Reason |
|
| 234 |
+
|---|---|
|
| 235 |
+
| Silence under the disclaimer | Removes all distraction — the honesty lands harder without music |
|
| 236 |
+
| Short punchy lines in narration | Easier to absorb, more memorable, feels confident not rushed |
|
| 237 |
+
| "Failure of the system, not the doctor" | Positions the app as supporting doctors, not replacing them |
|
| 238 |
+
| "Starting today" as the final line | Confident, present-tense — not a future promise, a current reality |
|
| 239 |
+
| Owning "proof of concept" plainly | More credible to judges than overselling — shows maturity and integrity |
|
README.md
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hospital Copilot
|
| 2 |
+
|
| 3 |
+
AI-powered medical documentation assistant for the **Gemma 4 for Good** hackathon.
|
| 4 |
+
|
| 5 |
+
Listens to doctor-patient consultations and automatically generates SOAP notes, patient summaries, symptom extractions, and Twi (Akan) translations — reducing paperwork and language barriers in Ghanaian healthcare.
|
| 6 |
+
|
| 7 |
+
## Features
|
| 8 |
+
|
| 9 |
+
- **Live transcription** via faster-whisper (runs on CPU)
|
| 10 |
+
- **Symptom extraction** via Gemma 4 E2B (local, Ollama, CPU)
|
| 11 |
+
- **SOAP note generation** via Gemma 4 26B (Google AI Studio)
|
| 12 |
+
- **Patient summary** in plain English
|
| 13 |
+
- **English ↔ Twi translation** for Ghanaian patients (planned — not yet implemented)
|
| 14 |
+
- **Patient records** stored in local SQLite
|
| 15 |
+
|
| 16 |
+
## Setup
|
| 17 |
+
|
| 18 |
+
### 1. Install dependencies
|
| 19 |
+
|
| 20 |
+
```bash
|
| 21 |
+
pip install -r requirements.txt
|
| 22 |
+
```
|
| 23 |
+
|
| 24 |
+
### 2. Install Ollama and pull Gemma 4
|
| 25 |
+
|
| 26 |
+
```bash
|
| 27 |
+
# Install Ollama: https://ollama.com
|
| 28 |
+
ollama pull gemma4:e2b
|
| 29 |
+
```
|
| 30 |
+
|
| 31 |
+
### 3. Configure environment
|
| 32 |
+
|
| 33 |
+
```bash
|
| 34 |
+
cp .env.example .env
|
| 35 |
+
# Edit .env and add your Google AI Studio API key
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
Get a free API key at https://aistudio.google.com
|
| 39 |
+
|
| 40 |
+
### 4. Run
|
| 41 |
+
|
| 42 |
+
```bash
|
| 43 |
+
python app.py
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
Open http://localhost:7860 in your browser.
|
| 47 |
+
|
| 48 |
+
## Project Structure
|
| 49 |
+
|
| 50 |
+
```
|
| 51 |
+
hosptial_copilot/
|
| 52 |
+
├── app.py # Gradio UI + app logic
|
| 53 |
+
├── agents/
|
| 54 |
+
│ ├── symptom_agent.py # Local Gemma 4 (Ollama) symptom extractor
|
| 55 |
+
│ └── cloud_agents.py # Cloud Gemma 4: SOAP, summary, translation
|
| 56 |
+
├── transcription/
|
| 57 |
+
│ └── transcriber.py # faster-whisper live mic transcription
|
| 58 |
+
├── database/
|
| 59 |
+
│ └── db.py # SQLite helpers
|
| 60 |
+
├── requirements.txt
|
| 61 |
+
└── .env.example
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
## Architecture
|
| 65 |
+
|
| 66 |
+
```
|
| 67 |
+
Microphone
|
| 68 |
+
└─► faster-whisper (local, CPU) → raw transcript
|
| 69 |
+
├─► Gemma 4 E2B via Ollama → symptom JSON (local CPU)
|
| 70 |
+
└─► Gemma 4 26B via API → SOAP note + summary + Twi translation
|
| 71 |
+
└─► SQLite → patient records
|
| 72 |
+
└─► Gradio UI → doctor dashboard
|
| 73 |
+
```
|
SUBMISSION_WRITEUP.md
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# MediScribe AI
|
| 2 |
+
## An Offline-First Multilingual Clinical Assistant Powered by Gemma 4
|
| 3 |
+
|
| 4 |
+
**Track:** Health & Sciences
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Overview
|
| 9 |
+
|
| 10 |
+
In many clinics across Africa, doctors spend more time documenting than treating. MediScribe AI is a desktop AI assistant that listens to a doctor-patient consultation, transcribes it in real time, and automatically generates a structured SOAP note, a plain-language patient summary, and a structured symptom record — all reviewed and approved by the doctor before anything is saved.
|
| 11 |
+
|
| 12 |
+
The system was built specifically for Ghanaian healthcare contexts, targeting clinics where internet access may be intermittent and where patients may speak languages other than English.
|
| 13 |
+
|
| 14 |
+
---
|
| 15 |
+
|
| 16 |
+
## The Problem
|
| 17 |
+
|
| 18 |
+
Healthcare workers in developing regions face crushing administrative workloads. Manual note-taking after every consultation reduces patient interaction time, increases burnout, and introduces inconsistencies in medical records. Language barriers between English-trained doctors and local-language-speaking patients add further friction. Many cloud-first AI tools are impractical where connectivity is unreliable.
|
| 19 |
+
|
| 20 |
+
---
|
| 21 |
+
|
| 22 |
+
## What We Built
|
| 23 |
+
|
| 24 |
+
MediScribe AI is a single Python application with a Gradio web UI. The consultation workflow is:
|
| 25 |
+
|
| 26 |
+
1. The doctor registers or selects a patient.
|
| 27 |
+
2. Consultation starts — the microphone opens and audio begins streaming.
|
| 28 |
+
3. faster-whisper transcribes speech in real time in 3-second blocks on CPU.
|
| 29 |
+
4. After the consultation ends, Gemma 4 (cloud) repairs ASR errors and labels each turn as Doctor or Patient.
|
| 30 |
+
5. The doctor clicks **Generate Notes**. The system:
|
| 31 |
+
- Extracts structured symptoms via **local Gemma 4 E2B** (Ollama, CPU) — chief complaint, symptom list, duration, severity, vitals, medications, allergies, follow-up actions
|
| 32 |
+
- Falls back to **cloud Gemma 4 function calling** if local extraction fails, returning a guaranteed-valid structured schema
|
| 33 |
+
- Runs **semantic RAG retrieval** against a local ChromaDB knowledge base to surface relevant ICD-10 codes and WHO essential medicines dosages
|
| 34 |
+
- Generates a **SOAP note** using cloud Gemma 4 with reasoning/thinking mode enabled, grounded by the RAG context
|
| 35 |
+
- Generates a **plain-language patient summary**
|
| 36 |
+
6. The doctor reviews both outputs in editable panels before approving.
|
| 37 |
+
7. Everything is saved to local SQLite — transcript, SOAP note, summary, and structured symptom JSON.
|
| 38 |
+
|
| 39 |
+
An optional document upload allows the doctor to attach a photo or PDF of a lab result, prescription, or X-ray. Gemma 4's multimodal capability reads the document and automatically includes the findings in the SOAP note.
|
| 40 |
+
|
| 41 |
+
---
|
| 42 |
+
|
| 43 |
+
## Why Gemma 4
|
| 44 |
+
|
| 45 |
+
We used Gemma 4 across every AI-powered step of the pipeline:
|
| 46 |
+
|
| 47 |
+
| Task | Model | Where |
|
| 48 |
+
|---|---|---|
|
| 49 |
+
| Symptom extraction (primary) | Gemma 4 E2B | Local CPU via Ollama |
|
| 50 |
+
| Symptom extraction (fallback) | Gemma 4 26B — function calling | Google AI Studio API |
|
| 51 |
+
| Transcript repair + speaker labelling | Gemma 4 26B | Google AI Studio API |
|
| 52 |
+
| SOAP note generation | Gemma 4 26B — reasoning mode | Google AI Studio API |
|
| 53 |
+
| Patient summary | Gemma 4 26B | Google AI Studio API |
|
| 54 |
+
| Medical document analysis | Gemma 4 26B — multimodal | Google AI Studio API |
|
| 55 |
+
|
| 56 |
+
The local Gemma 4 E2B model (quantized, running via Ollama on CPU) handles the privacy-sensitive symptom extraction step, keeping structured clinical data local when possible. The cloud Gemma 4 model handles the tasks requiring stronger reasoning — particularly SOAP note generation, which uses the model's built-in thinking mode to reason through the clinical picture before writing the note.
|
| 57 |
+
|
| 58 |
+
Gemma 4's native function calling was used to implement a guaranteed-valid structured output schema for symptom extraction — eliminating JSON parsing failures that plagued earlier prompt-only approaches.
|
| 59 |
+
|
| 60 |
+
---
|
| 61 |
+
|
| 62 |
+
## System Architecture
|
| 63 |
+
|
| 64 |
+
**Frontend:** Gradio web UI running locally on port 7860. Two main tabs — Live Consultation and Patient Records. No external server required.
|
| 65 |
+
|
| 66 |
+
**Speech-to-Text:** faster-whisper (`small` model, CPU, int8 quantized) with sounddevice for microphone streaming. Audio is processed in 3-second chunks with VAD filtering. Full session audio is saved as WAV after the consultation ends.
|
| 67 |
+
|
| 68 |
+
**Symptom Extraction Agent (`agents/symptom_agent.py`):** Calls local Gemma 4 E2B via Ollama with a structured JSON prompt. On any failure (model unavailable, invalid JSON, malformed response), automatically falls back to cloud Gemma 4 function calling with a defined schema, guaranteeing a valid structured output.
|
| 69 |
+
|
| 70 |
+
**Cloud Agent (`agents/cloud_agents.py`):** Wraps the Google GenAI SDK. Implements transcript repair, SOAP generation (with `ThinkingConfig`), patient summary, function-calling symptom extraction, and multimodal document analysis. Temperature is 0.3 for standard text generation, 0.1 for function-calling symptom extraction, and 1.0 when thinking mode is active (required by the API).
|
| 71 |
+
|
| 72 |
+
**RAG Pipeline (`rag/retriever.py`):** `all-MiniLM-L6-v2` sentence-transformers embeddings + ChromaDB persistent vector store. Two collections: 90+ ICD-10 codes (Ghana-relevant and general) and 40+ WHO Essential Medicines entries. The top-5 ICD codes and top-3 drug matches are retrieved per consultation and injected into the SOAP note prompt as grounding context.
|
| 73 |
+
|
| 74 |
+
**Database (`database/db.py`):** SQLite with four tables — `patients`, `sessions`, `notes`, and `symptoms`. Stores the full cleaned transcript, SOAP note, English summary, and structured symptom JSON per session.
|
| 75 |
+
|
| 76 |
+
---
|
| 77 |
+
|
| 78 |
+
## Key Features
|
| 79 |
+
|
| 80 |
+
### Real-Time Transcription with Post-Processing
|
| 81 |
+
faster-whisper streams transcription as the consultation proceeds. After the session ends, Gemma 4 repairs ASR errors (medical terminology, drug names, run-on sentences) and labels each turn as Doctor or Patient in a single pass.
|
| 82 |
+
|
| 83 |
+
### Dual-Mode Symptom Extraction
|
| 84 |
+
Local-first extraction via Gemma 4 E2B keeps sensitive data off the network whenever possible. Automatic cloud fallback via function calling ensures the pipeline never silently fails.
|
| 85 |
+
|
| 86 |
+
### RAG-Grounded SOAP Notes
|
| 87 |
+
ICD-10 codes and drug dosage references are retrieved semantically before SOAP note generation. This grounds the model's clinical output in verifiable reference data rather than relying purely on parametric knowledge.
|
| 88 |
+
|
| 89 |
+
### Multimodal Document Analysis
|
| 90 |
+
Doctors can upload a photo or PDF of a lab result, prescription, or report. Gemma 4 reads it and its findings are automatically included as context in the SOAP note generation step.
|
| 91 |
+
|
| 92 |
+
### Human-in-the-Loop Validation
|
| 93 |
+
Every generated output is shown to the doctor in a readable panel with an editable fallback before anything is committed to the database. Doctors approve; the AI drafts.
|
| 94 |
+
|
| 95 |
+
### Local Storage
|
| 96 |
+
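All patient records, transcripts, SOAP notes, and symptom data are stored in a local SQLite database. Patient data leaves the device only through the cloud Gemma 4 API calls: transcript repair, SOAP note and summary generation, document analysis, and the fallback symptom extraction.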
|
| 97 |
+
|
| 98 |
+
---
|
| 99 |
+
|
| 100 |
+
## Technical Challenges
|
| 101 |
+
|
| 102 |
+
### Reliable Structured Output from a Local Small Model
|
| 103 |
+
Gemma 4 E2B running on CPU occasionally produces malformed JSON or misses required fields. We implemented a two-tier extraction strategy: local Ollama first with JSON validation, cloud function calling as a typed-schema fallback. This eliminated silent data loss in the pipeline.
|
| 104 |
+
|
| 105 |
+
### ASR Quality on Medical Vocabulary
|
| 106 |
+
faster-whisper on CPU struggles with drug names, medical abbreviations, and Ghanaian proper names. We addressed this by adding a dedicated Gemma 4 repair pass after the consultation ends, correcting the transcript before any clinical information is extracted.
|
| 107 |
+
|
| 108 |
+
### Thinking Mode Compatibility
|
| 109 |
+
Gemma 4's reasoning mode requires `temperature=1.0` and is not supported on all model variants. We implemented a graceful fallback that detects API errors related to `ThinkingConfig` and retries without it, so SOAP generation never fails silently.
|
| 110 |
+
|
| 111 |
+
### RAG Grounding for Clinical Accuracy
|
| 112 |
+
SOAP notes generated without reference context showed inconsistent ICD code suggestions and occasionally incorrect drug dosages. Adding RAG retrieval with ChromaDB significantly improved specificity and reduced hallucinated medication instructions.
|
| 113 |
+
|
| 114 |
+
---
|
| 115 |
+
|
| 116 |
+
## What Is Not Yet Built
|
| 117 |
+
|
| 118 |
+
Twi/English translation is planned (NLLB-200) but not yet implemented — stubs exist in `cloud_agents.py`. Speaker diarization is partially scaffolded (session audio is saved as WAV) but not yet wired up. The system currently requires internet access for SOAP generation and transcript repair; a fully offline mode would require a larger local model than E2B.
|
| 119 |
+
|
| 120 |
+
---
|
| 121 |
+
|
| 122 |
+
## Impact
|
| 123 |
+
|
| 124 |
+
MediScribe AI reduces the documentation burden on doctors by automating the most time-consuming parts of post-consultation admin: writing SOAP notes, summarizing for patients, and coding diagnoses. Because transcription, primary symptom extraction, and record storage run locally, the core workflow remains usable in clinics with unreliable connectivity; SOAP generation and transcript repair currently still need an internet connection. The human-in-the-loop design keeps the doctor fully in control — the AI is a drafter, not an authority.
|
| 125 |
+
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
## Stack Summary
|
| 129 |
+
|
| 130 |
+
| Component | Technology |
|
| 131 |
+
|---|---|
|
| 132 |
+
| UI | Gradio (Python, port 7860) |
|
| 133 |
+
| Speech-to-Text | faster-whisper small, CPU, int8 |
|
| 134 |
+
| Local AI | Gemma 4 E2B via Ollama |
|
| 135 |
+
| Cloud AI | Gemma 4 26B-IT via Google AI Studio |
|
| 136 |
+
| Embeddings | all-MiniLM-L6-v2 (sentence-transformers) |
|
| 137 |
+
| Vector Store | ChromaDB (local, persistent) |
|
| 138 |
+
| Database | SQLite |
|
| 139 |
+
| Language | Python 3.11 |
|
agents/__init__.py
ADDED
|
File without changes
|
agents/cloud_agents.py
ADDED
|
@@ -0,0 +1,274 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import base64
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
from google import genai
|
| 5 |
+
from google.genai import types
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
|
| 8 |
+
load_dotenv()
|
| 9 |
+
|
| 10 |
+
CLOUD_MODEL = os.getenv("CLOUD_MODEL", "gemma-4-26b-a4b-it")
|
| 11 |
+
|
| 12 |
+
_client = genai.Client(api_key=os.environ["GEMINI_API_KEY"])
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def _call(prompt: str, use_thinking: bool = False) -> str:
    """
    Send one prompt to the cloud Gemma model and return the reply text.

    Args:
        prompt: Full prompt text, passed to the API as ``contents``.
        use_thinking: When true, the request carries a ``ThinkingConfig``
            (budget of 2048, thoughts excluded from the reply) and uses
            ``temperature=1.0``. Otherwise ``temperature=0.3`` is used.

    Returns:
        The reply text with leading/trailing whitespace stripped.

    Raises:
        RuntimeError: If building the config or calling the API fails, or if
            the response contains no text at all.
    """
    try:
        if use_thinking:
            config = types.GenerateContentConfig(
                temperature=1.0,  # required when thinking is enabled
                thinking_config=types.ThinkingConfig(
                    include_thoughts=False,  # reason internally, return only final answer
                    thinking_budget=2048,
                ),
            )
        else:
            config = types.GenerateContentConfig(temperature=0.3)
        response = _client.models.generate_content(
            model=CLOUD_MODEL,
            contents=prompt,
            config=config,
        )
        text = response.text
    except Exception as e:
        raise RuntimeError(f"Gemma API error: {e}") from e

    # `response.text` is None when the response has no text parts. Report that
    # explicitly instead of failing with an AttributeError on `.strip()`.
    if text is None:
        raise RuntimeError("Gemma API error: response contained no text")
    return text.strip()
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# ── Function calling — symptom schema ────────────────────────────────────────
|
| 37 |
+
|
| 38 |
+
SYMPTOM_SCHEMA = types.FunctionDeclaration(
|
| 39 |
+
name="record_symptoms",
|
| 40 |
+
description="Record all structured clinical information extracted from the consultation transcript.",
|
| 41 |
+
parameters={
|
| 42 |
+
"type": "object",
|
| 43 |
+
"properties": {
|
| 44 |
+
"chief_complaint": {"type": "string", "description": "Main reason for the visit"},
|
| 45 |
+
"symptoms": {"type": "array", "items": {"type": "string"}, "description": "List of reported symptoms"},
|
| 46 |
+
"duration": {"type": "string", "description": "How long symptoms have been present"},
|
| 47 |
+
"severity": {"type": "string", "enum": ["mild", "moderate", "severe"], "description": "Overall severity"},
|
| 48 |
+
"associated_symptoms": {"type": "array", "items": {"type": "string"}},
|
| 49 |
+
"medications_mentioned":{"type": "array", "items": {"type": "string"}, "description": "Drugs or treatments mentioned"},
|
| 50 |
+
"allergies": {"type": "array", "items": {"type": "string"}},
|
| 51 |
+
"vitals_mentioned": {
|
| 52 |
+
"type": "object",
|
| 53 |
+
"properties": {
|
| 54 |
+
"temperature": {"type": "string"},
|
| 55 |
+
"blood_pressure": {"type": "string"},
|
| 56 |
+
"pulse": {"type": "string"},
|
| 57 |
+
"weight": {"type": "string"},
|
| 58 |
+
},
|
| 59 |
+
},
|
| 60 |
+
"relevant_history": {"type": "string", "description": "Past medical history mentioned"},
|
| 61 |
+
"follow_up_actions": {"type": "array", "items": {"type": "string"}, "description": "Next steps, tests, referrals"},
|
| 62 |
+
},
|
| 63 |
+
"required": ["chief_complaint", "symptoms"],
|
| 64 |
+
},
|
| 65 |
+
)
|
| 66 |
+
|
| 67 |
+
_SYMPTOM_TOOL = types.Tool(function_declarations=[SYMPTOM_SCHEMA])
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def extract_symptoms_cloud(transcript: str) -> dict:
    """
    Use cloud Gemma 4 function calling to extract structured symptoms.

    The request forces the model (mode ``ANY``) to call ``record_symptoms``;
    the arguments of the first function call found in the response are
    returned, so no JSON text parsing is involved.

    Args:
        transcript: Consultation transcript text.

    Returns:
        The function-call arguments as a dict, or ``{}`` when the transcript
        is missing/blank, the request fails, or the response carries no
        function call.
    """
    # Guard against None as well as blank text; nothing to extract either way.
    if not transcript or not transcript.strip():
        return {}
    try:
        response = _client.models.generate_content(
            model=CLOUD_MODEL,
            contents=f"Extract all clinical information from this consultation transcript:\n\n{transcript}",
            config=types.GenerateContentConfig(
                tools=[_SYMPTOM_TOOL],
                tool_config=types.ToolConfig(
                    function_calling_config=types.FunctionCallingConfig(
                        mode="ANY",
                        allowed_function_names=["record_symptoms"],
                    )
                ),
                temperature=0.1,
            ),
        )
        for part in response.candidates[0].content.parts:
            if part.function_call:
                return dict(part.function_call.args)
    except Exception as e:
        # Deliberate best-effort: log the failure and fall through to {}.
        print(f"[FunctionCalling] Cloud extraction failed: {e}")
    # Reached on failure or when the response held no function call.
    return {}
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
# ── Transcript repair + speaker labelling ─────────────────────────────────────
|
| 101 |
+
|
| 102 |
+
REPAIR_PROMPT = """You are a medical transcription editor. You will receive a raw speech-to-text transcript of a doctor-patient consultation. The transcript may contain:
|
| 103 |
+
- Misheared words or garbled medical terms
|
| 104 |
+
- Missing punctuation and sentence breaks
|
| 105 |
+
- Run-on sentences
|
| 106 |
+
- Filler words (um, uh, like, you know)
|
| 107 |
+
- Incorrectly transcribed drug names, symptoms, or medical terminology
|
| 108 |
+
- Words run together without spaces
|
| 109 |
+
|
| 110 |
+
Your job is to:
|
| 111 |
+
1. REPAIR the transcript — fix obvious errors, correct medical terminology, add punctuation, split run-on sentences, remove filler words
|
| 112 |
+
2. LABEL each speaker — prefix each turn with "Doctor:" or "Patient:"
|
| 113 |
+
- Doctors: ask clinical questions, give diagnoses, prescribe medications, explain treatment
|
| 114 |
+
- Patients: describe symptoms, answer questions, mention their history, express concerns
|
| 115 |
+
3. Start a new labelled line each time the speaker changes
|
| 116 |
+
4. Do NOT add, invent, or remove any clinical facts — only fix language/transcription errors
|
| 117 |
+
5. Keep all mentioned symptoms, medications, durations, and instructions intact
|
| 118 |
+
|
| 119 |
+
Raw transcript:
|
| 120 |
+
{transcript}
|
| 121 |
+
|
| 122 |
+
Cleaned and labelled transcript:"""
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def clean_and_label_transcript(transcript: str) -> str:
    """
    Clean up a raw ASR transcript and tag each turn with its speaker.

    The transcript is inserted into REPAIR_PROMPT and sent through a single
    `_call`. A blank transcript is handed back untouched, and if anything
    goes wrong during the call the raw transcript is returned instead.
    """
    if transcript.strip():
        try:
            prompt = REPAIR_PROMPT.format(transcript=transcript)
            return _call(prompt)
        except Exception as e:
            print(f"[TranscriptRepair] Failed ({e}), using raw transcript.")
    return transcript
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def label_speakers(transcript: str) -> str:
    """Backwards-compatible alias that forwards to clean_and_label_transcript."""
    labelled = clean_and_label_transcript(transcript)
    return labelled
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
# ── SOAP Note (with reasoning mode) ──────────────────────────────────────────
|
| 145 |
+
|
| 146 |
+
SOAP_PROMPT = """You are an experienced medical scribe and clinician. Generate a professional SOAP note from the following doctor-patient consultation transcript.
|
| 147 |
+
|
| 148 |
+
{rag_context}
|
| 149 |
+
|
| 150 |
+
Think carefully about the clinical picture before writing. Format with these exact sections:
|
| 151 |
+
|
| 152 |
+
**S - Subjective**
|
| 153 |
+
(Patient's reported complaints, history, and symptoms in their own words)
|
| 154 |
+
|
| 155 |
+
**O - Objective**
|
| 156 |
+
(Observable, measurable findings: vitals, physical exam findings, lab values if mentioned)
|
| 157 |
+
|
| 158 |
+
**A - Assessment**
|
| 159 |
+
(Clinical impression and working diagnosis. Include the most likely ICD-10 code.)
|
| 160 |
+
|
| 161 |
+
**P - Plan**
|
| 162 |
+
(Medications with correct dosages from the reference above, investigations ordered, referrals, follow-up schedule, patient education)
|
| 163 |
+
|
| 164 |
+
Transcript:
|
| 165 |
+
{transcript}
|
| 166 |
+
|
| 167 |
+
SOAP Note:"""
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def generate_soap_note(transcript: str, rag_context: str = "") -> str:
    """
    Produce a SOAP note for a consultation transcript.

    Args:
        transcript: Consultation text to summarise.
        rag_context: Optional reference text; when non-empty it is embedded
            in the prompt under a "Clinical Reference:" heading.

    Returns:
        The model output, or "No transcript available." for a blank transcript.

    Raises:
        RuntimeError: Re-raised from `_call` unless its message mentions
            thinking, in which case the same prompt is retried once with
            thinking disabled.
    """
    if not transcript.strip():
        return "No transcript available."

    reference = ""
    if rag_context:
        reference = f"\nClinical Reference:\n{rag_context}\n"
    prompt = SOAP_PROMPT.format(transcript=transcript, rag_context=reference)

    try:
        return _call(prompt, use_thinking=True)
    except RuntimeError as e:
        message = str(e)
        thinking_rejected = "thinking" in message.lower() or "ThinkingConfig" in message
        if not thinking_rejected:
            raise
        # model doesn't support thinking — retry without it
        print("[Reasoning] Thinking not supported on this model, retrying without.")
        return _call(prompt, use_thinking=False)
|
| 188 |
+
|
| 189 |
+
|
| 190 |
+
# ── Patient Summary ───────────────────────────────────────────────────────────
|
| 191 |
+
|
| 192 |
+
SUMMARY_PROMPT = """You are a compassionate medical communicator. Write a clear, friendly patient summary from this consultation that:
|
| 193 |
+
- Uses simple, non-technical language
|
| 194 |
+
- Explains what was discussed and decided
|
| 195 |
+
- Lists medications and dosages prescribed
|
| 196 |
+
- States next steps and follow-up plan
|
| 197 |
+
- Is encouraging and reassuring in tone
|
| 198 |
+
|
| 199 |
+
Transcript:
|
| 200 |
+
{transcript}
|
| 201 |
+
|
| 202 |
+
Patient Summary:"""
|
| 203 |
+
|
| 204 |
+
|
| 205 |
+
def generate_patient_summary(transcript: str) -> str:
    """
    Produce a patient-facing summary of the consultation.

    Returns "No transcript available." for a blank transcript; otherwise the
    result of sending SUMMARY_PROMPT through `_call`. Errors raised by
    `_call` are not handled here and propagate to the caller.
    """
    has_content = bool(transcript.strip())
    if has_content:
        return _call(SUMMARY_PROMPT.format(transcript=transcript))
    return "No transcript available."
|
| 209 |
+
|
| 210 |
+
|
| 211 |
+
# ── Medical image / document analysis ────────────────────────────────────────
|
| 212 |
+
|
| 213 |
+
IMAGE_PROMPT = """You are a medical document analyst. Carefully examine this medical document (lab result, prescription, X-ray report, or clinical record).
|
| 214 |
+
|
| 215 |
+
Extract ALL clinical information present and structure it clearly:
|
| 216 |
+
|
| 217 |
+
**Document Type:** (lab result / prescription / imaging report / other)
|
| 218 |
+
|
| 219 |
+
**Key Findings:**
|
| 220 |
+
(List every test, value, measurement, or finding with its result and reference range if shown)
|
| 221 |
+
|
| 222 |
+
**Abnormal Values:**
|
| 223 |
+
(Highlight any results outside normal range)
|
| 224 |
+
|
| 225 |
+
**Medications / Dosages:**
|
| 226 |
+
(Any drugs, doses, or treatment instructions visible)
|
| 227 |
+
|
| 228 |
+
**Clinical Notes:**
|
| 229 |
+
(Any doctor notes, diagnoses, or instructions on the document)
|
| 230 |
+
|
| 231 |
+
**Summary for SOAP Note:**
|
| 232 |
+
(One paragraph summarising what this document adds to the clinical picture)"""
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
def analyze_medical_document(file_path: str) -> str:
    """
    Send an uploaded image or PDF to the cloud model and return its analysis.

    The MIME type is chosen from the file extension (unknown extensions are
    sent as "image/jpeg"). The request contents are the file bytes followed
    by IMAGE_PROMPT, in that order.

    Raises:
        RuntimeError: Wraps any exception raised while building or running
            the model request. Errors from reading the file itself are not
            wrapped and propagate unchanged.
    """
    source = Path(file_path)
    known_types = {
        ".jpg": "image/jpeg",
        ".jpeg": "image/jpeg",
        ".png": "image/png",
        ".webp": "image/webp",
        ".pdf": "application/pdf",
    }
    mime_type = known_types.get(source.suffix.lower(), "image/jpeg")
    file_bytes = source.read_bytes()

    try:
        request_parts = [
            types.Part.from_bytes(data=file_bytes, mime_type=mime_type),
            IMAGE_PROMPT,
        ]
        settings = types.GenerateContentConfig(temperature=0.1)
        response = _client.models.generate_content(
            model=CLOUD_MODEL,
            contents=request_parts,
            config=settings,
        )
        return response.text.strip()
    except Exception as e:
        raise RuntimeError(f"Image analysis failed: {e}") from e
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
# ── Translation stubs (disabled — NLLB-200 planned) ──────────────────────────
|
| 268 |
+
|
| 269 |
+
def translate_to_twi(english_text: str) -> str:
    """Placeholder for English-to-Twi translation; always returns an empty string."""
    translated = ""
    return translated
|
| 271 |
+
|
| 272 |
+
|
| 273 |
+
def translate_to_english(twi_text: str) -> str:
    """Placeholder for Twi-to-English translation; always returns an empty string."""
    translated = ""
    return translated
|
agents/symptom_agent.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import json
|
| 3 |
+
import ollama
|
| 4 |
+
from agents.cloud_agents import extract_symptoms_cloud
|
| 5 |
+
|
| 6 |
+
OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "gemma4:e2b")
|
| 7 |
+
|
| 8 |
+
SYMPTOM_PROMPT = """You are a medical symptom extraction AI. Extract all clinical information from this transcript into valid JSON only.
|
| 9 |
+
|
| 10 |
+
Return ONLY valid JSON — no markdown, no explanation, no code fences:
|
| 11 |
+
{{
|
| 12 |
+
"chief_complaint": "main reason for visit",
|
| 13 |
+
"symptoms": ["list", "of", "symptoms"],
|
| 14 |
+
"duration": "how long symptoms have been present",
|
| 15 |
+
"severity": "mild | moderate | severe",
|
| 16 |
+
"associated_symptoms": ["other symptoms"],
|
| 17 |
+
"medications_mentioned": ["drugs or treatments mentioned"],
|
| 18 |
+
"allergies": ["any allergies mentioned"],
|
| 19 |
+
"vitals_mentioned": {{
|
| 20 |
+
"temperature": null,
|
| 21 |
+
"blood_pressure": null,
|
| 22 |
+
"pulse": null,
|
| 23 |
+
"weight": null
|
| 24 |
+
}},
|
| 25 |
+
"relevant_history": "past medical history",
|
| 26 |
+
"follow_up_actions": ["follow-up steps, tests, referrals"]
|
| 27 |
+
}}
|
| 28 |
+
|
| 29 |
+
Transcript:
|
| 30 |
+
{transcript}"""
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _extract_via_ollama(transcript: str) -> dict:
    """
    Ask the local Ollama model for a structured symptom dict.

    The reply is expected to be JSON, optionally wrapped in a Markdown code
    fence. JSON decoding errors and Ollama client errors propagate.

    Raises:
        ValueError: If the parsed reply is not a dict containing
            "chief_complaint".
    """
    prompt = SYMPTOM_PROMPT.format(transcript=transcript)
    reply = ollama.chat(
        model=OLLAMA_MODEL,
        messages=[{"role": "user", "content": prompt}],
        options={"temperature": 0.1},
    )
    text = reply["message"]["content"].strip()

    # Keep only what sits after the first fence (up to the next one) and
    # drop a leading "json" language tag.
    if "```" in text:
        text = text.split("```")[1]
        if text.startswith("json"):
            text = text[4:]
    text = text.strip()

    data = json.loads(text)
    # must be a dict with at least chief_complaint to be valid
    looks_valid = isinstance(data, dict) and "chief_complaint" in data
    if not looks_valid:
        raise ValueError("Invalid symptom structure from Ollama")
    return data
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def extract_symptoms(transcript: str) -> dict:
    """
    Extract structured symptoms from a transcript.

    Tries the local Ollama model first and falls back to cloud function
    calling if the local attempt raises.

    Returns:
        The extracted dict; ``{}`` for a blank transcript or when the cloud
        fallback produced nothing; ``{"error": "..."}`` if the cloud
        fallback raised.
    """
    if not transcript.strip():
        return {}

    try:
        result = _extract_via_ollama(transcript)
    except Exception as e:
        print(f"[Symptoms] Ollama failed ({e}), falling back to cloud function calling...")
    else:
        print("[Symptoms] Extracted via local Gemma 4 E2B (Ollama)")
        return result

    try:
        result = extract_symptoms_cloud(transcript)
    except Exception as e:
        print(f"[Symptoms] Cloud fallback also failed: {e}")
        return {"error": str(e)}

    # extract_symptoms_cloud swallows its own errors and returns an empty
    # dict, so an empty result here means extraction failed — don't log it
    # as a success.
    if not result:
        print("[Symptoms] Cloud fallback returned no data.")
        return result

    print("[Symptoms] Extracted via cloud Gemma 4 function calling")
    return result
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _as_text_list(value) -> list:
    """Coerce a model-produced field into a list of non-blank display strings."""
    if not value:
        return []
    # A bare string must stay one item: joining it directly would split it
    # into single characters.
    items = value if isinstance(value, (list, tuple)) else [value]
    return [str(item) for item in items if item is not None and str(item).strip()]


def format_symptoms_for_display(symptoms: dict) -> str:
    """
    Render an extracted-symptoms dict as Markdown.

    Model output is not guaranteed to follow the schema, so list fields may
    arrive as a bare string or contain non-string entries, and vitals may
    not be a dict; those shapes are tolerated instead of raising.

    Args:
        symptoms: Extraction result; may be empty/None or carry an "error" key.

    Returns:
        Markdown with one paragraph per populated field,
        "_No symptoms extracted._" when there is no usable input, or
        "_No structured data found._" when every field is empty.
    """
    if not symptoms or "error" in symptoms:
        return "_No symptoms extracted._"

    lines = []
    if cc := symptoms.get("chief_complaint"):
        lines.append(f"**Chief Complaint:** {cc}")
    if s := _as_text_list(symptoms.get("symptoms")):
        lines.append(f"**Symptoms:** {', '.join(s)}")
    if d := symptoms.get("duration"):
        lines.append(f"**Duration:** {d}")
    if sev := symptoms.get("severity"):
        lines.append(f"**Severity:** {sev}")
    if assoc := _as_text_list(symptoms.get("associated_symptoms")):
        lines.append(f"**Associated:** {', '.join(assoc)}")
    if meds := _as_text_list(symptoms.get("medications_mentioned")):
        lines.append(f"**Medications:** {', '.join(meds)}")
    if allerg := _as_text_list(symptoms.get("allergies")):
        lines.append(f"**Allergies:** {', '.join(allerg)}")

    vitals = symptoms.get("vitals_mentioned") or {}
    if isinstance(vitals, dict):
        # Only vitals that were actually mentioned (truthy values) are shown.
        vital_parts = [f"{k}: {v}" for k, v in vitals.items() if v]
    else:
        vital_parts = _as_text_list(vitals)
    if vital_parts:
        lines.append(f"**Vitals:** {', '.join(vital_parts)}")

    if hist := symptoms.get("relevant_history"):
        lines.append(f"**History:** {hist}")
    if followup := _as_text_list(symptoms.get("follow_up_actions")):
        actions = "\n".join(f"- {a}" for a in followup)
        lines.append(f"**Follow-up Actions:**\n{actions}")

    return "\n\n".join(lines) if lines else "_No structured data found._"
|
app.py
ADDED
|
@@ -0,0 +1,490 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import threading
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
|
| 4 |
+
load_dotenv()
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
|
| 8 |
+
from database.db import (
|
| 9 |
+
init_db,
|
| 10 |
+
create_patient,
|
| 11 |
+
get_all_patients,
|
| 12 |
+
get_patient,
|
| 13 |
+
create_session,
|
| 14 |
+
update_transcript,
|
| 15 |
+
close_session,
|
| 16 |
+
save_note,
|
| 17 |
+
save_symptoms,
|
| 18 |
+
get_sessions_for_patient,
|
| 19 |
+
get_note_for_session,
|
| 20 |
+
get_symptoms_for_session,
|
| 21 |
+
)
|
| 22 |
+
from transcription.transcriber import LiveTranscriber
|
| 23 |
+
from agents.symptom_agent import extract_symptoms, format_symptoms_for_display
|
| 24 |
+
from agents.cloud_agents import generate_soap_note, generate_patient_summary, clean_and_label_transcript, analyze_medical_document
|
| 25 |
+
from rag.retriever import (
|
| 26 |
+
ensure_kb,
|
| 27 |
+
retrieve_icd_codes,
|
| 28 |
+
retrieve_drug_info,
|
| 29 |
+
format_icd_context,
|
| 30 |
+
format_drug_context,
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
# ── Startup ───────────────────────────────────────────────────────────────────
|
| 34 |
+
|
| 35 |
+
init_db()
|
| 36 |
+
ensure_kb()
|
| 37 |
+
|
| 38 |
+
# ── State ─────────────────────────────────────────────────────────────────────
|
| 39 |
+
|
| 40 |
+
_transcriber: LiveTranscriber | None = None
|
| 41 |
+
_transcript_parts: list[str] = []
|
| 42 |
+
_labelled_transcript: str = ""
|
| 43 |
+
_document_analysis: str = ""
|
| 44 |
+
_current_session_id: int | None = None
|
| 45 |
+
_transcript_lock = threading.Lock()
|
| 46 |
+
|
| 47 |
+
# ── Helpers ───────────────────────────────────────────────────────────────────
|
| 48 |
+
|
| 49 |
+
def _patient_choices() -> list[str]:
    """Build the "<id> — <name>" dropdown labels for every stored patient."""
    patients = get_all_patients()
    if not patients:
        return []
    labels = []
    for patient in patients:
        labels.append(f"{patient['id']} — {patient['name']}")
    return labels
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _parse_patient_choice(choice: str) -> int:
|
| 55 |
+
return int(choice.split("—")[0].strip())
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _full_transcript() -> str:
    """Join the captured transcript fragments with spaces while holding the transcript lock."""
    with _transcript_lock:
        joined = " ".join(_transcript_parts)
    return joined
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def _format_icd_panel(codes: list[dict]) -> str:
|
| 64 |
+
if not codes:
|
| 65 |
+
return "_No ICD-10 suggestions._"
|
| 66 |
+
lines = ["### Suggested ICD-10 Codes\n"]
|
| 67 |
+
for c in codes:
|
| 68 |
+
lines.append(f"- **{c['code']}** — {c['description']} *(confidence: {c['score']})*")
|
| 69 |
+
return "\n".join(lines)
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def _format_drug_panel(drugs: list[dict]) -> str:
|
| 73 |
+
if not drugs:
|
| 74 |
+
return "_No drug references matched._"
|
| 75 |
+
lines = ["### Drug Reference\n"]
|
| 76 |
+
for d in drugs:
|
| 77 |
+
lines.append(
|
| 78 |
+
f"**{d['name']}** ({d['class']})\n"
|
| 79 |
+
f"- Adult dose: {d['adult_dose']}\n"
|
| 80 |
+
f"- Indications: {d['indications']}\n"
|
| 81 |
+
f"- Caution: {d['contraindications']}\n"
|
| 82 |
+
)
|
| 83 |
+
return "\n".join(lines)
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# ── Tab 1: Live Consultation ──────────────────────────────────────────────────
|
| 87 |
+
|
| 88 |
+
def register_patient(name, dob, gender, phone):
    """
    Create a patient record and select it in the patient dropdown.

    Args:
        name: Patient name; surrounding whitespace is stripped before saving.
        dob, gender, phone: Passed through to create_patient unchanged.

    Returns:
        A (dropdown update, status message) pair. With a blank name the
        dropdown is left untouched and the message asks for a name.
    """
    if not name.strip():
        return gr.update(), "Please enter a patient name."
    pid = create_patient(name.strip(), dob, gender, phone)
    choices = _patient_choices()
    # Match the whole "<id> — " prefix: a bare startswith(str(pid)) would let
    # id 1 select "10 — …" or "12 — …" if one of those is listed first.
    label_prefix = f"{pid} — "
    new_val = next(
        (c for c in choices if c.startswith(label_prefix)),
        choices[-1] if choices else None,
    )
    return gr.update(choices=choices, value=new_val), f"Patient '{name}' registered (ID {pid})."
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def start_consultation(patient_choice, doctor_name):
    """
    Open a session for the selected patient and begin live transcription.

    Clears the transcript buffer, the labelled transcript and the document
    analysis, then starts a LiveTranscriber whose callback appends each
    recognised fragment to the buffer.

    Returns:
        (status text, transcript box text, stop-button update,
        start-button update).
    """
    global _transcriber, _transcript_parts, _current_session_id
    global _labelled_transcript, _document_analysis

    if not patient_choice:
        return "No patient selected.", "", gr.update(interactive=False), gr.update(interactive=True)

    patient_id = _parse_patient_choice(patient_choice)
    _current_session_id = create_session(patient_id, doctor_name or "Doctor")

    with _transcript_lock:
        _transcript_parts.clear()
        _labelled_transcript = ""
        _document_analysis = ""

    def on_text(text):
        with _transcript_lock:
            _transcript_parts.append(text)

    _transcriber = LiveTranscriber(on_text=on_text)
    _transcriber.start()

    stop_enabled = gr.update(interactive=True)
    start_disabled = gr.update(interactive=False)
    return "Recording... speak clearly.", "", stop_enabled, start_disabled
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def poll_transcript():
    """
    Return the text the live-transcript box should currently show.

    This runs on a repeating timer. While recording, no labelled transcript
    exists and the raw transcript is returned. Once stop_consultation has
    stored the cleaned, speaker-labelled transcript, that version is
    returned so the next tick does not overwrite it with the raw text.
    start_consultation resets the labelled transcript to "", which switches
    this back to raw text.
    """
    return _labelled_transcript or _full_transcript()
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def stop_consultation():
    """
    Stop recording, store the raw transcript, and clean/label it.

    Returns:
        (status text, transcript box text, stop-button update,
        start-button update). When nothing was captured the status says so
        and the transcript text is empty.
    """
    global _transcriber, _labelled_transcript

    if _transcriber:
        _transcriber.stop()
        _transcriber = None

    raw_text = _full_transcript()
    if raw_text:
        if _current_session_id:
            update_transcript(_current_session_id, raw_text)
        _labelled_transcript = clean_and_label_transcript(raw_text)
        status = "Consultation ended. Transcript cleaned ✓ Click 'Generate Notes' to proceed."
        shown = _labelled_transcript
    else:
        status, shown = "No audio captured.", ""

    return status, shown, gr.update(interactive=False), gr.update(interactive=True)
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
def upload_document(file):
    """
    Analyse an uploaded medical document and remember the result.

    The analysis text is stored in the module-level _document_analysis so
    generate_notes can use it; it is reset to "" when no file is given or
    the analysis fails.

    Returns:
        Markdown for the result panel: the analysis, a placeholder, or a
        failure message.
    """
    global _document_analysis

    if file is None:
        _document_analysis = ""
        return "_No document uploaded._"

    try:
        analysis = analyze_medical_document(file.name)
    except Exception as e:
        _document_analysis = ""
        return f"_Document analysis failed: {e}_"

    _document_analysis = analysis
    return analysis
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def generate_notes():
    """
    Run the full note pipeline: symptoms → RAG retrieval → cloud agents → DB.

    Uses the labelled transcript when available, otherwise the raw one.

    Returns:
        A 7-tuple matching the UI outputs: SOAP note, patient summary,
        symptoms Markdown, ICD panel, drug panel, then the SOAP note and
        summary again for the editable text boxes.
    """

    def _text_list(value) -> list:
        # Model output may give null, a bare string, or mixed-type entries
        # where a list of strings is expected; normalise so the joins and
        # lookups below cannot raise.
        if not value:
            return []
        items = value if isinstance(value, (list, tuple)) else [value]
        return [str(item) for item in items if item]

    # Use labelled transcript if available, fall back to raw
    transcript = _labelled_transcript or _full_transcript()
    if not transcript:
        return "No transcript available.", "No transcript available.", "_No symptoms._", "_No ICD codes._", "_No drug info._", "", ""

    # 1. Extract symptoms (local model first, cloud fallback)
    symptoms = extract_symptoms(transcript)
    symptoms_md = format_symptoms_for_display(symptoms)

    # 2. RAG retrieval
    # `or ""` keeps a null chief complaint from becoming the literal "None".
    chief = str(symptoms.get("chief_complaint") or "")
    sym_list = _text_list(symptoms.get("symptoms"))
    meds_list = _text_list(symptoms.get("medications_mentioned"))
    rag_query = f"{chief} {' '.join(sym_list)}".strip() or transcript[:300]

    icd_codes = retrieve_icd_codes(rag_query, n=5)
    drug_info = retrieve_drug_info(meds_list, n=3) if meds_list else []

    doc_section = f"\nUploaded Medical Document (lab result / prescription / report):\n{_document_analysis}\n" if _document_analysis else ""
    rag_context = "\n".join(filter(None, [
        format_icd_context(icd_codes),
        format_drug_context(drug_info),
        doc_section,
    ]))

    # 3. Cloud agents — a failure in one note is shown in its panel rather
    # than aborting the other.
    try:
        soap = generate_soap_note(transcript, rag_context=rag_context)
    except Exception as e:
        soap = f"_SOAP note generation failed: {e}_"

    try:
        summary_en = generate_patient_summary(transcript)
    except Exception as e:
        summary_en = f"_Summary generation failed: {e}_"

    # 4. Persist
    if _current_session_id:
        save_note(_current_session_id, soap, summary_en, summary_twi="")
        save_symptoms(_current_session_id, symptoms)
        close_session(_current_session_id)

    return soap, summary_en, symptoms_md, _format_icd_panel(icd_codes), _format_drug_panel(drug_info), soap, summary_en
|
| 214 |
+
|
| 215 |
+
|
| 216 |
+
# ── Tab 2: Patient Records ────────────────────────────────────────────────────
|
| 217 |
+
|
| 218 |
+
def load_patient_records(patient_choice):
    """
    Load the stored notes for the first session returned for a patient.

    Returns:
        (session info Markdown, SOAP note, patient summary, symptoms
        Markdown). When no patient is selected, the patient is unknown, or
        there are no sessions, the first element is a message and the rest
        are empty strings.
    """
    if not patient_choice:
        return "Select a patient.", "", "", ""

    patient_id = _parse_patient_choice(patient_choice)
    patient = get_patient(patient_id)
    if not patient:
        return "Patient not found.", "", "", ""

    sessions = get_sessions_for_patient(patient_id)
    if not sessions:
        return f"No sessions found for {patient['name']}.", "", "", ""

    # NOTE(review): sessions[0] is treated as the latest session — confirm
    # the ordering used by get_sessions_for_patient.
    latest = sessions[0]
    session_id = latest["id"]
    note = get_note_for_session(session_id)
    symptoms = get_symptoms_for_session(session_id)

    patient_line = (
        f"**Patient:** {patient['name']} | **DOB:** {patient.get('dob', 'N/A')} | "
        f"**Gender:** {patient.get('gender', 'N/A')}"
    )
    session_line = f"**Session:** {latest['date']} | **Doctor:** {latest.get('doctor', 'N/A')}"
    session_info = f"{patient_line}\n\n{session_line}"

    if note:
        soap, summary = note["soap_note"], note["summary_en"]
    else:
        soap, summary = "_No SOAP note found._", "_No summary found._"

    return session_info, soap, summary, format_symptoms_for_display(symptoms)
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
# ── CSS ───────────────────────────────────────────────────────────────────────
|
| 250 |
+
|
| 251 |
+
CSS = """
|
| 252 |
+
body, .gradio-container { font-family: 'Segoe UI', system-ui, sans-serif; }
|
| 253 |
+
|
| 254 |
+
#header-banner {
|
| 255 |
+
background: linear-gradient(135deg, #1a6eb5, #0d4f8a);
|
| 256 |
+
color: white;
|
| 257 |
+
padding: 20px 28px;
|
| 258 |
+
border-radius: 12px;
|
| 259 |
+
margin-bottom: 20px;
|
| 260 |
+
}
|
| 261 |
+
#header-banner h1 { margin: 0; font-size: 1.9rem; font-weight: 700; letter-spacing: -0.5px; }
|
| 262 |
+
#header-banner p { margin: 5px 0 0; opacity: 0.85; font-size: 0.95rem; }
|
| 263 |
+
|
| 264 |
+
/* Fix markdown panels — transparent so they inherit theme bg */
|
| 265 |
+
.gr-markdown, .svelte-1ed2p3z, [data-testid="markdown"] {
|
| 266 |
+
background: transparent !important;
|
| 267 |
+
}
|
| 268 |
+
|
| 269 |
+
/* Note cards — rendered SOAP/summary display */
|
| 270 |
+
.note-card {
|
| 271 |
+
border: 1px solid #2d4a6e;
|
| 272 |
+
border-radius: 8px;
|
| 273 |
+
padding: 16px 20px !important;
|
| 274 |
+
min-height: 200px;
|
| 275 |
+
font-size: 0.92rem;
|
| 276 |
+
line-height: 1.7;
|
| 277 |
+
}
|
| 278 |
+
.note-card h1, .note-card h2, .note-card h3 {
|
| 279 |
+
color: #4a9eff;
|
| 280 |
+
margin-top: 12px;
|
| 281 |
+
font-size: 1rem;
|
| 282 |
+
}
|
| 283 |
+
.note-card strong { color: #7ec8ff; }
|
| 284 |
+
.note-card p { margin: 6px 0; }
|
| 285 |
+
.note-card ul, .note-card ol { padding-left: 20px; margin: 4px 0; }
|
| 286 |
+
|
| 287 |
+
/* Status bar */
|
| 288 |
+
.status-bar p { font-weight: 600; color: #4a9eff; font-size: 1rem; }
|
| 289 |
+
|
| 290 |
+
/* RAG accordion open panel */
|
| 291 |
+
.rag-content {
|
| 292 |
+
border-left: 3px solid #1a6eb5;
|
| 293 |
+
padding: 10px 14px;
|
| 294 |
+
border-radius: 0 6px 6px 0;
|
| 295 |
+
font-size: 0.9rem;
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
/* Tighten up accordion headers */
|
| 299 |
+
.gr-accordion .label-wrap { font-weight: 600 !important; }
|
| 300 |
+
|
| 301 |
+
/* Recording pulse indicator */
|
| 302 |
+
@keyframes pulse { 0%,100%{opacity:1} 50%{opacity:0.4} }
|
| 303 |
+
.recording p { animation: pulse 1.4s ease-in-out infinite; color: #ff4444 !important; font-weight: 700; }
|
| 304 |
+
"""
|
| 305 |
+
|
| 306 |
+
# ── Layout ────────────────────────────────────────────────────────────────────
|
| 307 |
+
|
| 308 |
+
with gr.Blocks(title="Hospital Copilot") as demo:
|
| 309 |
+
|
| 310 |
+
gr.HTML("""
|
| 311 |
+
<div id="header-banner">
|
| 312 |
+
<h1>🏥 Hospital Copilot</h1>
|
| 313 |
+
<p>AI-powered medical documentation · Gemma 4 · RAG-grounded · Ghana</p>
|
| 314 |
+
</div>
|
| 315 |
+
""")
|
| 316 |
+
|
| 317 |
+
with gr.Tabs():
|
| 318 |
+
|
| 319 |
+
# ── Tab 1: Live Consultation ──────────────────────────────────────
|
| 320 |
+
with gr.Tab("🎙️ Live Consultation"):
|
| 321 |
+
with gr.Row(equal_height=False):
|
| 322 |
+
|
| 323 |
+
# Left column — patient panel
|
| 324 |
+
with gr.Column(scale=1, min_width=280):
|
| 325 |
+
with gr.Group():
|
| 326 |
+
gr.Markdown("#### 👤 Select Patient")
|
| 327 |
+
patient_dd = gr.Dropdown(label="Patient", choices=_patient_choices(), interactive=True)
|
| 328 |
+
doctor_name = gr.Textbox(label="Doctor", placeholder="Dr. Mensah")
|
| 329 |
+
|
| 330 |
+
with gr.Accordion("➕ Register New Patient", open=False):
|
| 331 |
+
reg_name = gr.Textbox(label="Full Name", placeholder="Kofi Agyeman")
|
| 332 |
+
reg_dob = gr.Textbox(label="Date of Birth", placeholder="1985-03-15")
|
| 333 |
+
reg_gender = gr.Radio(["Male", "Female", "Other"], label="Gender", value="Male")
|
| 334 |
+
reg_phone = gr.Textbox(label="Phone", placeholder="+233 24 000 0000")
|
| 335 |
+
reg_btn = gr.Button("Register Patient", variant="primary")
|
| 336 |
+
reg_status = gr.Markdown()
|
| 337 |
+
|
| 338 |
+
reg_btn.click(
|
| 339 |
+
register_patient,
|
| 340 |
+
inputs=[reg_name, reg_dob, reg_gender, reg_phone],
|
| 341 |
+
outputs=[patient_dd, reg_status],
|
| 342 |
+
)
|
| 343 |
+
|
| 344 |
+
# Right column — consultation
|
| 345 |
+
with gr.Column(scale=3):
|
| 346 |
+
status_txt = gr.Markdown("_Ready. Select a patient and click Start._", elem_classes=["status-bar"])
|
| 347 |
+
|
| 348 |
+
with gr.Row():
|
| 349 |
+
start_btn = gr.Button("▶ Start Consultation", variant="primary", scale=1)
|
| 350 |
+
stop_btn = gr.Button("⏹ End Consultation", variant="stop", scale=1, interactive=False)
|
| 351 |
+
|
| 352 |
+
live_transcript = gr.Textbox(
|
| 353 |
+
label="Transcript (cleaned & speaker-labelled after consultation ends)",
|
| 354 |
+
lines=8, max_lines=16,
|
| 355 |
+
interactive=False,
|
| 356 |
+
placeholder="Transcript streams here as you speak. After you click End Consultation, Gemma 4 cleans and labels it automatically.",
|
| 357 |
+
)
|
| 358 |
+
timer = gr.Timer(value=2)
|
| 359 |
+
timer.tick(poll_transcript, outputs=live_transcript)
|
| 360 |
+
|
| 361 |
+
with gr.Accordion("🩺 Extracted Symptoms", open=False):
|
| 362 |
+
symptoms_live = gr.Markdown("_Will populate after Generate Notes._")
|
| 363 |
+
|
| 364 |
+
gr.Markdown("---")
|
| 365 |
+
|
| 366 |
+
with gr.Accordion("📎 Upload Medical Document (Lab Result / Prescription / Report)", open=False):
|
| 367 |
+
gr.Markdown(
|
| 368 |
+
"_Optional — upload a photo or PDF of a lab result, prescription, or any medical document. "
|
| 369 |
+
"Gemma 4 will read it and include the findings in the SOAP note automatically._"
|
| 370 |
+
)
|
| 371 |
+
with gr.Row():
|
| 372 |
+
doc_upload = gr.File(
|
| 373 |
+
label="Upload document",
|
| 374 |
+
file_types=[".jpg", ".jpeg", ".png", ".webp", ".pdf"],
|
| 375 |
+
scale=1,
|
| 376 |
+
)
|
| 377 |
+
doc_analyse_btn = gr.Button("🔍 Analyse Document", variant="secondary", scale=0)
|
| 378 |
+
doc_result = gr.Markdown("_No document uploaded._")
|
| 379 |
+
doc_analyse_btn.click(upload_document, inputs=[doc_upload], outputs=[doc_result])
|
| 380 |
+
|
| 381 |
+
generate_btn = gr.Button("⚡ Generate Notes from Transcript", variant="primary", size="lg")
|
| 382 |
+
|
| 383 |
+
# RAG panels — inside accordions so they don't show as white boxes
|
| 384 |
+
with gr.Row():
|
| 385 |
+
with gr.Accordion("🏷️ ICD-10 Suggestions", open=True):
|
| 386 |
+
icd_panel = gr.Markdown("_Click Generate Notes to see suggestions._")
|
| 387 |
+
with gr.Accordion("💊 Drug Reference", open=True):
|
| 388 |
+
drug_panel = gr.Markdown("_Click Generate Notes to see drug info._")
|
| 389 |
+
|
| 390 |
+
gr.Markdown("### 📋 Generated Notes")
|
| 391 |
+
with gr.Row():
|
| 392 |
+
with gr.Column():
|
| 393 |
+
gr.Markdown("#### 🗒️ SOAP Note")
|
| 394 |
+
soap_out = gr.Markdown(
|
| 395 |
+
"_SOAP note will appear here after generating._",
|
| 396 |
+
elem_classes=["note-card"],
|
| 397 |
+
)
|
| 398 |
+
with gr.Accordion("✏️ Edit SOAP Note", open=False):
|
| 399 |
+
soap_edit = gr.Textbox(lines=18, interactive=True, show_label=False)
|
| 400 |
+
|
| 401 |
+
with gr.Column():
|
| 402 |
+
gr.Markdown("#### 📄 Patient Summary")
|
| 403 |
+
summary_en_out = gr.Markdown(
|
| 404 |
+
"_Patient summary will appear here after generating._",
|
| 405 |
+
elem_classes=["note-card"],
|
| 406 |
+
)
|
| 407 |
+
with gr.Accordion("✏️ Edit Summary", open=False):
|
| 408 |
+
summary_edit = gr.Textbox(lines=10, interactive=True, show_label=False)
|
| 409 |
+
|
| 410 |
+
start_btn.click(
|
| 411 |
+
start_consultation,
|
| 412 |
+
inputs=[patient_dd, doctor_name],
|
| 413 |
+
outputs=[status_txt, live_transcript, stop_btn, start_btn],
|
| 414 |
+
)
|
| 415 |
+
stop_btn.click(
|
| 416 |
+
stop_consultation,
|
| 417 |
+
outputs=[status_txt, live_transcript, stop_btn, start_btn],
|
| 418 |
+
)
|
| 419 |
+
generate_btn.click(
|
| 420 |
+
generate_notes,
|
| 421 |
+
outputs=[soap_out, summary_en_out, symptoms_live, icd_panel, drug_panel, soap_edit, summary_edit],
|
| 422 |
+
)
|
| 423 |
+
|
| 424 |
+
# ── Tab 2: Patient Records ────────────────────────────────────────
|
| 425 |
+
with gr.Tab("📁 Patient Records"):
|
| 426 |
+
with gr.Row():
|
| 427 |
+
records_patient_dd = gr.Dropdown(
|
| 428 |
+
label="Select Patient", choices=_patient_choices(), interactive=True, scale=3,
|
| 429 |
+
)
|
| 430 |
+
load_btn = gr.Button("Load Records", variant="primary", scale=1)
|
| 431 |
+
|
| 432 |
+
session_info_md = gr.Markdown()
|
| 433 |
+
|
| 434 |
+
with gr.Row():
|
| 435 |
+
with gr.Column():
|
| 436 |
+
gr.Markdown("#### 🗒️ SOAP Note")
|
| 437 |
+
rec_soap = gr.Markdown("_Load a patient to see their SOAP note._", elem_classes=["note-card"])
|
| 438 |
+
with gr.Column():
|
| 439 |
+
gr.Markdown("#### 📄 Patient Summary")
|
| 440 |
+
rec_summary = gr.Markdown("_Load a patient to see their summary._", elem_classes=["note-card"])
|
| 441 |
+
|
| 442 |
+
with gr.Accordion("🩺 Extracted Symptoms", open=False):
|
| 443 |
+
rec_symptoms = gr.Markdown()
|
| 444 |
+
|
| 445 |
+
load_btn.click(
|
| 446 |
+
load_patient_records,
|
| 447 |
+
inputs=[records_patient_dd],
|
| 448 |
+
outputs=[session_info_md, rec_soap, rec_summary, rec_symptoms],
|
| 449 |
+
)
|
| 450 |
+
reg_btn.click(
|
| 451 |
+
lambda: gr.update(choices=_patient_choices()),
|
| 452 |
+
outputs=[records_patient_dd],
|
| 453 |
+
)
|
| 454 |
+
|
| 455 |
+
# ── Tab 3: About ──────────────────────────────────────────────────
|
| 456 |
+
with gr.Tab("ℹ️ About"):
|
| 457 |
+
gr.Markdown("""
|
| 458 |
+
## Hospital Copilot — Gemma 4 for Good
|
| 459 |
+
|
| 460 |
+
**Reducing doctor burnout. Improving care quality. Built for Ghana.**
|
| 461 |
+
|
| 462 |
+
### How it works
|
| 463 |
+
1. **Live Transcription** — faster-whisper converts speech to text in real time on CPU
|
| 464 |
+
2. **Symptom Extraction** — Gemma 4 E2B (local, Ollama) extracts structured clinical JSON
|
| 465 |
+
3. **RAG Retrieval** — sentence-transformers + ChromaDB matches ICD-10 codes and drug dosages
|
| 466 |
+
4. **SOAP Note Generation** — Gemma 4 26B (cloud) writes a grounded, accurate medical note
|
| 467 |
+
5. **Patient Summary** — plain-language summary the patient can take home
|
| 468 |
+
6. **Structured Records** — everything saved to local SQLite
|
| 469 |
+
|
| 470 |
+
### RAG Knowledge Base
|
| 471 |
+
| Collection | Entries | Source |
|
| 472 |
+
|---|---|---|
|
| 473 |
+
| ICD-10 codes | 90+ | Ghana-relevant + general conditions |
|
| 474 |
+
| Essential medicines | 40+ | WHO Essential Medicines List |
|
| 475 |
+
|
| 476 |
+
### Technology Stack
|
| 477 |
+
| Component | Model | Where |
|
| 478 |
+
|---|---|---|
|
| 479 |
+
| Speech-to-Text | faster-whisper (base) | Local CPU |
|
| 480 |
+
| Symptom Extraction | Gemma 4 E2B (Q4_K_M) | Local CPU via Ollama |
|
| 481 |
+
| Embeddings | all-MiniLM-L6-v2 | Local CPU |
|
| 482 |
+
| Vector Store | ChromaDB | Local disk |
|
| 483 |
+
| SOAP / Summary | Gemma 4 26B-IT | Google AI Studio API |
|
| 484 |
+
| Storage | SQLite | Local |
|
| 485 |
+
| UI | Gradio | Desktop |
|
| 486 |
+
""")
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
if __name__ == "__main__":
|
| 490 |
+
demo.launch(server_name="0.0.0.0", server_port=7860, share=False, css=CSS)
|
database/__init__.py
ADDED
|
File without changes
|
database/db.py
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import sqlite3
|
| 2 |
+
import json
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
# Location of the SQLite database file: "hospital_copilot.db" in the parent
# of the directory that contains this module.
DB_PATH = Path(__file__).parent.parent / "hospital_copilot.db"
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def get_conn():
    """Open and return a new SQLite connection to ``DB_PATH``.

    The connection's row factory is ``sqlite3.Row``, so fetched rows can be
    indexed by column name and converted with ``dict(row)``. The caller owns
    the returned connection.
    """
    connection = sqlite3.connect(DB_PATH)
    connection.row_factory = sqlite3.Row
    return connection
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def init_db():
    """Create the ``patients``, ``sessions``, ``notes`` and ``symptoms`` tables.

    Every statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this on an
    already-initialised database leaves existing tables untouched.
    """
    conn = get_conn()
    try:
        # ``with conn`` only commits / rolls back; it does not close the
        # connection, hence the explicit close in ``finally``.
        with conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS patients (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT NOT NULL,
                    dob TEXT,
                    gender TEXT,
                    phone TEXT,
                    language TEXT DEFAULT 'en',
                    created_at TEXT DEFAULT (datetime('now'))
                );

                CREATE TABLE IF NOT EXISTS sessions (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    patient_id INTEGER NOT NULL REFERENCES patients(id),
                    doctor TEXT,
                    date TEXT DEFAULT (datetime('now')),
                    transcript TEXT,
                    status TEXT DEFAULT 'open'
                );

                CREATE TABLE IF NOT EXISTS notes (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    session_id INTEGER NOT NULL REFERENCES sessions(id),
                    soap_note TEXT,
                    summary_en TEXT,
                    summary_twi TEXT,
                    created_at TEXT DEFAULT (datetime('now'))
                );

                CREATE TABLE IF NOT EXISTS symptoms (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    session_id INTEGER NOT NULL REFERENCES sessions(id),
                    data TEXT,
                    created_at TEXT DEFAULT (datetime('now'))
                );
            """)
    finally:
        conn.close()
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
# --- Patient helpers ---
|
| 56 |
+
|
| 57 |
+
def create_patient(name: str, dob: str = "", gender: str = "", phone: str = "", language: str = "en") -> int:
    """Insert a new row into ``patients`` and return its primary key.

    Args:
        name: Patient name (the column is ``NOT NULL``).
        dob: Date of birth, stored as text.
        gender: Gender, stored as text.
        phone: Phone number, stored as text.
        language: Language code stored in the ``language`` column.

    Returns:
        The ``id`` of the inserted row.
    """
    conn = get_conn()
    try:
        # ``with conn`` commits on success and rolls back on error, but does
        # not close the connection, so it is closed explicitly below.
        with conn:
            cur = conn.execute(
                "INSERT INTO patients (name, dob, gender, phone, language) VALUES (?, ?, ?, ?, ?)",
                (name, dob, gender, phone, language),
            )
            return cur.lastrowid
    finally:
        conn.close()
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def get_all_patients() -> list[dict]:
    """Return every row of ``patients`` as a dict, ordered by name."""
    conn = get_conn()
    try:
        rows = conn.execute("SELECT * FROM patients ORDER BY name").fetchall()
        return [dict(row) for row in rows]
    finally:
        # The sqlite3 connection context manager never closes the connection,
        # so close it explicitly instead of leaking the handle.
        conn.close()
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def get_patient(patient_id: int) -> dict | None:
    """Return the ``patients`` row with the given id as a dict.

    Returns:
        The row as a dict, or ``None`` when no patient has that id.
    """
    conn = get_conn()
    try:
        row = conn.execute("SELECT * FROM patients WHERE id = ?", (patient_id,)).fetchone()
        return dict(row) if row else None
    finally:
        # The sqlite3 connection context manager never closes the connection,
        # so close it explicitly instead of leaking the handle.
        conn.close()
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# --- Session helpers ---
|
| 79 |
+
|
| 80 |
+
def create_session(patient_id: int, doctor: str = "Dr. Unknown") -> int:
    """Insert a new row into ``sessions`` for a patient and return its id.

    Columns not supplied here (``date``, ``status``, ``transcript``) take the
    defaults declared by the table.

    Args:
        patient_id: Value stored in the ``patient_id`` column.
        doctor: Doctor name stored with the session.

    Returns:
        The ``id`` of the inserted session row.
    """
    conn = get_conn()
    try:
        # ``with conn`` commits on success and rolls back on error, but does
        # not close the connection, so it is closed explicitly below.
        with conn:
            cur = conn.execute(
                "INSERT INTO sessions (patient_id, doctor) VALUES (?, ?)",
                (patient_id, doctor),
            )
            return cur.lastrowid
    finally:
        conn.close()
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def update_transcript(session_id: int, transcript: str):
    """Overwrite the ``transcript`` column of the given session.

    Args:
        session_id: Id of the ``sessions`` row to update.
        transcript: New transcript text; it replaces any previous value.
    """
    conn = get_conn()
    try:
        # ``with conn`` commits on success and rolls back on error, but does
        # not close the connection, so it is closed explicitly below.
        with conn:
            conn.execute(
                "UPDATE sessions SET transcript = ? WHERE id = ?",
                (transcript, session_id),
            )
    finally:
        conn.close()
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def close_session(session_id: int):
    """Set the ``status`` column of the given session to ``'closed'``.

    Args:
        session_id: Id of the ``sessions`` row to update.
    """
    conn = get_conn()
    try:
        # ``with conn`` commits on success and rolls back on error, but does
        # not close the connection, so it is closed explicitly below.
        with conn:
            conn.execute(
                "UPDATE sessions SET status = 'closed' WHERE id = ?",
                (session_id,),
            )
    finally:
        conn.close()
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
def get_sessions_for_patient(patient_id: int) -> list[dict]:
    """Return all sessions of a patient as dicts, newest first.

    Args:
        patient_id: Value matched against ``sessions.patient_id``.

    Returns:
        One dict per session row; an empty list when the patient has none.
    """
    conn = get_conn()
    try:
        # ``date`` defaults to datetime('now'), which has one-second
        # resolution; the autoincrement id breaks ties so that sessions
        # created within the same second still come back newest first.
        rows = conn.execute(
            "SELECT * FROM sessions WHERE patient_id = ? ORDER BY date DESC, id DESC",
            (patient_id,),
        ).fetchall()
        return [dict(row) for row in rows]
    finally:
        # The sqlite3 connection context manager never closes the connection,
        # so close it explicitly instead of leaking the handle.
        conn.close()
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
# --- Notes helpers ---
|
| 115 |
+
|
| 116 |
+
def save_note(session_id: int, soap_note: str, summary_en: str, summary_twi: str) -> int:
    """Insert a new row into ``notes`` for a session and return its id.

    Args:
        session_id: Value stored in the ``session_id`` column.
        soap_note: Text stored in the ``soap_note`` column.
        summary_en: Text stored in the ``summary_en`` column.
        summary_twi: Text stored in the ``summary_twi`` column.

    Returns:
        The ``id`` of the inserted note row.
    """
    conn = get_conn()
    try:
        # ``with conn`` commits on success and rolls back on error, but does
        # not close the connection, so it is closed explicitly below.
        with conn:
            cur = conn.execute(
                "INSERT INTO notes (session_id, soap_note, summary_en, summary_twi) VALUES (?, ?, ?, ?)",
                (session_id, soap_note, summary_en, summary_twi),
            )
            return cur.lastrowid
    finally:
        conn.close()
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def get_note_for_session(session_id: int) -> dict | None:
    """Return the most recently saved note of a session.

    Args:
        session_id: Value matched against ``notes.session_id``.

    Returns:
        The newest note row as a dict, or ``None`` when the session has no
        notes.
    """
    conn = get_conn()
    try:
        # ``created_at`` defaults to datetime('now'), which has one-second
        # resolution; the autoincrement id breaks ties so that a note saved
        # twice within the same second still resolves to the later insert.
        row = conn.execute(
            "SELECT * FROM notes WHERE session_id = ? ORDER BY created_at DESC, id DESC LIMIT 1",
            (session_id,),
        ).fetchone()
        return dict(row) if row else None
    finally:
        # The sqlite3 connection context manager never closes the connection,
        # so close it explicitly instead of leaking the handle.
        conn.close()
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
# --- Symptom helpers ---
|
| 135 |
+
|
| 136 |
+
def save_symptoms(session_id: int, symptoms: dict):
    """Store a symptoms dict for a session as a JSON string.

    Each call inserts a new ``symptoms`` row; earlier rows are kept.

    Args:
        session_id: Value stored in the ``session_id`` column.
        symptoms: JSON-serialisable dict, written to the ``data`` column via
            ``json.dumps``.
    """
    conn = get_conn()
    try:
        # ``with conn`` commits on success and rolls back on error, but does
        # not close the connection, so it is closed explicitly below.
        with conn:
            conn.execute(
                "INSERT INTO symptoms (session_id, data) VALUES (?, ?)",
                (session_id, json.dumps(symptoms)),
            )
    finally:
        conn.close()
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def get_symptoms_for_session(session_id: int) -> dict:
    """Return the most recently saved symptoms of a session.

    Args:
        session_id: Value matched against ``symptoms.session_id``.

    Returns:
        The decoded JSON stored in the newest ``symptoms`` row, or an empty
        dict when the session has no row or the row's ``data`` is NULL.
    """
    conn = get_conn()
    try:
        # ``created_at`` defaults to datetime('now'), which has one-second
        # resolution; the autoincrement id breaks ties so that the later of
        # two same-second inserts is the one returned.
        row = conn.execute(
            "SELECT data FROM symptoms WHERE session_id = ? ORDER BY created_at DESC, id DESC LIMIT 1",
            (session_id,),
        ).fetchone()
        # ``data`` is a nullable column; json.loads(None) would raise TypeError.
        if row is None or row["data"] is None:
            return {}
        return json.loads(row["data"])
    finally:
        # The sqlite3 connection context manager never closes the connection,
        # so close it explicitly instead of leaking the handle.
        conn.close()
|
rag/__init__.py
ADDED
|
File without changes
|
rag/data/essential_medicines.json
ADDED
|
@@ -0,0 +1,416 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"name": "Artemether-Lumefantrine (Coartem)",
|
| 4 |
+
"class": "Antimalarial",
|
| 5 |
+
"adult_dose": "4 tablets at 0, 8, 24, 36, 48 and 60 hours (80mg/480mg per dose)",
|
| 6 |
+
"pediatric_dose": "Weight-based: 5-14kg = 1 tab, 15-24kg = 2 tabs, 25-34kg = 3 tabs per dose",
|
| 7 |
+
"indications": "Uncomplicated Plasmodium falciparum malaria",
|
| 8 |
+
"contraindications": "First trimester pregnancy, severe malaria",
|
| 9 |
+
"notes": "Take with food or milk to improve absorption"
|
| 10 |
+
},
|
| 11 |
+
{
|
| 12 |
+
"name": "Artesunate IV",
|
| 13 |
+
"class": "Antimalarial",
|
| 14 |
+
"adult_dose": "2.4mg/kg IV at 0, 12, 24 hours then daily",
|
| 15 |
+
"pediatric_dose": "2.4mg/kg IV same schedule",
|
| 16 |
+
"indications": "Severe malaria, cerebral malaria",
|
| 17 |
+
"contraindications": "Known hypersensitivity",
|
| 18 |
+
"notes": "Preferred over quinine for severe malaria"
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"name": "Amoxicillin",
|
| 22 |
+
"class": "Antibiotic (Beta-lactam)",
|
| 23 |
+
"adult_dose": "500mg three times daily for 5-7 days",
|
| 24 |
+
"pediatric_dose": "25-50mg/kg/day in three divided doses",
|
| 25 |
+
"indications": "Respiratory infections, ear infections, UTI, skin infections, H. pylori",
|
| 26 |
+
"contraindications": "Penicillin allergy",
|
| 27 |
+
"notes": "Can be taken with or without food"
|
| 28 |
+
},
|
| 29 |
+
{
|
| 30 |
+
"name": "Amoxicillin-Clavulanate (Augmentin)",
|
| 31 |
+
"class": "Antibiotic (Beta-lactam + inhibitor)",
|
| 32 |
+
"adult_dose": "625mg (500/125mg) three times daily for 5-7 days",
|
| 33 |
+
"pediatric_dose": "25-45mg/kg/day amoxicillin component in divided doses",
|
| 34 |
+
"indications": "Resistant infections, sinusitis, pneumonia, UTI, skin infections",
|
| 35 |
+
"contraindications": "Penicillin allergy, cholestatic jaundice from prior use",
|
| 36 |
+
"notes": "Take with food to reduce GI side effects"
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"name": "Azithromycin",
|
| 40 |
+
"class": "Antibiotic (Macrolide)",
|
| 41 |
+
"adult_dose": "500mg on day 1, then 250mg daily for 4 days (or 500mg daily x 3 days)",
|
| 42 |
+
"pediatric_dose": "10mg/kg on day 1, then 5mg/kg daily for 4 days",
|
| 43 |
+
"indications": "Respiratory infections, typhoid, STIs, community-acquired pneumonia",
|
| 44 |
+
"contraindications": "Macrolide allergy, liver disease",
|
| 45 |
+
"notes": "Take 1 hour before or 2 hours after meals"
|
| 46 |
+
},
|
| 47 |
+
{
|
| 48 |
+
"name": "Ciprofloxacin",
|
| 49 |
+
"class": "Antibiotic (Fluoroquinolone)",
|
| 50 |
+
"adult_dose": "500mg twice daily for 5-7 days (UTI: 250-500mg twice daily x 3 days)",
|
| 51 |
+
"pediatric_dose": "Not recommended in children under 18 except specific indications",
|
| 52 |
+
"indications": "UTI, typhoid, diarrhoea, respiratory infections, skin infections",
|
| 53 |
+
"contraindications": "Children, pregnancy, tendon disorders, QT prolongation",
|
| 54 |
+
"notes": "Avoid dairy products, antacids within 2 hours"
|
| 55 |
+
},
|
| 56 |
+
{
|
| 57 |
+
"name": "Metronidazole",
|
| 58 |
+
"class": "Antibiotic/Antiprotozoal",
|
| 59 |
+
"adult_dose": "400-500mg three times daily for 5-7 days",
|
| 60 |
+
"pediatric_dose": "7.5mg/kg three times daily",
|
| 61 |
+
"indications": "Amoebic dysentery, giardiasis, anaerobic infections, bacterial vaginosis, H. pylori",
|
| 62 |
+
"contraindications": "First trimester pregnancy, alcohol use",
|
| 63 |
+
"notes": "Avoid alcohol during treatment and 48 hours after"
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "Paracetamol (Acetaminophen)",
|
| 67 |
+
"class": "Analgesic/Antipyretic",
|
| 68 |
+
"adult_dose": "500-1000mg every 4-6 hours, maximum 4g/day",
|
| 69 |
+
"pediatric_dose": "10-15mg/kg every 4-6 hours, maximum 60mg/kg/day",
|
| 70 |
+
"indications": "Pain, fever, headache, post-operative analgesia",
|
| 71 |
+
"contraindications": "Severe liver disease",
|
| 72 |
+
"notes": "Most common OTC medication. Safe in pregnancy"
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"name": "Ibuprofen",
|
| 76 |
+
"class": "NSAID (Anti-inflammatory)",
|
| 77 |
+
"adult_dose": "400-600mg three times daily with food",
|
| 78 |
+
"pediatric_dose": "5-10mg/kg every 6-8 hours (>6 months)",
|
| 79 |
+
"indications": "Pain, fever, inflammation, dysmenorrhoea, arthritis",
|
| 80 |
+
"contraindications": "Peptic ulcer, renal impairment, third trimester pregnancy, asthma (some)",
|
| 81 |
+
"notes": "Always take with food"
|
| 82 |
+
},
|
| 83 |
+
{
|
| 84 |
+
"name": "Diclofenac",
|
| 85 |
+
"class": "NSAID",
|
| 86 |
+
"adult_dose": "75mg twice daily or 50mg three times daily",
|
| 87 |
+
"pediatric_dose": "1mg/kg twice to three times daily (>1 year)",
|
| 88 |
+
"indications": "Pain, inflammation, arthritis, musculoskeletal disorders",
|
| 89 |
+
"contraindications": "Peptic ulcer, heart failure, renal disease, third trimester pregnancy",
|
| 90 |
+
"notes": "Take with food"
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"name": "Tramadol",
|
| 94 |
+
"class": "Opioid analgesic",
|
| 95 |
+
"adult_dose": "50-100mg every 4-6 hours, maximum 400mg/day",
|
| 96 |
+
"pediatric_dose": "1-2mg/kg every 4-6 hours (>1 year, >10kg)",
|
| 97 |
+
"indications": "Moderate to severe pain",
|
| 98 |
+
"contraindications": "Epilepsy, MAOIs, respiratory depression, children under 12",
|
| 99 |
+
"notes": "Controlled substance. Risk of dependence"
|
| 100 |
+
},
|
| 101 |
+
{
|
| 102 |
+
"name": "Omeprazole",
|
| 103 |
+
"class": "Proton Pump Inhibitor (PPI)",
|
| 104 |
+
"adult_dose": "20-40mg once daily before breakfast",
|
| 105 |
+
"pediatric_dose": "0.7-1.4mg/kg once daily (max 20mg)",
|
| 106 |
+
"indications": "GERD, peptic ulcer, H. pylori eradication, NSAID-induced ulcer prevention",
|
| 107 |
+
"contraindications": "Hypersensitivity",
|
| 108 |
+
"notes": "Take 30 minutes before meals"
|
| 109 |
+
},
|
| 110 |
+
{
|
| 111 |
+
"name": "Metformin",
|
| 112 |
+
"class": "Antidiabetic (Biguanide)",
|
| 113 |
+
"adult_dose": "500mg twice daily with meals, increase to max 2000mg/day",
|
| 114 |
+
"pediatric_dose": "Not recommended under 10 years",
|
| 115 |
+
"indications": "Type 2 diabetes mellitus, pre-diabetes",
|
| 116 |
+
"contraindications": "Renal impairment (eGFR <30), liver failure, heart failure, contrast media",
|
| 117 |
+
"notes": "Take with food. First-line therapy for T2DM"
|
| 118 |
+
},
|
| 119 |
+
{
|
| 120 |
+
"name": "Glibenclamide (Glyburide)",
|
| 121 |
+
"class": "Antidiabetic (Sulphonylurea)",
|
| 122 |
+
"adult_dose": "2.5-5mg once daily before breakfast, max 15mg/day",
|
| 123 |
+
"pediatric_dose": "Not recommended",
|
| 124 |
+
"indications": "Type 2 diabetes when metformin insufficient",
|
| 125 |
+
"contraindications": "Type 1 diabetes, renal/hepatic failure, pregnancy",
|
| 126 |
+
"notes": "Risk of hypoglycaemia. Ensure regular meals"
|
| 127 |
+
},
|
| 128 |
+
{
|
| 129 |
+
"name": "Amlodipine",
|
| 130 |
+
"class": "Antihypertensive (Calcium Channel Blocker)",
|
| 131 |
+
"adult_dose": "5-10mg once daily",
|
| 132 |
+
"pediatric_dose": "2.5-5mg once daily (6-17 years)",
|
| 133 |
+
"indications": "Hypertension, angina",
|
| 134 |
+
"contraindications": "Severe aortic stenosis, cardiogenic shock",
|
| 135 |
+
"notes": "Common side effect: ankle oedema"
|
| 136 |
+
},
|
| 137 |
+
{
|
| 138 |
+
"name": "Lisinopril",
|
| 139 |
+
"class": "Antihypertensive (ACE Inhibitor)",
|
| 140 |
+
"adult_dose": "5-10mg once daily, max 40mg/day",
|
| 141 |
+
"pediatric_dose": "0.07mg/kg once daily (>6 years)",
|
| 142 |
+
"indications": "Hypertension, heart failure, diabetic nephropathy",
|
| 143 |
+
"contraindications": "Pregnancy, bilateral renal artery stenosis, angioedema history",
|
| 144 |
+
"notes": "Common side effect: dry cough. Check renal function and potassium"
|
| 145 |
+
},
|
| 146 |
+
{
|
| 147 |
+
"name": "Atenolol",
|
| 148 |
+
"class": "Antihypertensive (Beta-blocker)",
|
| 149 |
+
"adult_dose": "25-100mg once daily",
|
| 150 |
+
"pediatric_dose": "0.5-1mg/kg once daily",
|
| 151 |
+
"indications": "Hypertension, angina, arrhythmia, post-MI",
|
| 152 |
+
"contraindications": "Asthma, bradycardia, heart block, cardiogenic shock",
|
| 153 |
+
"notes": "Do not stop abruptly"
|
| 154 |
+
},
|
| 155 |
+
{
|
| 156 |
+
"name": "Hydrochlorothiazide",
|
| 157 |
+
"class": "Antihypertensive (Thiazide Diuretic)",
|
| 158 |
+
"adult_dose": "12.5-25mg once daily in the morning",
|
| 159 |
+
"pediatric_dose": "1-2mg/kg/day in one or two doses",
|
| 160 |
+
"indications": "Hypertension, oedema",
|
| 161 |
+
"contraindications": "Anuria, hypersensitivity to sulfonamides, gout",
|
| 162 |
+
"notes": "Monitor electrolytes, especially potassium"
|
| 163 |
+
},
|
| 164 |
+
{
|
| 165 |
+
"name": "Furosemide (Frusemide)",
|
| 166 |
+
"class": "Loop Diuretic",
|
| 167 |
+
"adult_dose": "20-80mg once or twice daily",
|
| 168 |
+
"pediatric_dose": "1-2mg/kg once or twice daily",
|
| 169 |
+
"indications": "Heart failure, oedema, hypertension, pulmonary oedema",
|
| 170 |
+
"contraindications": "Anuria, hypovolaemia, hypokalaemia",
|
| 171 |
+
"notes": "Monitor electrolytes and renal function"
|
| 172 |
+
},
|
| 173 |
+
{
|
| 174 |
+
"name": "Atorvastatin",
|
| 175 |
+
"class": "Lipid-lowering (Statin)",
|
| 176 |
+
"adult_dose": "10-80mg once daily at night",
|
| 177 |
+
"pediatric_dose": "10-20mg daily (10-17 years with familial hypercholesterolaemia)",
|
| 178 |
+
"indications": "Hypercholesterolaemia, cardiovascular risk reduction",
|
| 179 |
+
"contraindications": "Active liver disease, pregnancy, breastfeeding",
|
| 180 |
+
"notes": "Preferably take in the evening. Monitor LFTs"
|
| 181 |
+
},
|
| 182 |
+
{
|
| 183 |
+
"name": "Aspirin",
|
| 184 |
+
"class": "Antiplatelet/NSAID",
|
| 185 |
+
"adult_dose": "75-150mg once daily (antiplatelet); 300-900mg every 4-6 hours (analgesia)",
|
| 186 |
+
"pediatric_dose": "Avoid in children under 16 (Reye syndrome risk)",
|
| 187 |
+
"indications": "Antiplatelet: MI prevention, stroke prevention, ACS; Analgesic: pain, fever",
|
| 188 |
+
"contraindications": "Children under 16, peptic ulcer, bleeding disorders, third trimester",
|
| 189 |
+
"notes": "Irreversibly inhibits platelets"
|
| 190 |
+
},
|
| 191 |
+
{
|
| 192 |
+
"name": "Cotrimoxazole (Trimethoprim-Sulfamethoxazole)",
|
| 193 |
+
"class": "Antibiotic/Anti-infective",
|
| 194 |
+
"adult_dose": "960mg (2 standard tablets) twice daily for 5-7 days",
|
| 195 |
+
"pediatric_dose": "4/20mg/kg twice daily",
|
| 196 |
+
"indications": "UTI, chest infections, toxoplasmosis prophylaxis in HIV, PCP prophylaxis",
|
| 197 |
+
"contraindications": "Sulfonamide allergy, G6PD deficiency, severe renal/hepatic failure",
|
| 198 |
+
"notes": "Ensure adequate fluid intake"
|
| 199 |
+
},
|
| 200 |
+
{
|
| 201 |
+
"name": "Chlorphenamine (Chlorpheniramine)",
|
| 202 |
+
"class": "Antihistamine (1st generation)",
|
| 203 |
+
"adult_dose": "4mg every 4-6 hours, max 24mg/day",
|
| 204 |
+
"pediatric_dose": "0.1mg/kg every 6 hours",
|
| 205 |
+
"indications": "Allergic reactions, urticaria, hay fever, pruritus",
|
| 206 |
+
"contraindications": "Glaucoma, urinary retention, MAOIs",
|
| 207 |
+
"notes": "Causes drowsiness. Avoid driving"
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"name": "Cetirizine",
|
| 211 |
+
"class": "Antihistamine (2nd generation)",
|
| 212 |
+
"adult_dose": "10mg once daily",
|
| 213 |
+
"pediatric_dose": "5mg once or twice daily (6-11 years); 2.5mg twice daily (2-5 years)",
|
| 214 |
+
"indications": "Allergic rhinitis, urticaria, eczema, allergic reactions",
|
| 215 |
+
"contraindications": "Severe renal impairment",
|
| 216 |
+
"notes": "Less sedating than chlorphenamine"
|
| 217 |
+
},
|
| 218 |
+
{
|
| 219 |
+
"name": "Salbutamol (Albuterol)",
|
| 220 |
+
"class": "Bronchodilator (Short-acting beta-2 agonist)",
|
| 221 |
+
"adult_dose": "100-200mcg (1-2 puffs) every 4-6 hours as needed",
|
| 222 |
+
"pediatric_dose": "100mcg (1 puff) as needed (under supervision)",
|
| 223 |
+
"indications": "Asthma, COPD, bronchospasm",
|
| 224 |
+
"contraindications": "Tachyarrhythmia",
|
| 225 |
+
"notes": "Shake inhaler. Rinse mouth after use if using spacer with steroid"
|
| 226 |
+
},
|
| 227 |
+
{
|
| 228 |
+
"name": "Prednisolone",
|
| 229 |
+
"class": "Corticosteroid",
|
| 230 |
+
"adult_dose": "5-60mg daily depending on indication",
|
| 231 |
+
"pediatric_dose": "1-2mg/kg/day in divided doses",
|
| 232 |
+
"indications": "Asthma exacerbation, severe allergic reactions, autoimmune conditions, inflammation",
|
| 233 |
+
"contraindications": "Systemic infections (without antibiotics), live vaccines",
|
| 234 |
+
"notes": "Do not stop abruptly if on long-term therapy"
|
| 235 |
+
},
|
| 236 |
+
{
|
| 237 |
+
"name": "Dexamethasone",
|
| 238 |
+
"class": "Corticosteroid",
|
| 239 |
+
"adult_dose": "0.5-24mg daily depending on indication",
|
| 240 |
+
"pediatric_dose": "0.08-0.3mg/kg/day",
|
| 241 |
+
"indications": "Severe asthma, croup, cerebral oedema, severe COVID-19, meningitis",
|
| 242 |
+
"contraindications": "Systemic fungal infections",
|
| 243 |
+
"notes": "More potent than prednisolone. IV/IM for severe conditions"
|
| 244 |
+
},
|
| 245 |
+
{
|
| 246 |
+
"name": "ORS (Oral Rehydration Salts)",
|
| 247 |
+
"class": "Rehydration therapy",
|
| 248 |
+
"adult_dose": "200-400mL after each loose stool, aiming for 3L/day",
|
| 249 |
+
"pediatric_dose": "50-100mL/kg over 3-4 hours for mild dehydration",
|
| 250 |
+
"indications": "Diarrhoea, dehydration, cholera",
|
| 251 |
+
"contraindications": "Severe dehydration requiring IV fluids, ileus",
|
| 252 |
+
"notes": "First-line for diarrhoeal disease in all ages"
|
| 253 |
+
},
|
| 254 |
+
{
|
| 255 |
+
"name": "Zinc Sulphate",
|
| 256 |
+
"class": "Micronutrient supplement",
|
| 257 |
+
"adult_dose": "20mg once daily",
|
| 258 |
+
"pediatric_dose": "10mg/day under 6 months; 20mg/day over 6 months for 10-14 days with ORS",
|
| 259 |
+
"indications": "Diarrhoea (adjunct), zinc deficiency, growth faltering",
|
| 260 |
+
"contraindications": "Hypersensitivity",
|
| 261 |
+
"notes": "Reduces duration and severity of diarrhoea in children"
|
| 262 |
+
},
|
| 263 |
+
{
|
| 264 |
+
"name": "Folic Acid",
|
| 265 |
+
"class": "Vitamin/Haematinic",
|
| 266 |
+
"adult_dose": "5mg once daily (therapeutic); 400mcg daily (prophylactic in pregnancy)",
|
| 267 |
+
"pediatric_dose": "500mcg/kg/day",
|
| 268 |
+
"indications": "Megaloblastic anaemia, pregnancy (neural tube defect prevention), haemolytic anaemia",
|
| 269 |
+
"contraindications": "Undiagnosed anaemia (may mask B12 deficiency)",
|
| 270 |
+
"notes": "Ideally start 3 months before conception"
|
| 271 |
+
},
|
| 272 |
+
{
|
| 273 |
+
"name": "Ferrous Sulphate",
|
| 274 |
+
"class": "Iron supplement",
|
| 275 |
+
"adult_dose": "200mg (65mg elemental iron) three times daily",
|
| 276 |
+
"pediatric_dose": "3-6mg/kg/day elemental iron in divided doses",
|
| 277 |
+
"indications": "Iron deficiency anaemia, pregnancy iron supplementation",
|
| 278 |
+
"contraindications": "Haemochromatosis, repeated blood transfusions",
|
| 279 |
+
"notes": "Take on empty stomach or with vitamin C. Stools may turn black"
|
| 280 |
+
},
|
| 281 |
+
{
|
| 282 |
+
"name": "Vitamin A",
|
| 283 |
+
"class": "Fat-soluble vitamin",
|
| 284 |
+
"adult_dose": "200,000 IU once (for deficiency or measles)",
|
| 285 |
+
"pediatric_dose": "100,000 IU (6-11 months); 200,000 IU (>12 months) every 6 months",
|
| 286 |
+
"indications": "Vitamin A deficiency, measles, night blindness, malnutrition",
|
| 287 |
+
"contraindications": "Pregnancy (high dose teratogenic)",
|
| 288 |
+
"notes": "Part of national immunisation programme in Ghana"
|
| 289 |
+
},
|
| 290 |
+
{
|
| 291 |
+
"name": "Gentamicin",
|
| 292 |
+
"class": "Antibiotic (Aminoglycoside)",
|
| 293 |
+
"adult_dose": "5-7mg/kg IV/IM once daily",
|
| 294 |
+
"pediatric_dose": "7.5mg/kg/day divided every 8 hours (neonatal: 4-7mg/kg/day)",
|
| 295 |
+
"indications": "Serious gram-negative infections, sepsis, neonatal sepsis",
|
| 296 |
+
"contraindications": "Renal impairment, myasthenia gravis",
|
| 297 |
+
"notes": "Monitor renal function and drug levels. Ototoxic and nephrotoxic"
|
| 298 |
+
},
|
| 299 |
+
{
|
| 300 |
+
"name": "Benzylpenicillin (Penicillin G)",
|
| 301 |
+
"class": "Antibiotic (Beta-lactam)",
|
| 302 |
+
"adult_dose": "1.2-2.4g IV every 4-6 hours",
|
| 303 |
+
"pediatric_dose": "50,000-100,000 units/kg/day divided every 4-6 hours",
|
| 304 |
+
"indications": "Meningitis, septicaemia, pneumonia, syphilis, neonatal infections",
|
| 305 |
+
"contraindications": "Penicillin allergy",
|
| 306 |
+
"notes": "IV administration. Monitor for hypersensitivity"
|
| 307 |
+
},
|
| 308 |
+
{
|
| 309 |
+
"name": "Ceftriaxone",
|
| 310 |
+
"class": "Antibiotic (3rd generation Cephalosporin)",
|
| 311 |
+
"adult_dose": "1-2g IV/IM once daily",
|
| 312 |
+
"pediatric_dose": "50-100mg/kg once daily (max 4g/day)",
|
| 313 |
+
"indications": "Meningitis, severe pneumonia, typhoid, sepsis, gonorrhoea",
|
| 314 |
+
"contraindications": "Cephalosporin allergy, neonates with hyperbilirubinaemia",
|
| 315 |
+
"notes": "Can be given IM (with lidocaine) or IV"
|
| 316 |
+
},
|
| 317 |
+
{
|
| 318 |
+
"name": "Fluconazole",
|
| 319 |
+
"class": "Antifungal",
|
| 320 |
+
"adult_dose": "150mg single dose (vaginal candidiasis); 200mg daily (systemic)",
|
| 321 |
+
"pediatric_dose": "3-12mg/kg/day",
|
| 322 |
+
"indications": "Candidiasis, cryptococcal meningitis, tinea infections",
|
| 323 |
+
"contraindications": "Hepatic impairment (high doses), QT prolongation",
|
| 324 |
+
"notes": "Significant drug interactions including with warfarin and statins"
|
| 325 |
+
},
|
| 326 |
+
{
|
| 327 |
+
"name": "Tenofovir-Lamivudine-Dolutegravir (TLD)",
|
| 328 |
+
"class": "Antiretroviral (NRTI + INSTI)",
|
| 329 |
+
"adult_dose": "One tablet (300/300/50mg) once daily",
|
| 330 |
+
"pediatric_dose": "Weight-band dosing for children",
|
| 331 |
+
"indications": "HIV-1 infection (first-line regimen in Ghana)",
|
| 332 |
+
"contraindications": "Severe renal impairment",
|
| 333 |
+
"notes": "Current WHO-recommended first-line ART. Take at same time daily"
|
| 334 |
+
},
|
| 335 |
+
{
|
| 336 |
+
"name": "Isoniazid",
|
| 337 |
+
"class": "Anti-tuberculosis",
|
| 338 |
+
"adult_dose": "5mg/kg (max 300mg) once daily",
|
| 339 |
+
"pediatric_dose": "10mg/kg (max 300mg) once daily",
|
| 340 |
+
"indications": "Tuberculosis treatment and prophylaxis",
|
| 341 |
+
"contraindications": "Severe liver disease, peripheral neuropathy",
|
| 342 |
+
"notes": "Give with pyridoxine (B6) to prevent peripheral neuropathy"
|
| 343 |
+
},
|
| 344 |
+
{
|
| 345 |
+
"name": "Rifampicin",
|
| 346 |
+
"class": "Anti-tuberculosis",
|
| 347 |
+
"adult_dose": "10mg/kg (max 600mg) once daily on empty stomach",
|
| 348 |
+
"pediatric_dose": "15mg/kg (max 600mg) once daily",
|
| 349 |
+
"indications": "Tuberculosis, leprosy, Neisseria meningitidis prophylaxis",
|
| 350 |
+
"contraindications": "Severe liver disease, jaundice",
|
| 351 |
+
"notes": "Turns urine/sweat/tears orange. Many drug interactions"
|
| 352 |
+
},
|
| 353 |
+
{
|
| 354 |
+
"name": "Diazepam",
|
| 355 |
+
"class": "Benzodiazepine/Anticonvulsant",
|
| 356 |
+
"adult_dose": "5-10mg IV slowly for seizures; 2-10mg orally for anxiety",
|
| 357 |
+
"pediatric_dose": "0.2-0.5mg/kg IV (max 10mg) for seizures; rectal: 0.5mg/kg",
|
| 358 |
+
"indications": "Seizures, status epilepticus, anxiety, muscle spasm, alcohol withdrawal",
|
| 359 |
+
"contraindications": "Respiratory depression, sleep apnoea, severe liver disease",
|
| 360 |
+
"notes": "Controlled substance. Risk of dependence"
|
| 361 |
+
},
|
| 362 |
+
{
|
| 363 |
+
"name": "Phenobarbital",
|
| 364 |
+
"class": "Anticonvulsant (Barbiturate)",
|
| 365 |
+
"adult_dose": "60-180mg at night (maintenance); 15-20mg/kg IV for status epilepticus",
|
| 366 |
+
"pediatric_dose": "3-5mg/kg/day (maintenance); 15-20mg/kg IV (status epilepticus)",
|
| 367 |
+
"indications": "Epilepsy, status epilepticus, neonatal seizures",
|
| 368 |
+
"contraindications": "Respiratory depression, porphyria",
|
| 369 |
+
"notes": "Long-acting. First-line for neonatal seizures"
|
| 370 |
+
},
|
| 371 |
+
{
|
| 372 |
+
"name": "Misoprostol",
|
| 373 |
+
"class": "Prostaglandin (Uterotonic)",
|
| 374 |
+
"adult_dose": "600mcg sublingual or 800mcg rectally for PPH",
|
| 375 |
+
"pediatric_dose": "N/A",
|
| 376 |
+
"indications": "Prevention and treatment of postpartum haemorrhage, medical abortion, cervical ripening",
|
| 377 |
+
"contraindications": "Allergy to prostaglandins",
|
| 378 |
+
"notes": "Essential medicine for maternal health"
|
| 379 |
+
},
|
| 380 |
+
{
|
| 381 |
+
"name": "Magnesium Sulphate",
|
| 382 |
+
"class": "Anticonvulsant/Tocolytic",
|
| 383 |
+
"adult_dose": "4g IV loading over 20 min, then 1g/hour maintenance",
|
| 384 |
+
"pediatric_dose": "25-50mg/kg for hypomagnesaemia",
|
| 385 |
+
"indications": "Pre-eclampsia, eclampsia seizure prophylaxis and treatment",
|
| 386 |
+
"contraindications": "Renal failure, myasthenia gravis",
|
| 387 |
+
"notes": "Monitor deep tendon reflexes, respiratory rate, urine output"
|
| 388 |
+
},
|
| 389 |
+
{
|
| 390 |
+
"name": "Insulin (Regular/Soluble)",
|
| 391 |
+
"class": "Antidiabetic hormone",
|
| 392 |
+
"adult_dose": "Individualised; DKA: 0.1 units/kg/hour IV",
|
| 393 |
+
"pediatric_dose": "Individualised",
|
| 394 |
+
"indications": "Type 1 diabetes, Type 2 diabetes (uncontrolled), diabetic ketoacidosis",
|
| 395 |
+
"contraindications": "Hypoglycaemia",
|
| 396 |
+
"notes": "Store in refrigerator. Monitor blood glucose closely"
|
| 397 |
+
},
|
| 398 |
+
{
|
| 399 |
+
"name": "Hydrocortisone",
|
| 400 |
+
"class": "Corticosteroid",
|
| 401 |
+
"adult_dose": "100-500mg IV every 6-8 hours (emergency); 20-30mg oral daily (replacement)",
|
| 402 |
+
"pediatric_dose": "2-8mg/kg IV (emergency)",
|
| 403 |
+
"indications": "Adrenal crisis, severe allergic reactions, anaphylaxis, severe asthma",
|
| 404 |
+
"contraindications": "Systemic infections without antimicrobials",
|
| 405 |
+
"notes": "IV for emergencies. Mineralocorticoid effects at high doses"
|
| 406 |
+
},
|
| 407 |
+
{
|
| 408 |
+
"name": "Adrenaline (Epinephrine)",
|
| 409 |
+
"class": "Sympathomimetic",
|
| 410 |
+
"adult_dose": "0.5mg (0.5mL of 1:1000) IM for anaphylaxis; repeat after 5 min if needed",
|
| 411 |
+
"pediatric_dose": "0.01mg/kg IM (max 0.5mg)",
|
| 412 |
+
"indications": "Anaphylaxis, cardiac arrest, severe asthma",
|
| 413 |
+
"contraindications": "No absolute contraindications in life-threatening emergencies",
|
| 414 |
+
"notes": "Outer mid-thigh IM injection. Store away from light"
|
| 415 |
+
}
|
| 416 |
+
]
|
rag/data/icd10_common.json
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{"code": "B54", "description": "Unspecified malaria", "keywords": "malaria fever chills rigors sweating headache vomiting anaemia"},
|
| 3 |
+
{"code": "B50.0", "description": "Plasmodium falciparum malaria with cerebral complications", "keywords": "cerebral malaria coma seizures severe malaria falciparum"},
|
| 4 |
+
{"code": "B50.9", "description": "Plasmodium falciparum malaria, unspecified", "keywords": "malaria falciparum fever Africa Ghana tropical"},
|
| 5 |
+
{"code": "A01.0", "description": "Typhoid fever", "keywords": "typhoid enteric fever salmonella sustained fever abdominal pain rose spots"},
|
| 6 |
+
{"code": "A09", "description": "Diarrhoea and gastroenteritis of presumed infectious origin", "keywords": "diarrhoea gastroenteritis vomiting loose stools dehydration"},
|
| 7 |
+
{"code": "A00.9", "description": "Cholera, unspecified", "keywords": "cholera rice water stools severe dehydration watery diarrhoea"},
|
| 8 |
+
{"code": "A15.0", "description": "Tuberculosis of lung, confirmed by sputum microscopy", "keywords": "tuberculosis TB pulmonary cough blood haemoptysis night sweats weight loss"},
|
| 9 |
+
{"code": "A16.2", "description": "Tuberculosis of lung, without mention of bacteriological confirmation", "keywords": "TB tuberculosis cough chronic lung weight loss"},
|
| 10 |
+
{"code": "B20", "description": "Human immunodeficiency virus disease resulting in infectious and parasitic diseases", "keywords": "HIV AIDS opportunistic infection immune"},
|
| 11 |
+
{"code": "B24", "description": "Unspecified human immunodeficiency virus disease", "keywords": "HIV AIDS retroviral disease"},
|
| 12 |
+
{"code": "B19.9", "description": "Unspecified viral hepatitis without hepatic coma", "keywords": "hepatitis jaundice liver yellow eyes dark urine"},
|
| 13 |
+
{"code": "A77.9", "description": "Spotted fever, unspecified", "keywords": "spotted fever tick rickettsia rash fever"},
|
| 14 |
+
{"code": "B65.9", "description": "Schistosomiasis, unspecified", "keywords": "schistosomiasis bilharzia blood urine haematuria"},
|
| 15 |
+
{"code": "B76.9", "description": "Hookworm disease, unspecified", "keywords": "hookworm anaemia soil transmitted helminth worm"},
|
| 16 |
+
{"code": "B74.0", "description": "Filariasis due to Wuchereria bancrofti", "keywords": "filariasis lymphoedema elephantiasis swollen leg"},
|
| 17 |
+
|
| 18 |
+
{"code": "J06.9", "description": "Acute upper respiratory infection, unspecified", "keywords": "cold cough sore throat runny nose nasal congestion upper respiratory"},
|
| 19 |
+
{"code": "J00", "description": "Acute nasopharyngitis (common cold)", "keywords": "common cold rhinitis sneezing runny nose"},
|
| 20 |
+
{"code": "J02.9", "description": "Acute pharyngitis, unspecified", "keywords": "sore throat pharyngitis throat pain difficulty swallowing"},
|
| 21 |
+
{"code": "J03.9", "description": "Acute tonsillitis, unspecified", "keywords": "tonsillitis swollen tonsils sore throat fever pus"},
|
| 22 |
+
{"code": "J18.9", "description": "Pneumonia, unspecified organism", "keywords": "pneumonia chest infection cough fever shortness of breath sputum"},
|
| 23 |
+
{"code": "J20.9", "description": "Acute bronchitis, unspecified", "keywords": "bronchitis cough chest mucus productive cough"},
|
| 24 |
+
{"code": "J45.9", "description": "Asthma, unspecified", "keywords": "asthma wheeze shortness of breath inhaler bronchospasm"},
|
| 25 |
+
{"code": "J44.1", "description": "Chronic obstructive pulmonary disease with acute exacerbation", "keywords": "COPD breathlessness chronic lung disease exacerbation"},
|
| 26 |
+
{"code": "J30.9", "description": "Allergic rhinitis, unspecified", "keywords": "allergic rhinitis hay fever sneezing itchy nose dust allergy"},
|
| 27 |
+
|
| 28 |
+
{"code": "I10", "description": "Essential (primary) hypertension", "keywords": "hypertension high blood pressure HTN headache"},
|
| 29 |
+
{"code": "I11.9", "description": "Hypertensive heart disease without heart failure", "keywords": "hypertensive heart disease high blood pressure cardiac"},
|
| 30 |
+
{"code": "I50.9", "description": "Heart failure, unspecified", "keywords": "heart failure cardiac failure breathlessness oedema swollen legs"},
|
| 31 |
+
{"code": "I20.9", "description": "Angina pectoris, unspecified", "keywords": "angina chest pain exertion heart coronary"},
|
| 32 |
+
{"code": "I21.9", "description": "Acute myocardial infarction, unspecified", "keywords": "heart attack myocardial infarction MI chest pain severe"},
|
| 33 |
+
{"code": "I64", "description": "Stroke, not specified as haemorrhage or infarction", "keywords": "stroke CVA weakness facial droop speech difficulty paralysis"},
|
| 34 |
+
{"code": "I63.9", "description": "Cerebral infarction, unspecified", "keywords": "ischaemic stroke brain infarction weakness hemiplegia"},
|
| 35 |
+
|
| 36 |
+
{"code": "E11.9", "description": "Type 2 diabetes mellitus without complications", "keywords": "diabetes mellitus type 2 blood sugar hyperglycaemia thirst urination"},
|
| 37 |
+
{"code": "E10.9", "description": "Type 1 diabetes mellitus without complications", "keywords": "type 1 diabetes insulin dependent juvenile diabetes"},
|
| 38 |
+
{"code": "E11.5", "description": "Type 2 diabetes mellitus with peripheral circulatory complications", "keywords": "diabetic foot ulcer peripheral vascular disease gangrene"},
|
| 39 |
+
{"code": "E11.3", "description": "Type 2 diabetes mellitus with ophthalmic complications", "keywords": "diabetic retinopathy vision loss eye diabetes"},
|
| 40 |
+
{"code": "E66.9", "description": "Obesity, unspecified", "keywords": "obesity overweight BMI weight"},
|
| 41 |
+
{"code": "E46", "description": "Unspecified protein-calorie malnutrition", "keywords": "malnutrition underweight protein deficiency wasting"},
|
| 42 |
+
{"code": "E43", "description": "Unspecified severe protein-calorie malnutrition", "keywords": "severe malnutrition kwashiorkor marasmus oedema wasting"},
|
| 43 |
+
{"code": "D50.9", "description": "Iron deficiency anaemia, unspecified", "keywords": "anaemia iron deficiency pallor fatigue tiredness weakness"},
|
| 44 |
+
{"code": "D64.9", "description": "Anaemia, unspecified", "keywords": "anaemia pallor fatigue weakness low blood count"},
|
| 45 |
+
|
| 46 |
+
{"code": "K29.7", "description": "Gastritis, unspecified", "keywords": "gastritis stomach pain epigastric pain nausea indigestion"},
|
| 47 |
+
{"code": "K21.0", "description": "Gastro-oesophageal reflux disease with oesophagitis", "keywords": "GERD acid reflux heartburn regurgitation burning chest"},
|
| 48 |
+
{"code": "K35.9", "description": "Acute appendicitis, unspecified", "keywords": "appendicitis right lower quadrant pain RLQ nausea vomiting fever"},
|
| 49 |
+
{"code": "K80.20", "description": "Calculus of gallbladder without cholecystitis", "keywords": "gallstones cholelithiasis right upper quadrant pain fatty food"},
|
| 50 |
+
{"code": "K92.1", "description": "Melaena", "keywords": "melaena black stool blood stool GI bleed upper gastrointestinal"},
|
| 51 |
+
{"code": "K57.30", "description": "Diverticulosis of large intestine without perforation", "keywords": "diverticulosis colon lower abdominal pain constipation"},
|
| 52 |
+
|
| 53 |
+
{"code": "N39.0", "description": "Urinary tract infection, site not specified", "keywords": "UTI urinary infection dysuria frequency burning urination"},
|
| 54 |
+
{"code": "N40", "description": "Enlarged prostate", "keywords": "BPH benign prostatic hyperplasia urinary retention frequency nocturia"},
|
| 55 |
+
{"code": "N18.9", "description": "Chronic kidney disease, unspecified", "keywords": "CKD renal failure kidney disease creatinine oedema"},
|
| 56 |
+
{"code": "N10", "description": "Acute pyelonephritis", "keywords": "pyelonephritis kidney infection loin pain fever urinary"},
|
| 57 |
+
|
| 58 |
+
{"code": "G43.9", "description": "Migraine, unspecified", "keywords": "migraine severe headache throbbing nausea light sensitivity aura"},
|
| 59 |
+
{"code": "R51", "description": "Headache", "keywords": "headache head pain cephalalgia tension headache"},
|
| 60 |
+
{"code": "G40.9", "description": "Epilepsy, unspecified", "keywords": "epilepsy seizure convulsion fits loss of consciousness"},
|
| 61 |
+
{"code": "G35", "description": "Multiple sclerosis", "keywords": "multiple sclerosis MS weakness numbness vision fatigue"},
|
| 62 |
+
{"code": "F32.9", "description": "Depressive episode, unspecified", "keywords": "depression low mood sadness hopelessness sleep appetite loss"},
|
| 63 |
+
{"code": "F41.1", "description": "Generalised anxiety disorder", "keywords": "anxiety worry generalised anxiety disorder GAD nervousness"},
|
| 64 |
+
{"code": "F20.9", "description": "Schizophrenia, unspecified", "keywords": "schizophrenia psychosis hallucinations delusions mental illness"},
|
| 65 |
+
|
| 66 |
+
{"code": "M54.5", "description": "Low back pain", "keywords": "back pain lower back lumbar pain lumbago"},
|
| 67 |
+
{"code": "M54.2", "description": "Cervicalgia", "keywords": "neck pain cervical pain stiff neck"},
|
| 68 |
+
{"code": "M25.5", "description": "Pain in joint", "keywords": "joint pain arthralgia knee hip shoulder elbow joint"},
|
| 69 |
+
{"code": "M06.9", "description": "Rheumatoid arthritis, unspecified", "keywords": "rheumatoid arthritis RA joint swelling morning stiffness"},
|
| 70 |
+
{"code": "M10.9", "description": "Gout, unspecified", "keywords": "gout uric acid joint pain big toe swollen joint"},
|
| 71 |
+
|
| 72 |
+
{"code": "O80", "description": "Encounter for full-term uncomplicated delivery", "keywords": "normal delivery labour birth full term vaginal delivery"},
|
| 73 |
+
{"code": "O14.9", "description": "Pre-eclampsia, unspecified", "keywords": "pre-eclampsia hypertension pregnancy oedema proteinuria"},
|
| 74 |
+
{"code": "O20.0", "description": "Threatened abortion", "keywords": "threatened miscarriage bleeding pregnancy spotting"},
|
| 75 |
+
{"code": "O03.9", "description": "Spontaneous abortion, complete or unspecified", "keywords": "miscarriage spontaneous abortion pregnancy loss"},
|
| 76 |
+
{"code": "O42.9", "description": "Premature rupture of membranes, unspecified", "keywords": "PROM ruptured membranes water broke premature labour"},
|
| 77 |
+
{"code": "P07.3", "description": "Other preterm infants", "keywords": "premature baby preterm birth low birth weight"},
|
| 78 |
+
|
| 79 |
+
{"code": "L50.9", "description": "Urticaria, unspecified", "keywords": "hives urticaria itchy rash allergic skin reaction welts"},
|
| 80 |
+
{"code": "L20.9", "description": "Atopic dermatitis, unspecified", "keywords": "eczema atopic dermatitis itchy skin rash"},
|
| 81 |
+
{"code": "B35.9", "description": "Dermatophytosis, unspecified", "keywords": "ringworm tinea fungal skin infection itchy rash"},
|
| 82 |
+
{"code": "L03.9", "description": "Cellulitis, unspecified", "keywords": "cellulitis skin infection red swollen hot skin bacterial"},
|
| 83 |
+
|
| 84 |
+
{"code": "R50.9", "description": "Fever, unspecified", "keywords": "fever pyrexia high temperature febrile"},
|
| 85 |
+
{"code": "R05", "description": "Cough", "keywords": "cough dry cough productive cough chronic cough"},
|
| 86 |
+
{"code": "R06.0", "description": "Dyspnoea", "keywords": "breathlessness shortness of breath dyspnoea difficulty breathing"},
|
| 87 |
+
{"code": "R10.4", "description": "Other and unspecified abdominal pain", "keywords": "abdominal pain stomach ache belly pain"},
|
| 88 |
+
{"code": "R11.2", "description": "Nausea with vomiting, unspecified", "keywords": "nausea vomiting queasy sick stomach"},
|
| 89 |
+
{"code": "R55", "description": "Syncope and collapse", "keywords": "fainting syncope collapse blackout loss of consciousness"},
|
| 90 |
+
{"code": "R00.0", "description": "Tachycardia, unspecified", "keywords": "fast heart rate palpitations racing heart tachycardia"},
|
| 91 |
+
{"code": "R73.09", "description": "Other abnormal glucose", "keywords": "high blood sugar hyperglycaemia pre-diabetes glucose"},
|
| 92 |
+
|
| 93 |
+
{"code": "S09.9", "description": "Unspecified injury of head", "keywords": "head injury trauma concussion fall accident"},
|
| 94 |
+
{"code": "T14.9", "description": "Injury, unspecified", "keywords": "injury trauma wound accident"},
|
| 95 |
+
{"code": "S72.9", "description": "Fracture of femur, unspecified", "keywords": "broken bone fracture hip femur"},
|
| 96 |
+
{"code": "T78.40", "description": "Allergy, unspecified", "keywords": "allergy allergic reaction hypersensitivity"},
|
| 97 |
+
{"code": "T36.9", "description": "Poisoning by unspecified systemic antibiotic", "keywords": "antibiotic poisoning drug reaction adverse effect"},
|
| 98 |
+
|
| 99 |
+
{"code": "Z00.0", "description": "General adult medical examination", "keywords": "check-up routine examination general health review"},
|
| 100 |
+
{"code": "Z23", "description": "Encounter for immunization", "keywords": "vaccination immunization vaccine injection"},
|
| 101 |
+
{"code": "Z30.0", "description": "Encounter for general counselling and advice on contraception", "keywords": "family planning contraception birth control counselling"},
|
| 102 |
+
{"code": "Z71.3", "description": "Encounter for dietary counselling and surveillance", "keywords": "diet nutrition counselling weight management"}
|
| 103 |
+
]
|
rag/retriever.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import os
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
import chromadb
|
| 8 |
+
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction
|
| 9 |
+
|
| 10 |
+
DATA_DIR = Path(__file__).parent / "data"
|
| 11 |
+
DB_DIR = Path(__file__).parent.parent / "chroma_db"
|
| 12 |
+
|
| 13 |
+
EMBED_MODEL = "all-MiniLM-L6-v2"
|
| 14 |
+
|
| 15 |
+
_client: chromadb.PersistentClient | None = None
|
| 16 |
+
_icd_col = None
|
| 17 |
+
_drug_col = None
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _get_client():
    """Return the process-wide ChromaDB client, creating it on first use.

    The client persists its data under ``DB_DIR`` and is cached in the
    module-level ``_client`` so later calls reuse the same instance.
    """
    global _client
    if _client is not None:
        return _client
    _client = chromadb.PersistentClient(path=str(DB_DIR))
    return _client
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def _embedding_fn():
    """Build the sentence-transformer embedding function for ``EMBED_MODEL``."""
    embedder = SentenceTransformerEmbeddingFunction(model_name=EMBED_MODEL)
    return embedder
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def build_knowledge_base(force: bool = False):
    """Embed ICD-10 codes and medicines into ChromaDB.

    A collection is (re)built only when it is missing or when ``force`` is
    true; otherwise the persisted index is left untouched.

    Args:
        force: Drop and re-index both collections even if they already exist.
    """
    global _icd_col, _drug_col

    client = _get_client()
    ef = _embedding_fn()

    # list_collections() yields Collection objects on most chromadb releases
    # but bare collection names on some (0.6.x); accept both shapes.
    existing = {
        c if isinstance(c, str) else c.name for c in client.list_collections()
    }

    # ── ICD-10 ──────────────────────────────────────────────────────────────
    if force or "icd10" not in existing:
        if "icd10" in existing:
            client.delete_collection("icd10")
        # A cached handle would still point at the collection just dropped.
        _icd_col = None
        col = client.create_collection(
            "icd10",
            embedding_function=ef,
            # retrieve_icd_codes() reports ``1 - distance`` as its score, which
            # is only a similarity under cosine distance (the default is
            # squared L2). Indexes built earlier keep their old metric until
            # they are rebuilt with force=True.
            metadata={"hnsw:space": "cosine"},
        )
        with open(DATA_DIR / "icd10_common.json", encoding="utf-8") as f:
            records = json.load(f)
        col.add(
            ids=[r["code"] for r in records],
            documents=[f"{r['description']} {r['keywords']}" for r in records],
            metadatas=[
                {"code": r["code"], "description": r["description"]}
                for r in records
            ],
        )
        print(f"[RAG] Indexed {len(records)} ICD-10 codes")

    # ── Medicines ────────────────────────────────────────────────────────────
    if force or "medicines" not in existing:
        if "medicines" in existing:
            client.delete_collection("medicines")
        # Same reasoning as above: never keep a handle to a dropped collection.
        _drug_col = None
        col = client.create_collection("medicines", embedding_function=ef)
        with open(DATA_DIR / "essential_medicines.json", encoding="utf-8") as f:
            records = json.load(f)
        col.add(
            # Medicines have no natural key, so the list position is the id.
            ids=[str(i) for i in range(len(records))],
            documents=[
                f"{r['name']} {r['class']} {r['indications']}"
                for r in records
            ],
            # The full record is stored so lookups can return dosing details.
            metadatas=records,
        )
        print(f"[RAG] Indexed {len(records)} essential medicines")
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def _icd_collection():
    """Return the cached handle to the ``icd10`` collection, fetching it once."""
    global _icd_col
    if _icd_col is not None:
        return _icd_col
    client = _get_client()
    _icd_col = client.get_collection("icd10", embedding_function=_embedding_fn())
    return _icd_col
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _drug_collection():
    """Return the cached handle to the ``medicines`` collection, fetching it once."""
    global _drug_col
    if _drug_col is not None:
        return _drug_col
    client = _get_client()
    _drug_col = client.get_collection("medicines", embedding_function=_embedding_fn())
    return _drug_col
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def retrieve_icd_codes(query: str, n: int = 5) -> list[dict]:
    """Look up the ``n`` ICD-10 entries closest to a clinical query.

    Returns one dict per hit with ``code``, ``description`` and ``score``;
    a blank query yields an empty list without touching the index.
    """
    if query.strip() == "":
        return []

    hits = _icd_collection().query(query_texts=[query], n_results=n)
    matched_meta = hits["metadatas"][0]
    matched_dist = hits["distances"][0]

    # NOTE(review): score is ``1 - distance``; that is a cosine similarity only
    # if the collection was created with cosine distance — confirm the metric.
    return [
        {
            "code": meta["code"],
            "description": meta["description"],
            "score": round(1 - dist, 3),
        }
        for meta, dist in zip(matched_meta, matched_dist)
    ]
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def retrieve_drug_info(drug_names: list[str], n: int = 3) -> list[dict]:
    """Return drug info for each named medication, falling back to closest match.

    Every name is queried on its own so that one drug's matches are not
    diluted or crowded out by the others; the combined result is de-duplicated
    by medicine name, keeping first-seen order.

    Args:
        drug_names: Medication names to look up. Blank or non-string entries
            are ignored.
        n: Maximum number of closest matches to fetch per drug name.

    Returns:
        A list of dicts with ``name``, ``class``, ``adult_dose``,
        ``indications``, ``contraindications`` and ``notes``. Empty when no
        usable name is supplied.
    """
    names = [
        name.strip()
        for name in (drug_names or [])
        if isinstance(name, str) and name.strip()
    ]
    if not names:
        return []

    # One query text per drug: the result holds one list of matches per name.
    results = _drug_collection().query(query_texts=names, n_results=n)

    drugs = []
    seen = set()
    for matches in results["metadatas"]:
        for meta in matches:
            # Different names can resolve to the same medicine; report it once.
            if meta["name"] in seen:
                continue
            seen.add(meta["name"])
            drugs.append({
                "name": meta["name"],
                "class": meta["class"],
                "adult_dose": meta["adult_dose"],
                "indications": meta["indications"],
                "contraindications": meta["contraindications"],
                "notes": meta.get("notes", ""),
            })
    return drugs
|
| 116 |
+
|
| 117 |
+
|
| 118 |
+
def format_icd_context(codes: list[dict]) -> str:
    """Render ICD-10 matches as a text block suitable for a prompt.

    Produces a header line followed by one ``code — description`` line per
    entry, or an empty string when there are no codes.
    """
    if not codes:
        return ""
    header = "Relevant ICD-10 codes to consider:"
    entries = [f" {entry['code']} — {entry['description']}" for entry in codes]
    return "\n".join([header, *entries])
|
| 126 |
+
|
| 127 |
+
|
| 128 |
+
def format_drug_context(drugs: list[dict]) -> str:
    """Render drug reference entries as a text block suitable for a prompt.

    Produces a header line followed by one line per drug giving its name,
    class, adult dose and indications, or an empty string when there are
    no drugs.
    """
    if not drugs:
        return ""

    def _describe(drug: dict) -> str:
        dosing = f" {drug['name']} ({drug['class']}): {drug['adult_dose']}. "
        usage = f"Indications: {drug['indications']}."
        return dosing + usage

    return "\n".join(["Relevant medication reference:", *map(_describe, drugs)])
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def ensure_kb():
    """Called at app startup — builds KB only if it doesn't exist yet.

    Checks for the ``icd10`` and ``medicines`` collections and triggers
    ``build_knowledge_base()`` when either one is missing.
    """
    # list_collections() yields Collection objects on most chromadb releases
    # but bare collection names on some (0.6.x); accept both shapes.
    existing = {
        c if isinstance(c, str) else c.name
        for c in _get_client().list_collections()
    }
    if {"icd10", "medicines"} <= existing:
        print("[RAG] Knowledge base ready.")
    else:
        print("[RAG] Building knowledge base for the first time...")
        build_knowledge_base()
|
requirements.txt
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.44.0
|
| 2 |
+
faster-whisper>=1.0.0
|
| 3 |
+
ollama>=0.3.0
|
| 4 |
+
google-genai>=1.0.0
|
| 5 |
+
sounddevice>=0.4.6
|
| 6 |
+
numpy>=1.26.0
|
| 7 |
+
scipy>=1.13.0
|
| 8 |
+
python-dotenv>=1.0.0
|
| 9 |
+
fpdf2>=2.7.9
|
| 10 |
+
chromadb>=0.5.0
|
| 11 |
+
sentence-transformers>=3.0.0
|
transcription/__init__.py
ADDED
|
File without changes
|
transcription/transcriber.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import queue
|
| 3 |
+
import tempfile
|
| 4 |
+
import threading
|
| 5 |
+
import wave
|
| 6 |
+
import numpy as np
|
| 7 |
+
import sounddevice as sd
|
| 8 |
+
from faster_whisper import WhisperModel
|
| 9 |
+
|
| 10 |
+
SAMPLE_RATE = 16000
|
| 11 |
+
BLOCK_SECONDS = 3
|
| 12 |
+
CHANNELS = 1
|
| 13 |
+
|
| 14 |
+
_model: WhisperModel | None = None
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _load_model() -> WhisperModel:
    """Return the shared Whisper model, loading it on first call.

    The model size comes from the ``WHISPER_MODEL`` environment variable
    (``"small"`` when unset); the model runs on CPU with int8 compute.
    """
    global _model
    if _model is not None:
        return _model
    size = os.getenv("WHISPER_MODEL", "small")
    _model = WhisperModel(size, device="cpu", compute_type="int8")
    return _model
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class LiveTranscriber:
|
| 26 |
+
"""
|
| 27 |
+
Streams microphone audio, transcribes in real time, and saves the full
|
| 28 |
+
session to a WAV file for post-hoc speaker diarization.
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
def __init__(self, on_text):
|
| 32 |
+
self.on_text = on_text
|
| 33 |
+
self._audio_q: queue.Queue = queue.Queue()
|
| 34 |
+
self._stop_event = threading.Event()
|
| 35 |
+
self._thread: threading.Thread | None = None
|
| 36 |
+
self._stream: sd.InputStream | None = None
|
| 37 |
+
|
| 38 |
+
# accumulate all raw audio for diarization
|
| 39 |
+
self._all_audio: list[np.ndarray] = []
|
| 40 |
+
self._audio_lock = threading.Lock()
|
| 41 |
+
|
| 42 |
+
# path to saved WAV after stop()
|
| 43 |
+
self.wav_path: str | None = None
|
| 44 |
+
|
| 45 |
+
def _audio_callback(self, indata, frames, time_info, status):
|
| 46 |
+
chunk = indata.copy()
|
| 47 |
+
self._audio_q.put(chunk)
|
| 48 |
+
with self._audio_lock:
|
| 49 |
+
self._all_audio.append(chunk.flatten())
|
| 50 |
+
|
| 51 |
+
def _process_loop(self):
|
| 52 |
+
model = _load_model()
|
| 53 |
+
buffer = np.empty((0,), dtype=np.float32)
|
| 54 |
+
chunk_size = SAMPLE_RATE * BLOCK_SECONDS
|
| 55 |
+
|
| 56 |
+
while not self._stop_event.is_set():
|
| 57 |
+
try:
|
| 58 |
+
chunk = self._audio_q.get(timeout=0.5)
|
| 59 |
+
buffer = np.concatenate([buffer, chunk.flatten()])
|
| 60 |
+
except queue.Empty:
|
| 61 |
+
continue
|
| 62 |
+
|
| 63 |
+
if len(buffer) >= chunk_size:
|
| 64 |
+
audio_chunk = buffer[:chunk_size].astype(np.float32)
|
| 65 |
+
buffer = buffer[chunk_size:]
|
| 66 |
+
segments, _ = model.transcribe(
|
| 67 |
+
audio_chunk,
|
| 68 |
+
language="en",
|
| 69 |
+
vad_filter=True,
|
| 70 |
+
vad_parameters={"min_silence_duration_ms": 300},
|
| 71 |
+
)
|
| 72 |
+
text = " ".join(s.text for s in segments).strip()
|
| 73 |
+
if text:
|
| 74 |
+
self.on_text(text)
|
| 75 |
+
|
| 76 |
+
# flush remaining audio
|
| 77 |
+
if len(buffer) > SAMPLE_RATE:
|
| 78 |
+
segments, _ = _load_model().transcribe(
|
| 79 |
+
buffer.astype(np.float32), language="en", vad_filter=True
|
| 80 |
+
)
|
| 81 |
+
text = " ".join(s.text for s in segments).strip()
|
| 82 |
+
if text:
|
| 83 |
+
self.on_text(text)
|
| 84 |
+
|
| 85 |
+
def start(self):
|
| 86 |
+
self._stop_event.clear()
|
| 87 |
+
self._all_audio.clear()
|
| 88 |
+
self._stream = sd.InputStream(
|
| 89 |
+
samplerate=SAMPLE_RATE,
|
| 90 |
+
channels=CHANNELS,
|
| 91 |
+
dtype="float32",
|
| 92 |
+
blocksize=SAMPLE_RATE,
|
| 93 |
+
callback=self._audio_callback,
|
| 94 |
+
)
|
| 95 |
+
self._stream.start()
|
| 96 |
+
self._thread = threading.Thread(target=self._process_loop, daemon=True)
|
| 97 |
+
self._thread.start()
|
| 98 |
+
|
| 99 |
+
def stop(self) -> str | None:
|
| 100 |
+
"""Stop recording and save full audio to a WAV file. Returns the WAV path."""
|
| 101 |
+
self._stop_event.set()
|
| 102 |
+
if self._stream:
|
| 103 |
+
self._stream.stop()
|
| 104 |
+
self._stream.close()
|
| 105 |
+
if self._thread:
|
| 106 |
+
self._thread.join(timeout=5)
|
| 107 |
+
|
| 108 |
+
with self._audio_lock:
|
| 109 |
+
all_audio = list(self._all_audio)
|
| 110 |
+
|
| 111 |
+
if not all_audio:
|
| 112 |
+
return None
|
| 113 |
+
|
| 114 |
+
full_audio = np.concatenate(all_audio).astype(np.float32)
|
| 115 |
+
pcm = (full_audio * 32767).astype(np.int16)
|
| 116 |
+
|
| 117 |
+
tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
|
| 118 |
+
with wave.open(tmp.name, "wb") as wf:
|
| 119 |
+
wf.setnchannels(CHANNELS)
|
| 120 |
+
wf.setsampwidth(2)
|
| 121 |
+
wf.setframerate(SAMPLE_RATE)
|
| 122 |
+
wf.writeframes(pcm.tobytes())
|
| 123 |
+
|
| 124 |
+
self.wav_path = tmp.name
|
| 125 |
+
return tmp.name
|