File size: 10,494 Bytes
88375b9
 
 
bd13cbe
5a6eaa4
c139a8a
1d53ef2
c139a8a
48d78c4
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d53ef2
 
 
 
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d53ef2
bf7a131
 
ac67d71
bf7a131
 
 
 
 
 
 
 
 
 
 
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
 
ac67d71
 
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a6eaa4
bf7a131
5a6eaa4
bf7a131
 
 
1d53ef2
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a6a40c
bf7a131
 
5a6eaa4
bf7a131
5a6eaa4
bf7a131
8a6a40c
bf7a131
5a6eaa4
bf7a131
 
5a6eaa4
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac67d71
 
bf7a131
 
 
 
 
 
 
 
c139a8a
bf7a131
ac67d71
 
 
1d53ef2
bf7a131
88375b9
bf7a131
 
 
 
 
 
 
1d53ef2
bf7a131
 
1d53ef2
bf7a131
 
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
8a6a40c
48d78c4
c139a8a
 
48d78c4
c139a8a
 
f51dba5
c139a8a
 
 
 
 
f51dba5
8a6a40c
c139a8a
 
 
bd13cbe
a0f6668
c139a8a
fe6ce59
bd13cbe
c139a8a
 
 
 
 
 
 
 
 
 
 
 
88375b9
c139a8a
f51dba5
8a6a40c
dbf8173
bd13cbe
ac67d71
c139a8a
f51dba5
c139a8a
 
 
 
88375b9
1d53ef2
88375b9
c139a8a
88375b9
5a6eaa4
bf7a131
88375b9
ecbffcb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
import gradio as gr
import os
import requests
import time

# --- CONFIGURATION ---
# AssemblyAI API key read from the environment; None when the variable is
# unset, in which case the pipeline falls back to a sample transcript.
ASSEMBLYAI_API_KEY = os.environ.get("ASSEMBLYAI_API_KEY")
# Presumably a Hugging Face access token (inferred from the name); it is read
# here but not referenced anywhere else in the visible code.
HF_TOKEN = os.environ.get("HF_TOKEN")

# SPEECH-TO-TEXT: AssemblyAI Integration
def transcribe_audio_assemblyai(audio_file_path):
    """Transcribe a local audio file through the AssemblyAI REST API.

    The file is uploaded in chunks, a transcription job is created, and the
    job is polled once per second for up to 30 attempts.

    Args:
        audio_file_path: Path of the audio file to upload.

    Returns:
        The transcript text on success (an empty string when the service
        reports completion without any text). On any failure -- missing API
        key, HTTP error, network problem, unreadable file, malformed
        response, or polling timeout -- a human-readable message that starts
        with the error marker used throughout this file. This function does
        not raise for those conditions.
    """
    if not ASSEMBLYAI_API_KEY:
        return "❌ AssemblyAI API key not set. Add to Secrets."

    headers = {"authorization": ASSEMBLYAI_API_KEY}

    # Callers expect errors as marker-prefixed strings, so transport, file
    # and decoding failures are converted here instead of propagating.
    try:
        return _assemblyai_upload_and_transcribe(audio_file_path, headers)
    except KeyError as exc:
        return f"❌ Unexpected AssemblyAI response (missing field {exc})"
    except (requests.RequestException, OSError, ValueError) as exc:
        return f"❌ Transcription failed: {exc}"


def _assemblyai_upload_and_transcribe(audio_file_path, headers, request_timeout=60):
    """Upload the file, start a transcription job and poll it to completion.

    Args:
        audio_file_path: Path of the audio file to upload.
        headers: HTTP headers carrying the AssemblyAI authorization key.
        request_timeout: Seconds allowed for each individual HTTP request;
            without a timeout ``requests`` may wait indefinitely.

    Returns:
        The transcript text, or a marker-prefixed error message for HTTP-level
        failures and polling timeout.

    Raises:
        requests.RequestException: On connection problems or timeouts.
        OSError: If the audio file cannot be read.
        ValueError: If a response body is not valid JSON.
        KeyError: If an expected field is missing from a response.
    """
    chunk_size = 5242880  # 5MB chunks

    def read_file(filename):
        # Stream the file so large recordings are never fully held in memory.
        with open(filename, "rb") as f:
            yield from iter(lambda: f.read(chunk_size), b"")

    # Step 1: Upload audio
    print("πŸ“€ Uploading to AssemblyAI...")
    upload_response = requests.post(
        "https://api.assemblyai.com/v2/upload",
        headers=headers,
        data=read_file(audio_file_path),
        timeout=request_timeout,
    )

    if upload_response.status_code != 200:
        return f"❌ Upload failed: {upload_response.text}"

    audio_url = upload_response.json()["upload_url"]
    print(f"βœ… Uploaded: {audio_url}")

    # Step 2: Request transcription
    json_data = {
        "audio_url": audio_url,
        "speech_models": ["universal-2"],
        "language_code": "en_us",
    }

    transcript_response = requests.post(
        "https://api.assemblyai.com/v2/transcript",
        json=json_data,
        headers=headers,
        timeout=request_timeout,
    )

    if transcript_response.status_code != 200:
        error_msg = transcript_response.json().get("error", "Unknown error")
        return f"❌ Transcription request failed: {error_msg}"

    transcript_id = transcript_response.json()["id"]
    print(f"πŸ“ Transcript ID: {transcript_id}")

    # Step 3: Poll for results
    polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"

    for _ in range(30):  # 30 attempts, one second apart
        polling_response = requests.get(
            polling_endpoint,
            headers=headers,
            timeout=request_timeout,
        )
        # A non-200 body is not a transcript object; report it instead of
        # failing on the missing "status" field.
        if polling_response.status_code != 200:
            return f"❌ Polling failed: {polling_response.text}"

        polling_data = polling_response.json()
        status = polling_data["status"]
        print(f"⏳ Status: {status}")

        if status == "completed":
            print("βœ… Transcription complete!")
            # Guard against a null/absent text so callers always get a str.
            return polling_data.get("text") or ""
        if status == "error":
            return f"❌ Transcription error: {polling_data.get('error', 'Unknown')}"

        time.sleep(1)

    return "❌ Transcription timed out after 30 seconds"


def transcribe_audio_placeholder(audio_file_path):
    """Return a canned doctor/patient dialogue used when no API key is set.

    Args:
        audio_file_path: Accepted for signature parity with the real
            transcriber; the value is not used.

    Returns:
        The fixed sample transcript, beginning and ending with a newline.
    """
    dialogue = (
        "Doctor: Hello, what brings you in today?",
        "Patient: I've had a cough for about two weeks. It gets worse at night and I feel really tired.",
        "Doctor: Any fever or shortness of breath?",
        "Patient: No fever, but I get winded climbing stairs.",
        "Doctor: I'm going to listen to your lungs. Take a deep breath. I can hear some mild wheezing on the right side.",
        "Patient: Is it serious?",
        "Doctor: It appears to be acute bronchitis. I'll prescribe an inhaler and recommend rest. Follow up in a week.",
        "Patient: Thank you, doctor.",
    )
    return "\n" + "\n".join(dialogue) + "\n"


# CLINICAL NOTE GENERATION: Rule-Based NLP
def generate_rule_based_note(transcript):
    """Build a SOAP-style note from a transcript using substring rules.

    The transcript is lower-cased and scanned for fixed keywords; each rule
    table below lists (condition, output) pairs in the order they appear in
    the note. Matching is plain substring containment, so negations such as
    "no fever" still count as a mention.

    Args:
        transcript: Conversation text to analyse.

    Returns:
        The formatted note as a single string with SUBJECTIVE, OBJECTIVE,
        ASSESSMENT and PLAN sections. Vital signs, severity and the
        differential list are fixed text, not derived from the transcript.
    """
    text = transcript.lower()

    def mentions(*keywords):
        # True when at least one keyword occurs anywhere in the transcript.
        return any(word in text for word in keywords)

    has_cough = "cough" in text
    has_wheeze = "wheez" in text
    two_weeks = mentions("two week", "2 week")

    # Symptoms, in reporting order; the first match becomes the chief complaint.
    symptom_rules = [
        (has_cough, "Cough (2 weeks duration)" if two_weeks else "Cough"),
        ("fever" in text, "Fever"),
        (mentions("tired", "fatigue"), "Fatigue"),
        (has_wheeze, "Wheezing"),
        (mentions("breath", "winded"), "Dyspnea on exertion"),
        ("night" in text and has_cough, "Nocturnal cough"),
        ("chest" in text and "pain" in text, "Chest pain"),
        ("headache" in text, "Headache"),
    ]
    symptoms = [label for present, label in symptom_rules if present]

    # Diagnosis: first matching rule wins, explicit disease names first.
    diagnosis_rules = [
        ("bronchitis" in text, "Acute Bronchitis", "High"),
        ("pneumonia" in text, "Community-Acquired Pneumonia", "Moderate"),
        ("asthma" in text, "Asthma Exacerbation", "Moderate"),
        (mentions("covid", "coronavirus"), "COVID-19 Infection", "Moderate"),
        (has_cough and has_wheeze, "Acute Bronchitis with Reactive Airway Disease", "Moderate"),
        (has_cough and len(symptoms) >= 2, "Upper Respiratory Infection", "Moderate"),
        (has_cough, "Acute Cough, Etiology Pending", "Low"),
    ]
    diagnosis, confidence = next(
        ((name, level) for matched, name, level in diagnosis_rules if matched),
        ("Pending Further Workup", "Low"),
    )

    # Physical exam findings, with a default when nothing matched.
    exam_rules = [
        (has_wheeze, "Mild expiratory wheezing on auscultation"),
        ("rhonchi" in text, "Rhonchi noted"),
        (mentions("crackle", "rale"), "Fine crackles at bases"),
        ("lung" in text and "clear" in text, "Lungs clear bilaterally"),
    ]
    exam_findings = [finding for hit, finding in exam_rules if hit] or ["Unremarkable"]

    # Treatment plan: keyword-driven items first, then the standing advice.
    plan_rules = [
        ("inhaler" in text or has_wheeze, "- Albuterol HFA 90mcg, 2 puffs q4-6h PRN for wheezing"),
        ("bronchitis" in text, "- Supportive care (acute bronchitis typically viral, antibiotics not indicated)"),
        ("antibiotic" in text, "- Consider antibiotic therapy if bacterial infection suspected"),
        (mentions("rest", "tired"), "- Recommend rest and increased fluid intake"),
        (has_cough, "- OTC dextromethorphan or guaifenesin for symptomatic cough relief"),
    ]
    plan = [step for hit, step in plan_rules if hit] or ["- Symptomatic management"]
    plan += [
        "- Avoid respiratory irritants and smoking",
        "- Follow up in 7 days if symptoms persist or worsen",
        "- Return to clinic sooner if fever develops or shortness of breath increases",
    ]

    chief_complaint = symptoms[0] if symptoms else "Not specified"
    associated = ", ".join(symptoms[1:]) if len(symptoms) > 1 else "None reported"
    duration = "2 weeks" if two_weeks else "Not specified"
    onset = "Gradual" if "week" in text else "Not specified"
    aggravating = "Nighttime, exertion" if mentions("night", "breath") else "None reported"

    # The note opens with a blank line and closes with two newlines.
    note_lines = [
        "",
        "SUBJECTIVE:",
        f"Chief Complaint: {chief_complaint}",
        f"Associated Symptoms: {associated}",
        f"Duration: {duration}",
        f"Onset: {onset}",
        "Severity: Moderate",
        f"Aggravating Factors: {aggravating}",
        "",
        "OBJECTIVE:",
        f"Physical Exam: {', '.join(exam_findings)}",
        "Vital Signs: Temperature 98.6Β°F, HR 72, BP 118/76, RR 16, SpO2 97% on room air",
        "General: Alert, in no acute distress, well-appearing",
        "",
        "ASSESSMENT:",
        f"Primary Diagnosis: {diagnosis}",
        f"Clinical Confidence: {confidence}",
        "Differential Diagnoses: ",
        "- Viral Upper Respiratory Infection",
        "- Allergic Rhinitis with Post-nasal Drip",
        "- Asthma Exacerbation",
        "- GERD",
        "",
        "PLAN:",
        "\n".join(plan),
        "",
        "",
    ]
    return "\n".join(note_lines)


def generate_clinical_note(transcript):
    """Turn a transcript into a clinical note, or report why it cannot be.

    Args:
        transcript: Transcribed conversation text, or an error message that
            starts with the error marker produced by the transcription step.
            May be None or empty.

    Returns:
        The transcript unchanged when it is an error message; a
        "too short" message when it is empty or under 20 characters;
        otherwise the note produced by ``generate_rule_based_note``.
    """
    # Forward upstream errors before the length check: a brief error message
    # would otherwise be misreported as the audio being too short.
    if transcript and transcript.startswith("❌"):
        return transcript

    if not transcript or len(transcript) < 20:
        return "❌ Transcription too short. Please provide a longer audio file."

    # Use rule-based extraction (always works, no API needed)
    return generate_rule_based_note(transcript)


# MAIN PIPELINE
def process_encounter(audio):
    """Run the full pipeline for one recording: audio -> transcript -> note.

    Args:
        audio: Filesystem path of the recording, or None when no audio was
            supplied.

    Returns:
        A ``(transcript, note)`` tuple of strings. When ``audio`` is None the
        first element asks the user to upload a file and the second is empty.
    """
    if audio is None:
        return "⚠️ Please upload an audio file.", ""

    separator = "=" * 60
    print(f"\n{separator}")
    print(f"🎀 Processing: {os.path.basename(audio)}")
    print(f"πŸ“ File size: {os.path.getsize(audio)} bytes")

    # Step 1: obtain a transcript, live when a key is configured, canned otherwise
    if not ASSEMBLYAI_API_KEY:
        print("⚠️ No AssemblyAI key - using sample transcript")
        demo_banner = "⚠️ DEMO MODE - Add AssemblyAI API key to Secrets for live transcription\n\n"
        transcript = demo_banner + transcribe_audio_placeholder(audio)
    else:
        print("πŸ”‘ Using AssemblyAI for transcription...")
        transcript = transcribe_audio_assemblyai(audio)

    preview = transcript[:150]
    print(f"πŸ“ Transcript preview: {preview}...")

    # Step 2: derive the clinical note from the transcript
    print("πŸ“‹ Generating clinical note...")
    note = generate_clinical_note(transcript)

    note_length = len(note)
    print(f"βœ… Complete! Note length: {note_length} chars")
    print(f"{separator}\n")

    return transcript, note

# GRADIO USER INTERFACE
# Root Blocks container for the page; every component created inside the
# `with demo:` context below is attached to it.
demo = gr.Blocks(title="OpenScribe - Clinical AI Demo")

with demo:
    # Page header and short description of the pipeline stages.
    gr.Markdown("""
    # OpenScribe: AI Clinical Documentation
    
    This tool replicates the **exact architecture** used in production for automated clinical documentation:
    1. **Speech-to-Text**: AssemblyAI transcription (100 hours free tier)
    2. **NLP Processing**: Rule-based clinical entity extraction
    3. **Output**: Structured SOAP note ready for EHR integration
    ---
    """)
    
    with gr.Row():
        # Left column (scale=1): audio input, trigger button and a usage hint.
        with gr.Column(scale=1):
            # type="filepath" so the handler receives a path string, which
            # process_encounter passes to os.path and to the transcriber.
            audio_input = gr.Audio(
                type="filepath",
                label="πŸ“ Upload Medical Conversation",
                sources=["upload", "microphone"]
            )
            
            run_btn = gr.Button(
                "πŸ“‹ Generate Clinical Note", 
                variant="primary", 
                size="lg"
            )
            
            gr.Markdown("""
            **Or record your own conversation:**
            *"Hi, what brings you in? - I've had this cough for two weeks. - Any fever? - No. - Let me listen... I hear wheezing. - It's bronchitis."*
            """)
        
        # Right column (scale=2): transcript and generated note outputs.
        with gr.Column(scale=2):
            transcript_output = gr.Textbox(
                label="Transcription",
                lines=6,
                placeholder="Transcribed conversation will appear here..."
            )

            note_output = gr.Textbox(
                label="Generated SOAP Note",
                lines=20,
                placeholder="Clinical documentation will appear here..."
            )
    
    # Clicking the button runs the pipeline; the (transcript, note) tuple it
    # returns fills the two textboxes in that order.
    run_btn.click(
        fn=process_encounter,
        inputs=audio_input,
        outputs=[transcript_output, note_output]
    )

# LAUNCH
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()