arafatanam committed on
Commit
1d53ef2
·
verified ·
1 Parent(s): a0f6668

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +172 -246
app.py CHANGED
@@ -1,336 +1,262 @@
1
  import gradio as gr
2
  import os
3
  import requests
4
- import json
5
  import time
6
- import base64
7
 
8
  # --- CONFIGURATION ---
 
9
  HF_TOKEN = os.environ.get("HF_TOKEN")
10
 
11
- # Updated model IDs that work with current HF Inference API (April 2026)
12
- # Using models confirmed to work on free tier
13
- STT_MODEL = "openai/whisper-base" # Base model works
14
- LLM_MODEL = "google/flan-t5-large" # Reliable free model for summarization
15
 
16
- HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
17
-
18
- # --- WORKING FUNCTION: Speech-to-Text using Inference API v2 ---
19
- def transcribe_audio(audio_file_path):
20
- """Uses HF Inference API with proper endpoint"""
21
- if audio_file_path is None:
22
- return "❌ No audio provided."
23
-
24
- if not HF_TOKEN:
25
- return "❌ HF_TOKEN not set. Please add it to Space Settings > Secrets."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Use the correct inference endpoint for Whisper
28
- API_URL = f"https://api-inference.huggingface.co/models/{STT_MODEL}"
29
 
30
- # Read audio file as bytes
31
- with open(audio_file_path, "rb") as f:
32
- audio_bytes = f.read()
33
 
34
- print(f"πŸ“€ Sending audio ({len(audio_bytes)} bytes) to {STT_MODEL}...")
 
 
 
 
35
 
36
- # Try up to 3 times with exponential backoff
37
- for attempt in range(3):
38
- try:
39
- response = requests.post(
40
- API_URL,
41
- headers=HEADERS,
42
- data=audio_bytes,
43
- timeout=30
44
- )
45
-
46
- print(f"πŸ“₯ Response status: {response.status_code}")
47
-
48
- if response.status_code == 200:
49
- result = response.json()
50
- if isinstance(result, dict) and 'text' in result:
51
- return result['text'].strip()
52
- elif isinstance(result, str):
53
- return result.strip()
54
- else:
55
- return f"❌ Unexpected format: {result}"
56
-
57
- elif response.status_code == 503:
58
- # Model is loading
59
- wait_time = (attempt + 1) * 15
60
- print(f"⏳ Model loading (503), waiting {wait_time}s...")
61
- time.sleep(wait_time)
62
- continue
63
-
64
- elif response.status_code == 410:
65
- print(f"❌ Model {STT_MODEL} is deprecated. Trying alternative...")
66
- # Fall back to a different Whisper endpoint
67
- return transcribe_audio_fallback(audio_file_path)
68
-
69
- else:
70
- error_msg = f"HTTP {response.status_code}: {response.text[:200]}"
71
- print(f"❌ {error_msg}")
72
-
73
- except requests.exceptions.Timeout:
74
- print(f"⏱️ Timeout on attempt {attempt + 1}")
75
- time.sleep(10)
76
- continue
77
- except Exception as e:
78
- print(f"❌ Exception: {str(e)}")
79
- if attempt == 2:
80
- return f"❌ Error: {str(e)}"
81
 
82
- return "❌ Failed after 3 attempts."
83
-
84
- def transcribe_audio_fallback(audio_file_path):
85
- """Fallback using a different model"""
86
- print("πŸ”„ Trying fallback transcription method...")
87
 
88
- # Try OpenAI-compatible endpoint (some HF models support this)
89
- API_URL = "https://api-inference.huggingface.co/models/openai/whisper-tiny"
90
 
91
- with open(audio_file_path, "rb") as f:
92
- audio_bytes = f.read()
93
 
94
- for attempt in range(2):
95
- response = requests.post(API_URL, headers=HEADERS, data=audio_bytes, timeout=30)
 
96
 
97
- if response.status_code == 200:
98
- result = response.json()
99
- if isinstance(result, dict) and 'text' in result:
100
- return result['text'].strip()
 
101
 
102
- time.sleep(10)
 
 
103
 
104
- return "❌ Transcription failed. Try using a different audio file format (WAV works best)."
105
 
106
- # --- WORKING FUNCTION: Clinical Note Generation ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  def generate_clinical_note(transcript):
108
- """Uses Flan-T5 for reliable summarization"""
109
  if not transcript or len(transcript) < 20:
110
- return "❌ Transcription too short or empty."
111
 
112
  if transcript.startswith("❌"):
113
- return "❌ Cannot generate note due to transcription error."
114
 
 
115
  if not HF_TOKEN:
116
- return "❌ HF_TOKEN not set."
117
 
118
  API_URL = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"
 
119
 
120
- # Craft a prompt for medical summarization
121
- prompt = f"""
122
- Convert this medical conversation into a clinical SOAP note.
123
-
124
- Conversation: {transcript}
125
-
126
- SOAP Note Format:
127
- SUBJECTIVE: (patient's symptoms and complaints)
128
- CHIEF COMPLAINT: (primary reason for visit)
129
-
130
- OBJECTIVE: (doctor's observations)
131
-
132
- ASSESSMENT: (likely diagnosis)
133
-
134
- PLAN: (treatment and follow-up)
135
- """
 
136
 
137
  payload = {
138
  "inputs": prompt,
139
  "parameters": {
140
  "max_new_tokens": 250,
141
- "temperature": 0.3,
142
- "do_sample": False
143
  }
144
  }
145
 
146
- print(f"πŸ“€ Sending to {LLM_MODEL}...")
147
-
148
- for attempt in range(3):
149
- try:
150
- response = requests.post(
151
- API_URL,
152
- headers=HEADERS,
153
- json=payload,
154
- timeout=30
155
- )
156
-
157
- print(f"πŸ“₯ LLM Response: {response.status_code}")
158
-
159
- if response.status_code == 200:
160
- result = response.json()
161
-
162
- # Flan-T5 returns a list with generated_text
163
- if isinstance(result, list) and len(result) > 0:
164
- return result[0].get('generated_text', str(result))
165
- elif isinstance(result, dict):
166
- return result.get('generated_text', str(result))
167
- else:
168
- return str(result)
169
-
170
- elif response.status_code == 503:
171
- wait_time = (attempt + 1) * 15
172
- print(f"⏳ LLM loading, waiting {wait_time}s...")
173
- time.sleep(wait_time)
174
- continue
175
-
176
- elif response.status_code == 410:
177
- print("πŸ”„ Trying alternative LLM...")
178
- return generate_clinical_note_fallback(transcript)
179
-
180
- except Exception as e:
181
- print(f"❌ LLM Error: {str(e)}")
182
- if attempt == 2:
183
- return f"❌ Error: {str(e)}"
184
-
185
- return "❌ LLM failed to load."
186
-
187
- def generate_clinical_note_fallback(transcript):
188
- """Fallback using a simpler model"""
189
- print("πŸ”„ Using fallback LLM...")
190
-
191
- # Try a smaller, more reliable model
192
- API_URL = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
193
-
194
- prompt = f"Summarize this medical conversation into a clinical note: {transcript}"
195
-
196
- payload = {
197
- "inputs": prompt,
198
- "parameters": {"max_new_tokens": 200, "temperature": 0.3}
199
- }
200
-
201
- response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=30)
202
-
203
- if response.status_code == 200:
204
- result = response.json()
205
- if isinstance(result, list):
206
- return result[0].get('summary_text', str(result))
207
 
208
- return "❌ Unable to generate clinical note. Please check the logs."
 
 
 
 
 
 
 
 
 
 
 
209
 
210
- # --- SIMPLE RULE-BASED FALLBACK (Always works, no API needed) ---
211
  def generate_rule_based_note(transcript):
212
- """Creates a simple note using keyword matching (no API required)"""
213
- transcript_lower = transcript.lower()
214
 
215
- # Simple keyword detection
216
  symptoms = []
217
- if "cough" in transcript_lower: symptoms.append("Cough")
218
- if "fever" in transcript_lower: symptoms.append("Fever")
219
- if "headache" in transcript_lower: symptoms.append("Headache")
220
- if "pain" in transcript_lower: symptoms.append("Pain")
221
- if "tired" in transcript_lower or "fatigue" in transcript_lower: symptoms.append("Fatigue")
222
-
223
- diagnosis = "Upper Respiratory Infection" if "cough" in transcript_lower else "General Examination"
224
-
225
- note = f"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
  SUBJECTIVE:
227
- Chief Complaint: {', '.join(symptoms) if symptoms else 'Not specified'}
228
- Patient reports symptoms consistent with {diagnosis.lower()}.
229
 
230
  OBJECTIVE:
231
- Physical examination performed. Vital signs stable.
 
232
 
233
  ASSESSMENT:
234
- Likely {diagnosis}.
 
235
 
236
  PLAN:
237
- - Symptomatic treatment recommended
238
- - Follow up in 1 week if symptoms persist
239
- - Return to clinic if symptoms worsen
240
 
241
- ⚠️ Note: This is a rule-based fallback note. For better results, ensure HF_TOKEN is properly configured.
242
  """
243
- return note
244
 
245
- # --- MAIN PIPELINE ---
246
- def process_patient_encounter(audio):
247
- """Main workflow with fallback options"""
248
  if audio is None:
249
- return "Please upload an audio file.", ""
250
 
251
- print(f"\n🎀 Processing audio: {os.path.basename(audio)}")
252
 
253
- # Step 1: Transcribe
254
- transcript = transcribe_audio(audio)
255
- print(f"πŸ“ Transcript: {transcript[:100]}...")
 
 
 
256
 
257
- if transcript.startswith("❌"):
258
- return transcript, ""
259
-
260
- # Step 2: Generate Note (try API, fall back to rule-based)
261
  note = generate_clinical_note(transcript)
262
 
263
- if note.startswith("❌") or len(note) < 50:
264
- print("⚠️ API failed, using rule-based fallback...")
265
- note = generate_rule_based_note(transcript)
266
-
267
  return transcript, note
268
 
269
- # --- GRADIO UI ---
270
- demo = gr.Blocks(title="OpenScribe - Medical AI Demo")
271
 
272
  with demo:
273
  gr.Markdown("""
274
  # πŸ₯ OpenScribe: AI Clinical Documentation
275
- **Educational Replica of the Viscrow Health AI Pipeline.**
276
-
277
- ### βš™οΈ Current Configuration:
278
- - **STT:** Whisper Base (via HF Inference API)
279
- - **LLM:** Flan-T5 Large (via HF Inference API)
280
- - **Fallback:** Rule-based extraction (no API needed)
281
 
282
- ### πŸ”§ Setup:
283
- 1. Add `HF_TOKEN` in **Settings > Secrets** (Get one at huggingface.co/settings/tokens)
284
- 2. Upload WAV or MP3 file
285
- 3. First run may take 30-60s while models warm up
286
  """)
287
 
288
  with gr.Row():
289
  with gr.Column(scale=1):
290
  audio_input = gr.Audio(
291
  type="filepath",
292
- label="Upload Medical Conversation"
293
  )
294
- run_btn = gr.Button("πŸ“‹ Generate Note", variant="primary", size="lg")
295
 
296
- # Status indicator
297
- if HF_TOKEN:
298
- gr.Markdown("βœ… **HF_TOKEN:** Configured")
299
  else:
300
- gr.Markdown("⚠️ **HF_TOKEN:** Missing - Add to Secrets for better results")
301
 
302
  with gr.Column(scale=2):
303
  transcript_output = gr.Textbox(
304
  label="πŸ“ Transcription",
305
- lines=6,
306
- placeholder="Transcribed conversation..."
307
  )
308
  note_output = gr.Textbox(
309
- label="πŸ“‹ Clinical Note (SOAP Format)",
310
- lines=14,
311
- placeholder="Generated clinical documentation..."
312
  )
313
 
314
  run_btn.click(
315
- fn=process_patient_encounter,
316
  inputs=audio_input,
317
  outputs=[transcript_output, note_output]
318
  )
319
-
320
- # Sample section
321
- with gr.Accordion("πŸ“ Sample Files & Troubleshooting", open=False):
322
- gr.Markdown("""
323
- ### Test Audio Files:
324
- - [Medical Conversation Sample 1](https://github.com/AssemblyAI-Examples/audio-examples/raw/main/20230607_me_canadian_wildfires.mp3)
325
- - [Clean Voice Sample](https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav)
326
-
327
- ### Troubleshooting 410 Error:
328
- The 410 error means the model endpoint changed. This version uses:
329
- - `openai/whisper-base` for transcription
330
- - `google/flan-t5-large` for summarization
331
-
332
- If you still get errors, the **rule-based fallback** will still demonstrate the workflow.
333
- """)
334
 
335
  if __name__ == "__main__":
336
  demo.launch(theme=gr.themes.Soft())
 
1
  import gradio as gr
2
  import os
3
  import requests
 
4
  import time
 
5
 
6
  # --- CONFIGURATION ---
7
# API credentials are read from the environment; either may be absent (None),
# in which case the rest of the module switches to its offline fallbacks.
ASSEMBLYAI_API_KEY = os.environ.get("ASSEMBLYAI_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Hugging Face model id used for SOAP-note generation.
LLM_MODEL = "google/flan-t5-large"  # Works on free tier
 
 
12
 
13
+ # --- WORKING Transcription with AssemblyAI ---
14
def transcribe_audio_assemblyai(audio_file_path, max_polls=30, poll_interval=1, request_timeout=30):
    """Transcribe an audio file through the AssemblyAI REST API.

    The file is uploaded, a transcription job is created for the returned
    upload URL, and the job is polled until it completes, fails, or the
    polling budget is exhausted.

    Args:
        audio_file_path: Path of the audio file to upload.
        max_polls: Maximum number of status checks before giving up.
        poll_interval: Seconds to sleep between two status checks.
        request_timeout: Timeout in seconds applied to every HTTP request,
            so a stalled connection cannot hang the caller indefinitely.

    Returns:
        The transcript text on success. On any failure a human-readable
        message is returned that starts with the same marker as the other
        error strings in this function; callers detect errors by prefix.
    """
    if not ASSEMBLYAI_API_KEY:
        return "❌ AssemblyAI API key not set. Add to Secrets."

    headers = {"authorization": ASSEMBLYAI_API_KEY}

    def read_file(filename, chunk_size=5242880):
        # Stream the file in 5 MB chunks so the whole recording is never
        # held in memory at once.
        with open(filename, "rb") as f:
            while True:
                data = f.read(chunk_size)
                if not data:
                    break
                yield data

    try:
        # Step 1: upload the audio bytes.
        print("πŸ“€ Uploading to AssemblyAI...")
        upload_response = requests.post(
            "https://api.assemblyai.com/v2/upload",
            headers=headers,
            data=read_file(audio_file_path),
            timeout=request_timeout,
        )
        if upload_response.status_code != 200:
            return f"❌ Upload failed: {upload_response.text}"

        audio_url = upload_response.json()["upload_url"]
        print(f"βœ… Uploaded: {audio_url}")

        # Step 2: create the transcription job.
        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            json={"audio_url": audio_url, "language_code": "en_us"},
            headers=headers,
            timeout=request_timeout,
        )
        if transcript_response.status_code != 200:
            return f"❌ Transcription request failed: {transcript_response.text}"

        transcript_id = transcript_response.json()["id"]
        print(f"πŸ“ Transcript ID: {transcript_id}")

        # Step 3: poll the job until it reaches a terminal state.
        polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"
        for attempt in range(max_polls):
            polling_response = requests.get(
                polling_endpoint, headers=headers, timeout=request_timeout
            )
            if polling_response.status_code != 200:
                # An error payload has no "status" key; report it instead of
                # failing later with a KeyError.
                return f"❌ Transcription error: HTTP {polling_response.status_code}"

            polling_data = polling_response.json()
            status = polling_data.get("status")

            if status == "completed":
                print("βœ… Transcription complete!")
                # Always hand a string back, even if the API sends null text.
                return polling_data.get("text") or ""
            if status == "error":
                return f"❌ Transcription error: {polling_data.get('error', 'Unknown')}"

            time.sleep(poll_interval)
            if attempt % 5 == 0:
                print(f"⏳ Waiting for transcription... ({status})")
    except (requests.exceptions.RequestException, OSError, ValueError, KeyError) as exc:
        # Network failures, an unreadable audio file, a non-JSON body or a
        # payload missing an expected key all become an error string, so the
        # UI handler never sees an exception.
        return f"❌ Transcription error: {exc}"

    return "❌ Transcription timed out"
80
 
81
+ # --- Fallback: Simple local transcription (No API needed) ---
82
def transcribe_audio_placeholder(audio_file_path):
    """Return a canned doctor/patient dialogue used when no API key is set.

    Args:
        audio_file_path: Accepted for signature parity with the real
            transcriber; it is not read.

    Returns:
        A fixed multi-line sample transcript (leading and trailing newline
        included).
    """
    return """
Doctor: Hello, what brings you in today?
Patient: I've had a cough for about two weeks. It gets worse at night and I feel tired.
Doctor: Any fever?
Patient: No fever.
Doctor: I'll listen to your lungs. Take a deep breath. I can hear some wheezing.
Patient: Is it serious?
Doctor: It appears to be acute bronchitis. I'll prescribe an inhaler.
Patient: Thank you, doctor.
"""
94
+
95
+ # --- Clinical Note Generation ---
96
def generate_clinical_note(transcript):
    """Generate a SOAP note from a consultation transcript.

    The note is requested from the Hugging Face Inference API when
    ``HF_TOKEN`` is set. Whenever the API cannot be used or does not return
    usable text, the keyword-based ``generate_rule_based_note`` is used
    instead, so the function always returns a string.

    Args:
        transcript: Transcript text, or an error message produced by the
            transcription step.

    Returns:
        The SOAP note, or an error message: a too-short transcript yields a
        fixed message, and a transcript that already is an error message is
        passed through unchanged.
    """
    if not transcript or len(transcript) < 20:
        return "❌ Transcription too short."

    # Transcription errors are propagated as-is instead of being summarised.
    if transcript.startswith("❌"):
        return transcript

    # If no HF_TOKEN, use rule-based extraction
    if not HF_TOKEN:
        return generate_rule_based_note(transcript)

    api_url = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    prompt = f"""Convert this medical conversation into a SOAP note:

{transcript}

SOAP Note:
SUBJECTIVE:
CHIEF COMPLAINT:

OBJECTIVE:
EXAM FINDINGS:

ASSESSMENT:
DIAGNOSIS:

PLAN:
TREATMENT:
FOLLOW-UP:"""

    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 250,
            "temperature": 0.3,
        },
    }

    print(f"πŸ“€ Generating clinical note...")

    try:
        response = requests.post(api_url, headers=headers, json=payload, timeout=30)
        if response.status_code == 200:
            result = response.json()
            # Only a non-empty list of dicts carrying generated text counts
            # as success; every other shape falls through to the fallback
            # rather than returning None or a dumped raw payload.
            if isinstance(result, list) and result and isinstance(result[0], dict):
                generated = result[0].get("generated_text")
                if generated:
                    return generated
    except (requests.exceptions.RequestException, ValueError) as exc:
        # Network failure or a non-JSON body: log it and fall back.
        print(f"LLM request error: {exc}")

    print(f"⚠️ LLM API failed, using rule-based fallback")
    return generate_rule_based_note(transcript)
151
 
 
152
def generate_rule_based_note(transcript):
    """Build a SOAP note from keyword matches in the transcript.

    This is a deliberately simple, offline fallback. It only reports what the
    keywords support: the cough duration is taken from the transcript rather
    than assumed, a negated fever ("no fever") is not listed as a symptom,
    and the word "breath" alone (e.g. "take a deep breath") is not treated as
    dyspnea.

    Args:
        transcript: Conversation text; ``None`` or an empty string is treated
            as a transcript with no findings.

    Returns:
        A multi-line SOAP note string.
    """
    import re  # local import: only this function needs regex support

    t = (transcript or "").lower()

    # --- Symptoms ---
    symptoms = []
    if "cough" in t:
        # Report a duration only when the speaker actually states one.
        duration = re.search(
            r"cough\w*\s+for\s+(?:about\s+|around\s+|almost\s+|over\s+)?"
            r"((?:\d+|[a-z]+)\s+(?:day|week|month)s?)",
            t,
        )
        symptoms.append(f"Cough ({duration.group(1)})" if duration else "Cough")

    fever_negations = (
        "no fever", "denies fever", "denied fever", "without fever", "not had a fever",
    )
    has_fever = "fever" in t and not any(phrase in t for phrase in fever_negations)
    if has_fever:
        symptoms.append("Fever")

    if "tired" in t or "fatigue" in t:
        symptoms.append("Fatigue")
    if "wheez" in t:
        symptoms.append("Wheezing")

    dyspnea_phrases = (
        "short of breath", "shortness of breath", "breathless",
        "trouble breathing", "difficulty breathing", "hard to breathe",
    )
    if any(phrase in t for phrase in dyspnea_phrases):
        symptoms.append("Dyspnea")

    # --- Diagnosis (first matching keyword wins) ---
    if "bronchitis" in t:
        diagnosis = "Acute Bronchitis"
    elif "pneumonia" in t:
        diagnosis = "Pneumonia"
    elif "cough" in t:
        diagnosis = "Upper Respiratory Infection"
    else:
        diagnosis = "Pending Workup"

    # --- Plan ---
    plan = []
    if "inhaler" in t:
        plan.append("- Albuterol inhaler as needed")
    if "antibiotic" in t:
        plan.append("- Consider antibiotic therapy")
    plan.extend([
        "- Increase fluid intake",
        "- Rest",
        "- Follow up in 7 days if symptoms persist",
    ])

    chief_complaint = symptoms[0] if symptoms else "Not specified"
    associated = ", ".join(symptoms[1:]) if len(symptoms) > 1 else "None"
    physical_exam = "Mild wheezing on auscultation" if "wheez" in t else "Unremarkable"
    # Keep the vitals line consistent with the symptom list.
    vital_signs = "Fever reported by patient" if has_fever else "Stable, afebrile"
    plan_text = "\n".join(plan)

    return f"""
SUBJECTIVE:
Chief Complaint: {chief_complaint}
Associated Symptoms: {associated}

OBJECTIVE:
Physical Exam: {physical_exam}
Vital Signs: {vital_signs}

ASSESSMENT:
Diagnosis: {diagnosis}
Differential: Viral URI, Allergic rhinitis, Asthma

PLAN:
{plan_text}

⚠️ Generated using rule-based extraction (educational demo)
"""
 
200
 
201
+ # --- Main Pipeline ---
202
def process_encounter(audio):
    """Run the full pipeline for one uploaded recording.

    Args:
        audio: Path of the uploaded audio file, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(transcript_text, note_text)`` tuple for the two output boxes.
    """
    if audio is None:
        return "Please upload an audio file", ""

    print(f"\n🎀 Processing: {os.path.basename(audio)}")

    # Try AssemblyAI, fall back to placeholder
    if ASSEMBLYAI_API_KEY:
        transcript = transcribe_audio_assemblyai(audio)
        displayed_transcript = transcript
    else:
        transcript = transcribe_audio_placeholder(audio)
        # The demo-mode banner is for display only; the note is generated
        # from the raw transcript so the banner never reaches the model
        # prompt.
        displayed_transcript = (
            "⚠️ No API key - using sample transcript for demonstration\n\n" + transcript
        )

    # Generate note
    note = generate_clinical_note(transcript)

    return displayed_transcript, note
219
 
220
+ # --- Gradio UI ---
221
# Top-level Gradio application object; the layout is declared inside the
# ``with demo:`` context below.
demo = gr.Blocks(title="OpenScribe - Clinical AI Demo")

with demo:
    gr.Markdown("""
    # πŸ₯ OpenScribe: AI Clinical Documentation
    **Educational Demo of Viscrow Health Pipeline** | Built by Arafat Anam Chowdhury

    βœ… **Currently Using:** AssemblyAI (100 hrs free) for transcription + Flan-T5 for summarization
    """)

    with gr.Row():
        # Left column: input controls and API status.
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                type="filepath",
                label="πŸ“ Upload Medical Conversation"
            )
            run_btn = gr.Button("πŸ“‹ Generate Clinical Note", variant="primary", size="lg")

            # Status: evaluated once, when the UI is built.
            if ASSEMBLYAI_API_KEY:
                gr.Markdown("βœ… **API:** AssemblyAI Configured")
            else:
                gr.Markdown("⚠️ **Demo Mode:** Add AssemblyAI key in Secrets for live transcription")

        # Right column: pipeline outputs.
        with gr.Column(scale=2):
            transcript_output = gr.Textbox(
                label="πŸ“ Transcription",
                lines=6
            )
            note_output = gr.Textbox(
                label="πŸ“‹ SOAP Note",
                lines=15
            )

    # process_encounter returns (transcript, note), matching the two outputs.
    run_btn.click(
        fn=process_encounter,
        inputs=audio_input,
        outputs=[transcript_output, note_output]
    )

if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())