arafatanam committed on
Commit
bd13cbe
·
verified ·
1 Parent(s): 987baac

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +139 -85
app.py CHANGED
@@ -2,148 +2,200 @@ import gradio as gr
2
  import os
3
  import requests
4
  import json
 
5
 
6
- # 1. Speech-to-Text Integration
7
- # 2. LLM Summarization Pipeline
8
- # 3. Structured Clinical Output
9
 
10
- # Use your Hugging Face API Token (Free)
11
- HF_TOKEN = os.environ.get("HF_TOKEN") # Set this in Space Settings > Secrets
 
12
 
13
- # Model endpoints (Free inference API)
14
- STT_MODEL = "openai/whisper-large-v3-turbo"
15
- LLM_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"
16
 
17
- HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
18
-
19
- # --- FUNCTION 1: Speech-to-Text ---
20
  def transcribe_audio(audio_file_path):
21
- """
22
- Takes audio file path, sends to free Whisper endpoint.
23
- Returns text transcription.
24
- """
25
  if audio_file_path is None:
26
- return "No audio provided."
 
 
 
27
 
28
- # Query the free HF Inference API
29
  API_URL = f"https://api-inference.huggingface.co/models/{STT_MODEL}"
30
 
 
31
  with open(audio_file_path, "rb") as f:
32
  data = f.read()
33
 
34
- response = requests.post(API_URL, headers=HEADERS, data=data)
35
- result = response.json()
36
 
37
- # Handle response structure (Whisper returns {'text': '...'})
38
- if 'text' in result:
39
- return result['text']
40
- elif 'error' in result:
41
- return f"Error: {result['error']}. Model might be loading. Try again in 30s."
42
- else:
43
- return "Transcription failed."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- # --- FUNCTION 2: Clinical Note Generation ---
46
  def generate_clinical_note(transcript):
47
- """
48
- Takes raw transcript and prompts Llama 3 to create a SOAP note.
49
- This is the exact same logic used in the Viscrow tool.
50
- """
51
  if not transcript or len(transcript) < 20:
52
- return "Transcription too short or empty."
 
 
 
 
 
 
53
 
54
- # This prompt engineering is the CORE SKILL you demonstrated at Viscrow.
55
- # It structures the output exactly like a clinician expects.
56
  prompt = f"""
57
- <|begin_of_text|><|start_header_id|>system<|end_header_id|>
58
- You are an AI Medical Scribe. Convert the following doctor-patient conversation into a structured SOAP Note.
59
- Focus on medical entities, symptoms, and plan. If information is missing, state "Not mentioned".
60
- <|eot_id|><|start_header_id|>user<|end_header_id|>
61
  Conversation:
62
  {transcript}
63
 
64
- Generate the SOAP Note:
65
  SUBJECTIVE:
66
  CHIEF COMPLAINT:
67
- HISTORY OF PRESENT ILLNESS:
68
 
69
  OBJECTIVE:
70
- VITALS:
71
- PHYSICAL EXAM:
72
 
73
  ASSESSMENT:
74
- DIAGNOSIS/IMPRESSION:
75
 
76
  PLAN:
77
- TREATMENT:
78
- FOLLOW-UP:
79
- <|eot_id|><|start_header_id|>assistant<|end_header_id|>
80
  """
81
 
82
  API_URL = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"
83
  payload = {
84
  "inputs": prompt,
85
  "parameters": {
86
- "max_new_tokens": 512,
87
- "temperature": 0.2, # Low temp for factual accuracy (reduces errors as per your CV)
88
  "return_full_text": False
89
  }
90
  }
91
 
92
- response = requests.post(API_URL, headers=HEADERS, json=payload)
93
- result = response.json()
94
 
95
- if isinstance(result, list) and 'generated_text' in result[0]:
96
- return result[0]['generated_text'].strip()
97
- elif 'error' in result:
98
- return f"LLM Error: {result['error']}"
99
- else:
100
- return "Failed to generate note."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
 
102
- # --- FUNCTION 3: The Full Pipeline ---
103
  def process_patient_encounter(audio):
104
- """
105
- The main workflow that ties it all together.
106
- This is exactly what you built at Viscrow but using open-source tools.
107
- """
108
  if audio is None:
109
  return "Please upload an audio file.", ""
110
 
111
- # Step 1: Transcribe (Like Viscrow integration)
112
- transcription_status = "Transcribing audio with Whisper..."
113
- transcript_text = transcribe_audio(audio)
114
 
115
- # Step 2: Summarize (Like Viscrow LLM pipeline)
116
- if "Error" in transcript_text or "failed" in transcript_text:
117
- return transcript_text, ""
118
-
119
- note_status = "Generating SOAP Note with Llama 3..."
120
- clinical_note = generate_clinical_note(transcript_text)
 
 
 
121
 
122
- return transcript_text, clinical_note
123
 
124
- # --- GRADIO UI ---
125
- with gr.Blocks(theme=gr.themes.Soft(), title="OpenScribe - Medical AI Demo") as demo:
 
 
126
  gr.Markdown("""
127
  # 🏥 OpenScribe: AI Clinical Documentation
128
  **Educational Replica of the Viscrow Health AI Pipeline.**
129
  *Built by Arafat Anam Chowdhury*
130
 
131
- This tool demonstrates:
132
- - Integration of Speech-to-Text (Whisper)
133
- - LLM Summarization for Clinical Notes (Llama 3)
134
- - Reduction of AI errors via prompt engineering.
135
 
136
- **⚠️ Disclaimer:** *This is a portfolio demo. Not for real clinical use.*
137
  """)
138
 
139
  with gr.Row():
140
  with gr.Column(scale=1):
141
- audio_input = gr.Audio(type="filepath", label="Upload Doctor-Patient Conversation", sources=["upload", "microphone"])
 
 
 
 
142
  run_btn = gr.Button("πŸ“‹ Generate Clinical Note", variant="primary", size="lg")
 
 
 
 
143
 
144
  with gr.Column(scale=2):
145
- transcript_output = gr.Textbox(label="1. Raw Transcription", lines=5, placeholder="Text will appear here...")
146
- note_output = gr.Textbox(label="2. Generated SOAP Note (Llama 3 8B)", lines=15, placeholder="Structured note will appear here...")
 
 
 
 
 
 
 
 
147
 
148
  run_btn.click(
149
  fn=process_patient_encounter,
@@ -153,10 +205,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="OpenScribe - Medical AI Demo") as
153
 
154
  gr.Markdown("""
155
  ---
156
- **Technical Explanation (For Recruiters/Interviewers):**
157
- - **Replication of Viscrow Task:** This uses the exact same architecture I built for the clinician notes tool, just swapped proprietary APIs for open-source Hugging Face models.
158
- - **Error Reduction:** I used a low `temperature` setting (0.2) and strict system prompting to minimize hallucinations, mirroring my work on "evaluating model outputs and reducing common errors."
 
 
159
  """)
160
 
161
  if __name__ == "__main__":
162
- demo.launch()
 
2
  import os
3
  import requests
4
  import json
5
+ import time
6
 
7
+ # --- CONFIGURATION ---
8
+ HF_TOKEN = os.environ.get("HF_TOKEN")
 
9
 
10
+ # Using smaller, faster models that work reliably on free tier
11
+ STT_MODEL = "openai/whisper-small" # Smaller = faster cold start
12
+ LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct" # More reliable on free tier than Llama
13
 
14
+ HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
 
15
 
16
+ # --- DEBUGGED FUNCTION: Speech-to-Text ---
 
 
17
  def transcribe_audio(audio_file_path):
18
+ """Takes audio file, sends to Whisper, handles errors gracefully"""
 
 
 
19
  if audio_file_path is None:
20
+ return "❌ No audio provided."
21
+
22
+ if not HF_TOKEN:
23
+ return "❌ HF_TOKEN not set. Please add it to Space Settings > Secrets."
24
 
 
25
  API_URL = f"https://api-inference.huggingface.co/models/{STT_MODEL}"
26
 
27
+ # Read audio file
28
  with open(audio_file_path, "rb") as f:
29
  data = f.read()
30
 
31
+ print(f"πŸ“€ Sending audio to {STT_MODEL}...")
 
32
 
33
+ # Try up to 3 times (model might be loading)
34
+ for attempt in range(3):
35
+ response = requests.post(API_URL, headers=HEADERS, data=data)
36
+
37
+ print(f"πŸ“₯ Response status: {response.status_code}")
38
+ print(f"πŸ“₯ Response text: {response.text[:200]}...") # First 200 chars
39
+
40
+ # Check if model is loading
41
+ if "loading" in response.text.lower():
42
+ wait_time = (attempt + 1) * 10
43
+ print(f"⏳ Model loading, waiting {wait_time}s...")
44
+ time.sleep(wait_time)
45
+ continue
46
+
47
+ # Try to parse JSON
48
+ try:
49
+ result = response.json()
50
+
51
+ if isinstance(result, dict) and 'text' in result:
52
+ return result['text']
53
+ elif isinstance(result, dict) and 'error' in result:
54
+ return f"❌ API Error: {result['error']}"
55
+ else:
56
+ return f"❌ Unexpected response format: {result}"
57
+
58
+ except json.JSONDecodeError:
59
+ if attempt == 2:
60
+ return f"❌ Failed to parse response. Raw text: {response.text[:500]}"
61
+
62
+ return "❌ Model failed to load after 3 attempts. Try a smaller audio file or check HF_TOKEN."
63
 
64
+ # --- DEBUGGED FUNCTION: Clinical Note Generation ---
65
  def generate_clinical_note(transcript):
66
+ """Takes transcript, returns structured SOAP note"""
 
 
 
67
  if not transcript or len(transcript) < 20:
68
+ return "❌ Transcription too short or empty."
69
+
70
+ if transcript.startswith("❌"):
71
+ return "❌ Cannot generate note due to transcription error."
72
+
73
+ if not HF_TOKEN:
74
+ return "❌ HF_TOKEN not set."
75
 
76
+ # Simpler prompt for Phi-3
 
77
  prompt = f"""
78
+ You are a medical scribe. Convert this conversation into a SOAP note format.
79
+
 
 
80
  Conversation:
81
  {transcript}
82
 
83
+ Generate:
84
  SUBJECTIVE:
85
  CHIEF COMPLAINT:
86
+ HISTORY:
87
 
88
  OBJECTIVE:
89
+ EXAM FINDINGS:
 
90
 
91
  ASSESSMENT:
 
92
 
93
  PLAN:
 
 
 
94
  """
95
 
96
  API_URL = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"
97
  payload = {
98
  "inputs": prompt,
99
  "parameters": {
100
+ "max_new_tokens": 300,
101
+ "temperature": 0.3,
102
  "return_full_text": False
103
  }
104
  }
105
 
106
+ print(f"πŸ“€ Sending transcript to {LLM_MODEL}...")
 
107
 
108
+ for attempt in range(3):
109
+ response = requests.post(API_URL, headers=HEADERS, json=payload)
110
+
111
+ print(f"πŸ“₯ LLM Response status: {response.status_code}")
112
+
113
+ if "loading" in response.text.lower():
114
+ wait_time = (attempt + 1) * 10
115
+ print(f"⏳ LLM loading, waiting {wait_time}s...")
116
+ time.sleep(wait_time)
117
+ continue
118
+
119
+ try:
120
+ result = response.json()
121
+
122
+ if isinstance(result, list) and len(result) > 0:
123
+ if 'generated_text' in result[0]:
124
+ return result[0]['generated_text'].strip()
125
+ elif isinstance(result, dict) and 'generated_text' in result:
126
+ return result['generated_text'].strip()
127
+ elif isinstance(result, dict) and 'error' in result:
128
+ return f"❌ LLM Error: {result['error']}"
129
+ else:
130
+ return f"❌ Unexpected LLM response: {result}"
131
+
132
+ except json.JSONDecodeError:
133
+ if attempt == 2:
134
+ return f"❌ Failed to parse LLM response. Raw: {response.text[:300]}"
135
+
136
+ return "❌ LLM failed to load."
137
 
138
+ # --- MAIN PIPELINE ---
139
  def process_patient_encounter(audio):
140
+ """Main workflow"""
 
 
 
141
  if audio is None:
142
  return "Please upload an audio file.", ""
143
 
144
+ print(f"\n🎀 Processing audio: {audio}")
 
 
145
 
146
+ # Step 1: Transcribe
147
+ transcript = transcribe_audio(audio)
148
+ print(f"πŸ“ Transcript length: {len(transcript)} chars")
149
+
150
+ # Step 2: Generate Note
151
+ if transcript.startswith("❌"):
152
+ return transcript, ""
153
+
154
+ note = generate_clinical_note(transcript)
155
 
156
+ return transcript, note
157
 
158
+ # --- GRADIO UI (Fixed for Gradio 6.0) ---
159
+ demo = gr.Blocks(title="OpenScribe - Medical AI Demo")
160
+
161
+ with demo:
162
  gr.Markdown("""
163
  # 🏥 OpenScribe: AI Clinical Documentation
164
  **Educational Replica of the Viscrow Health AI Pipeline.**
165
  *Built by Arafat Anam Chowdhury*
166
 
167
+ ### 🔧 Setup Instructions:
168
+ 1. Go to **Settings > Secrets** and add `HF_TOKEN` (get one free at huggingface.co/settings/tokens)
169
+ 2. Upload an audio file (MP3 or WAV) of a medical conversation
170
+ 3. Click "Generate Clinical Note"
171
 
172
+ **⚠️ Note:** First run may take 30-60 seconds as models warm up. Subsequent runs are faster.
173
  """)
174
 
175
  with gr.Row():
176
  with gr.Column(scale=1):
177
+ audio_input = gr.Audio(
178
+ type="filepath",
179
+ label="Upload Doctor-Patient Conversation",
180
+ sources=["upload", "microphone"]
181
+ )
182
  run_btn = gr.Button("πŸ“‹ Generate Clinical Note", variant="primary", size="lg")
183
+
184
+ # Debug info
185
+ token_status = "βœ… Token Set" if HF_TOKEN else "❌ Token Missing - Add HF_TOKEN to Secrets"
186
+ gr.Markdown(f"**Status:** {token_status}")
187
 
188
  with gr.Column(scale=2):
189
+ transcript_output = gr.Textbox(
190
+ label="1. Raw Transcription",
191
+ lines=5,
192
+ placeholder="Transcribed text will appear here..."
193
+ )
194
+ note_output = gr.Textbox(
195
+ label="2. Generated SOAP Note",
196
+ lines=15,
197
+ placeholder="Structured clinical note will appear here..."
198
+ )
199
 
200
  run_btn.click(
201
  fn=process_patient_encounter,
 
205
 
206
  gr.Markdown("""
207
  ---
208
+ ### 📋 Sample Test Audio:
209
+ Don't have an audio file? [Click here to download a sample](https://github.com/AssemblyAI-Examples/audio-examples/raw/main/20230607_me_canadian_wildfires.mp3)
210
+
211
+ ### 🔍 Debugging:
212
+ Check the **Logs** tab at the bottom of this page to see exactly what's happening.
213
  """)
214
 
215
  if __name__ == "__main__":
216
+ demo.launch(theme=gr.themes.Soft())