Spaces:

arafatanam
/

Clinical-Scribe

Sleeping

App Files Files Community

arafatanam committed on Apr 9

Commit

bd13cbe

verified ·

1 Parent(s): 987baac

Update app.py

Browse files

Files changed (1) hide show

app.py +139 -85

app.py CHANGED Viewed

@@ -2,148 +2,200 @@ import gradio as gr
 import os
 import requests
 import json
# Pipeline stages implemented in this file:
# 1. Speech-to-Text Integration
# 2. LLM Summarization Pipeline
# 3. Structured Clinical Output

# Hugging Face API token (free); set it in Space Settings > Secrets.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Model endpoints served through the free Hugging Face Inference API.
STT_MODEL = "openai/whisper-large-v3-turbo"
LLM_MODEL = "meta-llama/Meta-Llama-3-8B-Instruct"

# Attach the Authorization header only when a token is configured; building it
# unconditionally would send the literal string "Bearer None".
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
-# --- FUNCTION 1: Speech-to-Text ---
 def transcribe_audio(audio_file_path):
-    """
-    Takes audio file path, sends to free Whisper endpoint.
-    Returns text transcription.
-    """
     if audio_file_path is None:
-        return "No audio provided."
-    # Query the free HF Inference API
     API_URL = f"https://api-inference.huggingface.co/models/{STT_MODEL}"
     with open(audio_file_path, "rb") as f:
         data = f.read()
-    response = requests.post(API_URL, headers=HEADERS, data=data)
-    result = response.json()
-    # Handle response structure (Whisper returns {'text': '...'})
-    if 'text' in result:
-        return result['text']
-    elif 'error' in result:
-        return f"Error: {result['error']}. Model might be loading. Try again in 30s."
-    else:
-        return "Transcription failed."
-# --- FUNCTION 2: Clinical Note Generation ---
 def generate_clinical_note(transcript):
-    """
-    Takes raw transcript and prompts Llama 3 to create a SOAP note.
-    This is the exact same logic used in the Viscrow tool.
-    """
     if not transcript or len(transcript) < 20:
-        return "Transcription too short or empty."
-    # This prompt engineering is the CORE SKILL you demonstrated at Viscrow.
-    # It structures the output exactly like a clinician expects.
     prompt = f"""
-    <|begin_of_text|><|start_header_id|>system<|end_header_id|>
-    You are an AI Medical Scribe. Convert the following doctor-patient conversation into a structured SOAP Note.
-    Focus on medical entities, symptoms, and plan. If information is missing, state "Not mentioned".
-    <|eot_id|><|start_header_id|>user<|end_header_id|>
     Conversation:
     {transcript}
-    Generate the SOAP Note:
     SUBJECTIVE:
     CHIEF COMPLAINT:
-    HISTORY OF PRESENT ILLNESS:
     OBJECTIVE:
-    VITALS:
-    PHYSICAL EXAM:
     ASSESSMENT:
-    DIAGNOSIS/IMPRESSION:
     PLAN:
-    TREATMENT:
-    FOLLOW-UP:
-    <|eot_id|><|start_header_id|>assistant<|end_header_id|>
     """
     API_URL = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"
     payload = {
         "inputs": prompt,
         "parameters": {
-            "max_new_tokens": 512,
-            "temperature": 0.2, # Low temp for factual accuracy (reduces errors as per your CV)
             "return_full_text": False
         }
     }
-    response = requests.post(API_URL, headers=HEADERS, json=payload)
-    result = response.json()
-    if isinstance(result, list) and 'generated_text' in result[0]:
-        return result[0]['generated_text'].strip()
-    elif 'error' in result:
-        return f"LLM Error: {result['error']}"
-    else:
-        return "Failed to generate note."
-# --- FUNCTION 3: The Full Pipeline ---
 def process_patient_encounter(audio):
-    """
-    The main workflow that ties it all together.
-    This is exactly what you built at Viscrow but using open-source tools.
-    """
     if audio is None:
         return "Please upload an audio file.", ""
-    # Step 1: Transcribe (Like Viscrow integration)
-    transcription_status = "Transcribing audio with Whisper..."
-    transcript_text = transcribe_audio(audio)
-    # Step 2: Summarize (Like Viscrow LLM pipeline)
-    if "Error" in transcript_text or "failed" in transcript_text:
-        return transcript_text, ""
-    note_status = "Generating SOAP Note with Llama 3..."
-    clinical_note = generate_clinical_note(transcript_text)
-    return transcript_text, clinical_note
-# --- GRADIO UI ---
-with gr.Blocks(theme=gr.themes.Soft(), title="OpenScribe - Medical AI Demo") as demo:
     gr.Markdown("""
     # 🏥 OpenScribe: AI Clinical Documentation
     **Educational Replica of the Viscrow Health AI Pipeline.**
     *Built by Arafat Anam Chowdhury*
-    This tool demonstrates:
-    - Integration of Speech-to-Text (Whisper)
-    - LLM Summarization for Clinical Notes (Llama 3)
-    - Reduction of AI errors via prompt engineering.
-    **⚠️ Disclaimer:** *This is a portfolio demo. Not for real clinical use.*
     """)
     with gr.Row():
         with gr.Column(scale=1):
-            audio_input = gr.Audio(type="filepath", label="Upload Doctor-Patient Conversation", sources=["upload", "microphone"])
             run_btn = gr.Button("📋 Generate Clinical Note", variant="primary", size="lg")
         with gr.Column(scale=2):
-            transcript_output = gr.Textbox(label="1. Raw Transcription", lines=5, placeholder="Text will appear here...")
-            note_output = gr.Textbox(label="2. Generated SOAP Note (Llama 3 8B)", lines=15, placeholder="Structured note will appear here...")
     run_btn.click(
         fn=process_patient_encounter,
@@ -153,10 +205,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="OpenScribe - Medical AI Demo") as
     gr.Markdown("""
     ---
-    **Technical Explanation (For Recruiters/Interviewers):**
-    - **Replication of Viscrow Task:** This uses the exact same architecture I built for the clinician notes tool, just swapped proprietary APIs for open-source Hugging Face models.
-    - **Error Reduction:** I used a low `temperature` setting (0.2) and strict system prompting to minimize hallucinations, mirroring my work on "evaluating model outputs and reducing common errors."
     """)
 if __name__ == "__main__":
-    demo.launch()

 import os
 import requests
 import json
+import time
# --- CONFIGURATION ---
# API token read from the environment; None when the secret is not configured.
HF_TOKEN = os.getenv("HF_TOKEN")

# Author's note: smaller, faster models that work reliably on the free tier.
STT_MODEL = "openai/whisper-small"  # smaller = faster cold start
LLM_MODEL = "microsoft/Phi-3-mini-4k-instruct"  # author found it more reliable on free tier than Llama

# Request headers: carry the bearer token only when one is available.
HEADERS = {}
if HF_TOKEN:
    HEADERS["Authorization"] = f"Bearer {HF_TOKEN}"
+# --- DEBUGGED FUNCTION: Speech-to-Text ---
 def transcribe_audio(audio_file_path):
+    """Takes audio file, sends to Whisper, handles errors gracefully"""
     if audio_file_path is None:
+        return "❌ No audio provided."
+    if not HF_TOKEN:
+        return "❌ HF_TOKEN not set. Please add it to Space Settings > Secrets."
     API_URL = f"https://api-inference.huggingface.co/models/{STT_MODEL}"
+    # Read audio file
     with open(audio_file_path, "rb") as f:
         data = f.read()
+    print(f"📤 Sending audio to {STT_MODEL}...")
+    # Try up to 3 times (model might be loading)
+    for attempt in range(3):
+        response = requests.post(API_URL, headers=HEADERS, data=data)
+        print(f"📥 Response status: {response.status_code}")
+        print(f"📥 Response text: {response.text[:200]}...")  # First 200 chars
+        # Check if model is loading
+        if "loading" in response.text.lower():
+            wait_time = (attempt + 1) * 10
+            print(f"⏳ Model loading, waiting {wait_time}s...")
+            time.sleep(wait_time)
+            continue
+        # Try to parse JSON
+        try:
+            result = response.json()
+            if isinstance(result, dict) and 'text' in result:
+                return result['text']
+            elif isinstance(result, dict) and 'error' in result:
+                return f"❌ API Error: {result['error']}"
+            else:
+                return f"❌ Unexpected response format: {result}"
+        except json.JSONDecodeError:
+            if attempt == 2:
+                return f"❌ Failed to parse response. Raw text: {response.text[:500]}"
+    return "❌ Model failed to load after 3 attempts. Try a smaller audio file or check HF_TOKEN."
+# --- DEBUGGED FUNCTION: Clinical Note Generation ---
 def generate_clinical_note(transcript):
+    """Takes transcript, returns structured SOAP note"""
     if not transcript or len(transcript) < 20:
+        return "❌ Transcription too short or empty."
+    if transcript.startswith("❌"):
+        return "❌ Cannot generate note due to transcription error."
+    if not HF_TOKEN:
+        return "❌ HF_TOKEN not set."
+    # Simpler prompt for Phi-3
     prompt = f"""
+    You are a medical scribe. Convert this conversation into a SOAP note format.
     Conversation:
     {transcript}
+    Generate:
     SUBJECTIVE:
     CHIEF COMPLAINT:
+    HISTORY:
     OBJECTIVE:
+    EXAM FINDINGS:
     ASSESSMENT:
     PLAN:
     """
     API_URL = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"
     payload = {
         "inputs": prompt,
         "parameters": {
+            "max_new_tokens": 300,
+            "temperature": 0.3,
             "return_full_text": False
         }
     }
+    print(f"📤 Sending transcript to {LLM_MODEL}...")
+    for attempt in range(3):
+        response = requests.post(API_URL, headers=HEADERS, json=payload)
+        print(f"📥 LLM Response status: {response.status_code}")
+        if "loading" in response.text.lower():
+            wait_time = (attempt + 1) * 10
+            print(f"⏳ LLM loading, waiting {wait_time}s...")
+            time.sleep(wait_time)
+            continue
+        try:
+            result = response.json()
+            if isinstance(result, list) and len(result) > 0:
+                if 'generated_text' in result[0]:
+                    return result[0]['generated_text'].strip()
+            elif isinstance(result, dict) and 'generated_text' in result:
+                return result['generated_text'].strip()
+            elif isinstance(result, dict) and 'error' in result:
+                return f"❌ LLM Error: {result['error']}"
+            else:
+                return f"❌ Unexpected LLM response: {result}"
+        except json.JSONDecodeError:
+            if attempt == 2:
+                return f"❌ Failed to parse LLM response. Raw: {response.text[:300]}"
+    return "❌ LLM failed to load."
+# --- MAIN PIPELINE ---
 def process_patient_encounter(audio):
+    """Main workflow"""
     if audio is None:
         return "Please upload an audio file.", ""
+    print(f"\n🎤 Processing audio: {audio}")
+    # Step 1: Transcribe
+    transcript = transcribe_audio(audio)
+    print(f"📝 Transcript length: {len(transcript)} chars")
+    # Step 2: Generate Note
+    if transcript.startswith("❌"):
+        return transcript, ""
+    note = generate_clinical_note(transcript)
+    return transcript, note
+# --- GRADIO UI (Fixed for Gradio 6.0) ---
+demo = gr.Blocks(title="OpenScribe - Medical AI Demo")
+with demo:
     gr.Markdown("""
     # 🏥 OpenScribe: AI Clinical Documentation
     **Educational Replica of the Viscrow Health AI Pipeline.**
     *Built by Arafat Anam Chowdhury*
+    ### 🔧 Setup Instructions:
+    1. Go to **Settings > Secrets** and add `HF_TOKEN` (get one free at huggingface.co/settings/tokens)
+    2. Upload an audio file (MP3 or WAV) of a medical conversation
+    3. Click "Generate Clinical Note"
+    **⚠️ Note:** First run may take 30-60 seconds as models warm up. Subsequent runs are faster.
     """)
     with gr.Row():
         with gr.Column(scale=1):
+            audio_input = gr.Audio(
+                type="filepath",
+                label="Upload Doctor-Patient Conversation",
+                sources=["upload", "microphone"]
+            )
             run_btn = gr.Button("📋 Generate Clinical Note", variant="primary", size="lg")
+            # Debug info
+            token_status = "✅ Token Set" if HF_TOKEN else "❌ Token Missing - Add HF_TOKEN to Secrets"
+            gr.Markdown(f"**Status:** {token_status}")
         with gr.Column(scale=2):
+            transcript_output = gr.Textbox(
+                label="1. Raw Transcription",
+                lines=5,
+                placeholder="Transcribed text will appear here..."
+            )
+            note_output = gr.Textbox(
+                label="2. Generated SOAP Note",
+                lines=15,
+                placeholder="Structured clinical note will appear here..."
+            )
     run_btn.click(
         fn=process_patient_encounter,
     gr.Markdown("""
     ---
+    ### 📋 Sample Test Audio:
+    Don't have an audio file? [Click here to download a sample](https://github.com/AssemblyAI-Examples/audio-examples/raw/main/20230607_me_canadian_wildfires.mp3)
+    ### 🔍 Debugging:
+    Check the **Logs** tab at the bottom of this page to see exactly what's happening.
     """)
 if __name__ == "__main__":
+    demo.launch(theme=gr.themes.Soft())