Spaces:

arafatanam
/

Clinical-Scribe

Sleeping

App Files Files Community

arafatanam committed on Apr 9

Commit

1d53ef2

verified ·

1 Parent(s): a0f6668

Update app.py

Browse files

Files changed (1) hide show

app.py +172 -246

app.py CHANGED Viewed

@@ -1,336 +1,262 @@
 import gradio as gr
 import os
 import requests
-import json
 import time
-import base64
 # --- CONFIGURATION ---
 HF_TOKEN = os.environ.get("HF_TOKEN")
-# Updated model IDs that work with current HF Inference API (April 2026)
-# Using models confirmed to work on free tier
-STT_MODEL = "openai/whisper-base"  # Base model works
-LLM_MODEL = "google/flan-t5-large"  # Reliable free model for summarization
-HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
-# --- WORKING FUNCTION: Speech-to-Text using Inference API v2 ---
def transcribe_audio(audio_file_path):
    """Transcribe an audio file with the hosted ``STT_MODEL``.

    The raw file bytes are POSTed to the Hugging Face Inference API, with up
    to three attempts. A 503 (model still loading) waits 15s, then 30s, before
    the next attempt; a timeout waits 10s. No wait happens after the final
    attempt. A 410 hands the file to ``transcribe_audio_fallback``.

    Args:
        audio_file_path: Path of the audio file to send, or ``None``.

    Returns:
        The transcript text on success. Every failure is reported as a string
        starting with "❌", which callers use to detect errors; the last
        observed error is included so it is not lost.
    """
    if audio_file_path is None:
        return "❌ No audio provided."
    if not HF_TOKEN:
        return "❌ HF_TOKEN not set. Please add it to Space Settings > Secrets."

    api_url = f"https://api-inference.huggingface.co/models/{STT_MODEL}"

    # An unreadable or missing file is reported like any other failure instead
    # of raising into the UI callback.
    try:
        with open(audio_file_path, "rb") as f:
            audio_bytes = f.read()
    except OSError as e:
        return f"❌ Error: {e}"

    print(f"📤 Sending audio ({len(audio_bytes)} bytes) to {STT_MODEL}...")

    max_attempts = 3
    last_error = "no response received"
    for attempt in range(max_attempts):
        retries_left = attempt < max_attempts - 1
        try:
            response = requests.post(
                api_url,
                headers=HEADERS,
                data=audio_bytes,
                timeout=30,
            )
        except requests.exceptions.Timeout:
            last_error = "request timed out"
            print(f"⏱️ Timeout on attempt {attempt + 1}")
            if retries_left:
                time.sleep(10)
            continue
        except requests.exceptions.RequestException as e:
            last_error = str(e)
            print(f"❌ Exception: {last_error}")
            continue

        status = response.status_code
        print(f"📥 Response status: {status}")

        if status == 200:
            try:
                result = response.json()
            except ValueError as e:
                last_error = f"response was not valid JSON ({e})"
                print(f"❌ {last_error}")
                continue
            if isinstance(result, dict) and isinstance(result.get('text'), str):
                return result['text'].strip()
            if isinstance(result, str):
                return result.strip()
            return f"❌ Unexpected format: {result}"

        if status == 503:
            # Model is loading: back off, but never after the final attempt.
            last_error = "model still loading (HTTP 503)"
            if retries_left:
                wait_time = (attempt + 1) * 15
                print(f"⏳ Model loading (503), waiting {wait_time}s...")
                time.sleep(wait_time)
            continue

        if status == 410:
            print(f"❌ Model {STT_MODEL} is deprecated. Trying alternative...")
            try:
                return transcribe_audio_fallback(audio_file_path)
            except Exception as e:  # best-effort fallback; never crash the UI
                return f"❌ Error: {e}"

        last_error = f"HTTP {status}: {response.text[:200]}"
        print(f"❌ {last_error}")

    return f"❌ Failed after {max_attempts} attempts. Last error: {last_error}"
def transcribe_audio_fallback(audio_file_path):
    """Retry transcription against the ``openai/whisper-tiny`` endpoint.

    Makes up to two attempts, pausing 10s between them (not after the last
    one). File, network and JSON-decoding problems are logged and reported
    through the returned message rather than raised.

    Args:
        audio_file_path: Path of the audio file to send.

    Returns:
        The transcript text, or a message starting with "❌" on failure.
    """
    print("🔄 Trying fallback transcription method...")
    api_url = "https://api-inference.huggingface.co/models/openai/whisper-tiny"
    failure = "❌ Transcription failed. Try using a different audio file format (WAV works best)."

    try:
        with open(audio_file_path, "rb") as f:
            audio_bytes = f.read()
    except OSError as e:
        print(f"❌ Could not read audio file: {e}")
        return failure

    max_attempts = 2
    for attempt in range(max_attempts):
        try:
            response = requests.post(api_url, headers=HEADERS, data=audio_bytes, timeout=30)
            if response.status_code == 200:
                result = response.json()
                if isinstance(result, dict) and isinstance(result.get('text'), str):
                    return result['text'].strip()
        except (requests.exceptions.RequestException, ValueError) as e:
            print(f"❌ Fallback attempt {attempt + 1} failed: {e}")
        # Only pause when another attempt will actually follow.
        if attempt < max_attempts - 1:
            time.sleep(10)
    return failure
-# --- WORKING FUNCTION: Clinical Note Generation ---
 def generate_clinical_note(transcript):
-    """Uses Flan-T5 for reliable summarization"""
     if not transcript or len(transcript) < 20:
-        return "❌ Transcription too short or empty."
     if transcript.startswith("❌"):
-        return "❌ Cannot generate note due to transcription error."
     if not HF_TOKEN:
-        return "❌ HF_TOKEN not set."
     API_URL = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"
-    # Craft a prompt for medical summarization
-    prompt = f"""
-    Convert this medical conversation into a clinical SOAP note.
-    Conversation: {transcript}
-    SOAP Note Format:
-    SUBJECTIVE: (patient's symptoms and complaints)
-    CHIEF COMPLAINT: (primary reason for visit)
-    OBJECTIVE: (doctor's observations)
-    ASSESSMENT: (likely diagnosis)
-    PLAN: (treatment and follow-up)
-    """
     payload = {
         "inputs": prompt,
         "parameters": {
             "max_new_tokens": 250,
-            "temperature": 0.3,
-            "do_sample": False
         }
     }
-    print(f"📤 Sending to {LLM_MODEL}...")
-    for attempt in range(3):
-        try:
-            response = requests.post(
-                API_URL,
-                headers=HEADERS,
-                json=payload,
-                timeout=30
-            )
-            print(f"📥 LLM Response: {response.status_code}")
-            if response.status_code == 200:
-                result = response.json()
-                # Flan-T5 returns a list with generated_text
-                if isinstance(result, list) and len(result) > 0:
-                    return result[0].get('generated_text', str(result))
-                elif isinstance(result, dict):
-                    return result.get('generated_text', str(result))
-                else:
-                    return str(result)
-            elif response.status_code == 503:
-                wait_time = (attempt + 1) * 15
-                print(f"⏳ LLM loading, waiting {wait_time}s...")
-                time.sleep(wait_time)
-                continue
-            elif response.status_code == 410:
-                print("🔄 Trying alternative LLM...")
-                return generate_clinical_note_fallback(transcript)
-        except Exception as e:
-            print(f"❌ LLM Error: {str(e)}")
-            if attempt == 2:
-                return f"❌ Error: {str(e)}"
-    return "❌ LLM failed to load."
def generate_clinical_note_fallback(transcript):
    """Summarize the transcript with ``facebook/bart-large-cnn`` as a backup.

    A single request is made. A non-200 status, a network or JSON error, and
    an empty or unexpected payload all produce the same failure message
    instead of raising.

    Args:
        transcript: Conversation text to summarize.

    Returns:
        The ``summary_text`` of the first result, or a message starting with
        "❌" when no summary could be obtained.
    """
    print("🔄 Using fallback LLM...")
    api_url = "https://api-inference.huggingface.co/models/facebook/bart-large-cnn"
    failure = "❌ Unable to generate clinical note. Please check the logs."

    prompt = f"Summarize this medical conversation into a clinical note: {transcript}"
    payload = {
        "inputs": prompt,
        "parameters": {"max_new_tokens": 200, "temperature": 0.3}
    }

    try:
        response = requests.post(api_url, headers=HEADERS, json=payload, timeout=30)
        if response.status_code != 200:
            print(f"❌ Fallback LLM HTTP {response.status_code}: {response.text[:200]}")
            return failure
        result = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        print(f"❌ Fallback LLM error: {e}")
        return failure

    # Guard the shape before indexing: an empty list used to raise IndexError.
    if isinstance(result, list) and result and isinstance(result[0], dict):
        return result[0].get('summary_text', str(result))
    return failure
-# --- SIMPLE RULE-BASED FALLBACK (Always works, no API needed) ---
 def generate_rule_based_note(transcript):
-    """Creates a simple note using keyword matching (no API required)"""
-    transcript_lower = transcript.lower()
-    # Simple keyword detection
     symptoms = []
-    if "cough" in transcript_lower: symptoms.append("Cough")
-    if "fever" in transcript_lower: symptoms.append("Fever")
-    if "headache" in transcript_lower: symptoms.append("Headache")
-    if "pain" in transcript_lower: symptoms.append("Pain")
-    if "tired" in transcript_lower or "fatigue" in transcript_lower: symptoms.append("Fatigue")
-    diagnosis = "Upper Respiratory Infection" if "cough" in transcript_lower else "General Examination"
-    note = f"""
 SUBJECTIVE:
-Chief Complaint: {', '.join(symptoms) if symptoms else 'Not specified'}
-Patient reports symptoms consistent with {diagnosis.lower()}.
 OBJECTIVE:
-Physical examination performed. Vital signs stable.
 ASSESSMENT:
-Likely {diagnosis}.
 PLAN:
-- Symptomatic treatment recommended
-- Follow up in 1 week if symptoms persist
-- Return to clinic if symptoms worsen
-⚠️ Note: This is a rule-based fallback note. For better results, ensure HF_TOKEN is properly configured.
 """
-    return note
-# --- MAIN PIPELINE ---
def process_patient_encounter(audio):
    """Transcribe an uploaded recording and draft a clinical note from it.

    Returns a ``(transcript, note)`` pair. When transcription fails the error
    text is returned with an empty note. When the API note is an error or is
    shorter than 50 characters, the rule-based note is used instead.
    """
    if audio is None:
        return "Please upload an audio file.", ""

    file_name = os.path.basename(audio)
    print(f"\n🎤 Processing audio: {file_name}")

    # Stage 1: speech to text.
    transcript = transcribe_audio(audio)
    preview = transcript[:100]
    print(f"📝 Transcript: {preview}...")
    if transcript.startswith("❌"):
        return transcript, ""

    # Stage 2: note generation, with the keyword-based generator as backup.
    note = generate_clinical_note(transcript)
    api_note_usable = not note.startswith("❌") and len(note) >= 50
    if not api_note_usable:
        print("⚠️ API failed, using rule-based fallback...")
        note = generate_rule_based_note(transcript)

    return transcript, note
-# --- GRADIO UI ---
# Page layout: upload and button on the left, transcript and note on the right.
with gr.Blocks(title="OpenScribe - Medical AI Demo") as demo:
    gr.Markdown("""
    # 🏥 OpenScribe: AI Clinical Documentation
    **Educational Replica of the Viscrow Health AI Pipeline.**
    ### ⚙️ Current Configuration:
    - **STT:** Whisper Base (via HF Inference API)
    - **LLM:** Flan-T5 Large (via HF Inference API)
    - **Fallback:** Rule-based extraction (no API needed)
    ### 🔧 Setup:
    1. Add `HF_TOKEN` in **Settings > Secrets** (Get one at huggingface.co/settings/tokens)
    2. Upload WAV or MP3 file
    3. First run may take 30-60s while models warm up
    """)

    with gr.Row():
        with gr.Column(scale=1):
            audio_input = gr.Audio(type="filepath", label="Upload Medical Conversation")
            run_btn = gr.Button("📋 Generate Note", variant="primary", size="lg")
            # Token status line, chosen once when the page is built.
            token_status = (
                "✅ **HF_TOKEN:** Configured"
                if HF_TOKEN
                else "⚠️ **HF_TOKEN:** Missing - Add to Secrets for better results"
            )
            gr.Markdown(token_status)

        with gr.Column(scale=2):
            transcript_output = gr.Textbox(
                label="📝 Transcription",
                lines=6,
                placeholder="Transcribed conversation...",
            )
            note_output = gr.Textbox(
                label="📋 Clinical Note (SOAP Format)",
                lines=14,
                placeholder="Generated clinical documentation...",
            )

    # One click runs the whole pipeline and fills both text boxes.
    run_btn.click(
        fn=process_patient_encounter,
        inputs=audio_input,
        outputs=[transcript_output, note_output],
    )

    # Collapsed help section with sample audio links and troubleshooting.
    with gr.Accordion("📁 Sample Files & Troubleshooting", open=False):
        gr.Markdown("""
        ### Test Audio Files:
        - [Medical Conversation Sample 1](https://github.com/AssemblyAI-Examples/audio-examples/raw/main/20230607_me_canadian_wildfires.mp3)
        - [Clean Voice Sample](https://www.voiptroubleshooter.com/open_speech/american/OSR_us_000_0010_8k.wav)
        ### Troubleshooting 410 Error:
        The 410 error means the model endpoint changed. This version uses:
        - `openai/whisper-base` for transcription
        - `google/flan-t5-large` for summarization
        If you still get errors, the **rule-based fallback** will still demonstrate the workflow.
        """)

if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())

 import gradio as gr
 import os
 import requests
 import time
 # --- CONFIGURATION ---
# Service credentials, both taken from the process environment.
ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")

# Hosted model used for note generation.
LLM_MODEL = "google/flan-t5-large"
+# --- WORKING Transcription with AssemblyAI ---
def transcribe_audio_assemblyai(audio_file_path, *, poll_attempts=30,
                                poll_interval=1, request_timeout=30):
    """Transcribe an audio file through the AssemblyAI REST API.

    Three steps: stream the file to ``/v2/upload``, create a job at
    ``/v2/transcript``, then poll the job until it reports ``completed`` or
    ``error``. Every HTTP call carries a timeout, and file, network, JSON and
    missing-field problems are turned into an error string instead of raising
    into the UI callback.

    Args:
        audio_file_path: Path of the audio file to upload.
        poll_attempts: Maximum number of status polls (default 30).
        poll_interval: Seconds to wait between polls (default 1).
        request_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        The transcript text (an empty string if the service returned none),
        or a message starting with "❌" on any failure.
    """
    if not ASSEMBLYAI_API_KEY:
        return "❌ AssemblyAI API key not set. Add to Secrets."

    headers = {"authorization": ASSEMBLYAI_API_KEY}
    chunk_size = 5242880  # 5MB chunks

    def read_file(filename):
        # Stream the file so large recordings are never fully held in memory.
        with open(filename, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                yield chunk

    try:
        # Step 1: Upload audio
        print("📤 Uploading to AssemblyAI...")
        upload_response = requests.post(
            "https://api.assemblyai.com/v2/upload",
            headers=headers,
            data=read_file(audio_file_path),
            timeout=request_timeout,
        )
        if upload_response.status_code != 200:
            return f"❌ Upload failed: {upload_response.text}"
        audio_url = upload_response.json()["upload_url"]
        print(f"✅ Uploaded: {audio_url}")

        # Step 2: Request transcription
        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            json={"audio_url": audio_url, "language_code": "en_us"},
            headers=headers,
            timeout=request_timeout,
        )
        if transcript_response.status_code != 200:
            return f"❌ Transcription request failed: {transcript_response.text}"
        transcript_id = transcript_response.json()["id"]
        print(f"📝 Transcript ID: {transcript_id}")

        # Step 3: Poll for results
        polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"
        for attempt in range(poll_attempts):
            polling_response = requests.get(
                polling_endpoint, headers=headers, timeout=request_timeout
            )
            if polling_response.status_code != 200:
                return f"❌ Transcription error: HTTP {polling_response.status_code}: {polling_response.text[:200]}"
            polling_data = polling_response.json()
            status = polling_data.get("status")
            if status == "completed":
                print("✅ Transcription complete!")
                # "text" may be null; always hand the caller a string.
                return polling_data.get("text") or ""
            if status == "error":
                return f"❌ Transcription error: {polling_data.get('error', 'Unknown')}"
            if attempt % 5 == 0:
                print(f"⏳ Waiting for transcription... ({status})")
            # No point sleeping when no further poll will follow.
            if attempt < poll_attempts - 1:
                time.sleep(poll_interval)
    except (requests.exceptions.RequestException, OSError, ValueError, KeyError) as e:
        return f"❌ Transcription error: {e}"

    return "❌ Transcription timed out"
+# --- Fallback: Simple local transcription (No API needed) ---
def transcribe_audio_placeholder(audio_file_path):
    """Return a canned doctor/patient dialogue; the audio path is ignored.

    The text starts and ends with a newline, one speaker turn per line.
    """
    dialogue = (
        "Doctor: Hello, what brings you in today?",
        "Patient: I've had a cough for about two weeks. It gets worse at night and I feel tired.",
        "Doctor: Any fever?",
        "Patient: No fever.",
        "Doctor: I'll listen to your lungs. Take a deep breath. I can hear some wheezing.",
        "Patient: Is it serious?",
        "Doctor: It appears to be acute bronchitis. I'll prescribe an inhaler.",
        "Patient: Thank you, doctor.",
    )
    return "\n" + "\n".join(dialogue) + "\n"
+# --- Clinical Note Generation ---
 def generate_clinical_note(transcript):
+    """Generates SOAP note from transcript"""
     if not transcript or len(transcript) < 20:
+        return "❌ Transcription too short."
     if transcript.startswith("❌"):
+        return transcript
+    # If no HF_TOKEN, use rule-based extraction
     if not HF_TOKEN:
+        return generate_rule_based_note(transcript)
     API_URL = f"https://api-inference.huggingface.co/models/{LLM_MODEL}"
+    HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}
+    prompt = f"""Convert this medical conversation into a SOAP note:
+{transcript}
+SOAP Note:
+SUBJECTIVE:
+CHIEF COMPLAINT:
+OBJECTIVE:
+EXAM FINDINGS:
+ASSESSMENT:
+DIAGNOSIS:
+PLAN:
+TREATMENT:
+FOLLOW-UP:"""
     payload = {
         "inputs": prompt,
         "parameters": {
             "max_new_tokens": 250,
+            "temperature": 0.3
         }
     }
+    print(f"📤 Generating clinical note...")
+    try:
+        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=30)
+        if response.status_code == 200:
+            result = response.json()
+            if isinstance(result, list) and len(result) > 0:
+                return result[0].get('generated_text', str(result))
+        else:
+            print(f"⚠️ LLM API failed, using rule-based fallback")
+            return generate_rule_based_note(transcript)
+    except:
+        return generate_rule_based_note(transcript)
 def generate_rule_based_note(transcript):
+    """Extracts clinical info using keywords"""
+    t = transcript.lower()
+    # Extract symptoms
     symptoms = []
+    if "cough" in t: symptoms.append("Cough (2 weeks)")
+    if "fever" in t: symptoms.append("Fever")
+    if "tired" in t or "fatigue" in t: symptoms.append("Fatigue")
+    if "wheez" in t: symptoms.append("Wheezing")
+    if "breath" in t: symptoms.append("Dyspnea on exertion")
+    # Determine diagnosis
+    if "bronchitis" in t:
+        diagnosis = "Acute Bronchitis"
+    elif "pneumonia" in t:
+        diagnosis = "Pneumonia"
+    elif "cough" in t:
+        diagnosis = "Upper Respiratory Infection"
+    else:
+        diagnosis = "Pending Workup"
+    # Extract plan
+    plan = []
+    if "inhaler" in t: plan.append("- Albuterol inhaler as needed")
+    if "antibiotic" in t: plan.append("- Consider antibiotic therapy")
+    plan.append("- Increase fluid intake")
+    plan.append("- Rest")
+    plan.append("- Follow up in 7 days if symptoms persist")
+    return f"""
 SUBJECTIVE:
+Chief Complaint: {symptoms[0] if symptoms else 'Not specified'}
+Associated Symptoms: {', '.join(symptoms[1:]) if len(symptoms) > 1 else 'None'}
 OBJECTIVE:
+Physical Exam: {'Mild wheezing on auscultation' if 'wheez' in t else 'Unremarkable'}
+Vital Signs: Stable, afebrile
 ASSESSMENT:
+Diagnosis: {diagnosis}
+Differential: Viral URI, Allergic rhinitis, Asthma
 PLAN:
+{chr(10).join(plan)}
+⚠️ Generated using rule-based extraction (educational demo)
 """
+# --- Main Pipeline ---
def process_encounter(audio):
    """Run the pipeline for one upload and return ``(transcript, note)``.

    With ``ASSEMBLYAI_API_KEY`` set the audio is transcribed by AssemblyAI;
    otherwise the built-in sample dialogue is used. The demo-mode banner is
    added only to the transcript shown in the UI: the note is generated from
    the dialogue itself, so the banner never becomes part of the LLM prompt.

    Args:
        audio: Path of the uploaded audio file, or ``None``.

    Returns:
        A pair of strings for the transcript and note text boxes.
    """
    if audio is None:
        return "Please upload an audio file", ""

    print(f"\n🎤 Processing: {os.path.basename(audio)}")

    # Try AssemblyAI, fall back to placeholder
    if ASSEMBLYAI_API_KEY:
        transcript = transcribe_audio_assemblyai(audio) or ""
        displayed = transcript
    else:
        transcript = transcribe_audio_placeholder(audio)
        displayed = "⚠️ No API key - using sample transcript for demonstration\n\n" + transcript

    # Generate note; coerce a missing result so the text box gets a string.
    note = generate_clinical_note(transcript) or ""
    return displayed, note
+# --- Gradio UI ---
# Page layout: upload and button on the left, transcript and note on the right.
with gr.Blocks(title="OpenScribe - Clinical AI Demo") as demo:
    gr.Markdown("""
    # 🏥 OpenScribe: AI Clinical Documentation
    **Educational Demo of Viscrow Health Pipeline** | Built by Arafat Anam Chowdhury
    ✅ **Currently Using:** AssemblyAI (100 hrs free) for transcription + Flan-T5 for summarization
    """)

    with gr.Row():
        with gr.Column(scale=1):
            audio_input = gr.Audio(type="filepath", label="📁 Upload Medical Conversation")
            run_btn = gr.Button("📋 Generate Clinical Note", variant="primary", size="lg")
            # Transcription-mode status line, chosen once when the page is built.
            api_status = (
                "✅ **API:** AssemblyAI Configured"
                if ASSEMBLYAI_API_KEY
                else "⚠️ **Demo Mode:** Add AssemblyAI key in Secrets for live transcription"
            )
            gr.Markdown(api_status)

        with gr.Column(scale=2):
            transcript_output = gr.Textbox(label="📝 Transcription", lines=6)
            note_output = gr.Textbox(label="📋 SOAP Note", lines=15)

    # One click runs the whole pipeline and fills both text boxes.
    run_btn.click(
        fn=process_encounter,
        inputs=audio_input,
        outputs=[transcript_output, note_output],
    )

if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())