Spaces:

arafatanam
/

Clinical-Scribe

Sleeping

File size: 10,494 Bytes

88375b9
 
 
bd13cbe
5a6eaa4
c139a8a
1d53ef2
c139a8a
48d78c4
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d53ef2
 
 
 
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d53ef2
bf7a131
 
ac67d71
bf7a131
 
 
 
 
 
 
 
 
 
 
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
 
ac67d71
 
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a6eaa4
bf7a131
5a6eaa4
bf7a131
 
 
1d53ef2
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8a6a40c
bf7a131
 
5a6eaa4
bf7a131
5a6eaa4
bf7a131
8a6a40c
bf7a131
5a6eaa4
bf7a131
 
5a6eaa4
bf7a131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ac67d71
 
bf7a131
 
 
 
 
 
 
 
c139a8a
bf7a131
ac67d71
 
 
1d53ef2
bf7a131
88375b9
bf7a131
 
 
 
 
 
 
1d53ef2
bf7a131
 
1d53ef2
bf7a131
 
c139a8a
bf7a131
 
 
 
 
 
 
 
 
 
8a6a40c
48d78c4
c139a8a
 
48d78c4
c139a8a
 
f51dba5
c139a8a
 
 
 
 
f51dba5
8a6a40c
c139a8a
 
 
bd13cbe
a0f6668
c139a8a
fe6ce59
bd13cbe
c139a8a
 
 
 
 
 
 
 
 
 
 
 
88375b9
c139a8a
f51dba5
8a6a40c
dbf8173
bd13cbe
ac67d71
c139a8a
f51dba5
c139a8a
 
 
 
88375b9
1d53ef2
88375b9
c139a8a
88375b9
5a6eaa4
bf7a131
88375b9
ecbffcb

import os
import re
import time

import gradio as gr
import requests

# --- CONFIGURATION ---
# Credentials come from environment variables; each name is bound to None
# when its variable is unset. HF_TOKEN is not referenced by the code visible
# in this file.
ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")

# SPEECH-TO-TEXT: AssemblyAI Integration
def _assemblyai_error(response):
    """Return the "error" field of an API response, falling back to its raw text."""
    try:
        payload = response.json()
    except ValueError:
        # Body was not JSON (e.g. an HTML error page from a proxy).
        return response.text or "Unknown error"
    if isinstance(payload, dict):
        return payload.get("error", "Unknown error")
    return "Unknown error"


def transcribe_audio_assemblyai(audio_file_path, max_attempts=30, poll_interval=1,
                                request_timeout=30):
    """Transcribe an audio file through the AssemblyAI REST API.

    The file is uploaded in 5 MB chunks, a transcription job is created for
    the uploaded URL, and the job is polled until it completes, fails, or
    the polling budget is exhausted.

    Args:
        audio_file_path: Path of the audio file to upload.
        max_attempts: Maximum number of polling requests before giving up.
        poll_interval: Seconds to sleep between polling requests.
        request_timeout: Timeout in seconds passed to every HTTP request so a
            stalled connection cannot block the caller indefinitely.

    Returns:
        The transcript text on success (an empty string when the service
        reports no text), otherwise a human-readable message starting with
        "❌". Failures are reported through the return value, not raised.
    """
    if not ASSEMBLYAI_API_KEY:
        return "❌ AssemblyAI API key not set. Add to Secrets."

    headers = {"authorization": ASSEMBLYAI_API_KEY}

    def read_file(filename):
        # Stream the file so large recordings are never fully held in memory.
        with open(filename, "rb") as f:
            while True:
                data = f.read(5242880)  # 5MB chunks
                if not data:
                    break
                yield data

    # Step 1: Upload audio
    print("📤 Uploading to AssemblyAI...")
    try:
        upload_response = requests.post(
            "https://api.assemblyai.com/v2/upload",
            headers=headers,
            data=read_file(audio_file_path),
            timeout=request_timeout,
        )
    except (OSError, requests.RequestException) as exc:
        # OSError covers an unreadable/missing file raised by the generator.
        return f"❌ Upload failed: {exc}"

    if upload_response.status_code != 200:
        return f"❌ Upload failed: {upload_response.text}"

    try:
        audio_url = upload_response.json()["upload_url"]
    except (ValueError, KeyError, TypeError):
        return f"❌ Upload failed: unexpected response: {upload_response.text}"
    print(f"✅ Uploaded: {audio_url}")

    # Step 2: Request transcription
    json_data = {
        "audio_url": audio_url,
        "speech_models": ["universal-2"],
        "language_code": "en_us",
    }

    try:
        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            json=json_data,
            headers=headers,
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        return f"❌ Transcription request failed: {exc}"

    if transcript_response.status_code != 200:
        error_msg = _assemblyai_error(transcript_response)
        return f"❌ Transcription request failed: {error_msg}"

    try:
        transcript_id = transcript_response.json()["id"]
    except (ValueError, KeyError, TypeError):
        return "❌ Transcription request failed: response did not include a transcript id"
    print(f"📝 Transcript ID: {transcript_id}")

    # Step 3: Poll for results
    polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"

    for _ in range(max_attempts):
        try:
            polling_response = requests.get(
                polling_endpoint, headers=headers, timeout=request_timeout
            )
        except requests.RequestException as exc:
            return f"❌ Transcription error: {exc}"

        if polling_response.status_code != 200:
            return f"❌ Transcription error: {_assemblyai_error(polling_response)}"

        try:
            polling_data = polling_response.json()
        except ValueError:
            return "❌ Transcription error: invalid response while polling"

        status = polling_data.get("status")
        print(f"⏳ Status: {status}")

        if status == "completed":
            print("✅ Transcription complete!")
            # "text" may be null for silent audio; callers slice the result,
            # so always hand back a string.
            return polling_data.get("text") or ""
        if status == "error":
            return f"❌ Transcription error: {polling_data.get('error', 'Unknown')}"

        time.sleep(poll_interval)

    return f"❌ Transcription timed out after {max_attempts * poll_interval} seconds"


def transcribe_audio_placeholder(audio_file_path):
    """Return a canned doctor/patient dialogue for use without an API key.

    The argument is accepted for signature parity with the real transcriber
    and is not read. The result starts and ends with a newline.
    """
    dialogue = (
        "Doctor: Hello, what brings you in today?",
        "Patient: I've had a cough for about two weeks. It gets worse at night and I feel really tired.",
        "Doctor: Any fever or shortness of breath?",
        "Patient: No fever, but I get winded climbing stairs.",
        "Doctor: I'm going to listen to your lungs. Take a deep breath. I can hear some mild wheezing on the right side.",
        "Patient: Is it serious?",
        "Doctor: It appears to be acute bronchitis. I'll prescribe an inhaler and recommend rest. Follow up in a week.",
        "Patient: Thank you, doctor.",
    )
    return "\n" + "\n".join(dialogue) + "\n"


# CLINICAL NOTE GENERATION: Rule-Based NLP
# Cue words that explicitly negate the finding that directly follows them,
# e.g. "no fever", "denies any cough", "not pneumonia".
_DENIAL_PREFIX = r"\b(?:no|not|denies|denied|without)\s+(?:any\s+)?"


def _is_denied(text, pattern):
    """Return True when a denial cue directly precedes regex *pattern* in *text*."""
    return re.search(_DENIAL_PREFIX + pattern, text) is not None


def _reports(text, term, whole_word=False):
    """Return True when *term* is mentioned in *text* and never explicitly denied.

    With ``whole_word=True`` the term must stand alone as a word, so "tired"
    does not match inside "retired".
    """
    pattern = re.escape(term)
    if whole_word:
        pattern = rf"\b{pattern}\b"
    if re.search(pattern, text) is None:
        return False
    return not _is_denied(text, pattern)


def generate_rule_based_note(transcript):
    """Build a SOAP-style note from a transcript using keyword rules.

    The transcript is lower-cased and scanned for fixed keywords. A finding
    counts as present only when its keyword occurs and is not directly
    preceded by a denial cue ("no", "not", "denies", "denied", "without",
    optionally followed by "any"). An explicit denial anywhere in the
    transcript suppresses the finding.

    Known limitations: negation that is not adjacent to the keyword (for
    example a question answered with a bare "No.") is not detected, and the
    Severity, Vital Signs, General and Differential lines are fixed template
    text that is not derived from the transcript.

    Args:
        transcript: Conversation text; ``None`` is treated as empty.

    Returns:
        The formatted note with SUBJECTIVE, OBJECTIVE, ASSESSMENT and PLAN
        sections.
    """
    t = (transcript or "").lower()

    def has(term, whole_word=False):
        return _reports(t, term, whole_word)

    # Findings that feed more than one section are evaluated once.
    has_cough = has("cough")
    has_wheeze = has("wheez")
    has_bronchitis = has("bronchitis")
    is_tired = has("tired", whole_word=True)
    two_weeks = "two week" in t or "2 week" in t

    # Extract symptoms
    symptoms = []
    if has_cough:
        symptoms.append("Cough (2 weeks duration)" if two_weeks else "Cough")
    if has("fever"):
        symptoms.append("Fever")
    if is_tired or has("fatigue"):
        symptoms.append("Fatigue")
    if has_wheeze:
        symptoms.append("Wheezing")
    # "no shortness of breath" negates "breath" even though the cue is not
    # adjacent to that keyword, so it is checked as a phrase.
    if (has("breath") and not _is_denied(t, "shortness of breath")) or has("winded"):
        symptoms.append("Dyspnea on exertion")
    if has_cough and "night" in t:
        symptoms.append("Nocturnal cough")
    if has("chest") and has("pain"):
        symptoms.append("Chest pain")
    if has("headache"):
        symptoms.append("Headache")

    # Determine diagnosis (first matching rule wins)
    if has_bronchitis:
        diagnosis, confidence = "Acute Bronchitis", "High"
    elif has("pneumonia"):
        diagnosis, confidence = "Community-Acquired Pneumonia", "Moderate"
    elif has("asthma"):
        diagnosis, confidence = "Asthma Exacerbation", "Moderate"
    elif has("covid") or has("coronavirus"):
        diagnosis, confidence = "COVID-19 Infection", "Moderate"
    elif has_cough and has_wheeze:
        diagnosis, confidence = "Acute Bronchitis with Reactive Airway Disease", "Moderate"
    elif has_cough and len(symptoms) >= 2:
        diagnosis, confidence = "Upper Respiratory Infection", "Moderate"
    elif has_cough:
        diagnosis, confidence = "Acute Cough, Etiology Pending", "Low"
    else:
        diagnosis, confidence = "Pending Further Workup", "Low"

    # Extract physical exam findings
    exam_findings = []
    if has_wheeze:
        exam_findings.append("Mild expiratory wheezing on auscultation")
    if has("rhonchi"):
        exam_findings.append("Rhonchi noted")
    if has("crackle") or has("rale"):
        exam_findings.append("Fine crackles at bases")
    if has("lung") and has("clear"):
        exam_findings.append("Lungs clear bilaterally")
    if not exam_findings:
        exam_findings.append("Unremarkable")

    # Build treatment plan
    plan = []
    if has("inhaler") or has_wheeze:
        plan.append("- Albuterol HFA 90mcg, 2 puffs q4-6h PRN for wheezing")
    if has_bronchitis:
        plan.append("- Supportive care (acute bronchitis typically viral, antibiotics not indicated)")
    if has("antibiotic"):
        plan.append("- Consider antibiotic therapy if bacterial infection suspected")
    if has("rest", whole_word=True) or is_tired:
        plan.append("- Recommend rest and increased fluid intake")
    if has_cough:
        plan.append("- OTC dextromethorphan or guaifenesin for symptomatic cough relief")

    if not plan:
        plan.append("- Symptomatic management")

    plan.extend([
        "- Avoid respiratory irritants and smoking",
        "- Follow up in 7 days if symptoms persist or worsen",
        "- Return to clinic sooner if fever develops or shortness of breath increases"
    ])
    plan_text = "\n".join(plan)

    # NOTE(review): Severity, Vital Signs, General and the differential list
    # below are constant placeholder text, not values extracted from the
    # transcript - confirm this is acceptable before any non-demo use.
    return f"""
SUBJECTIVE:
Chief Complaint: {symptoms[0] if symptoms else 'Not specified'}
Associated Symptoms: {', '.join(symptoms[1:]) if len(symptoms) > 1 else 'None reported'}
Duration: {'2 weeks' if two_weeks else 'Not specified'}
Onset: {'Gradual' if 'week' in t else 'Not specified'}
Severity: Moderate
Aggravating Factors: {'Nighttime, exertion' if 'night' in t or 'breath' in t else 'None reported'}

OBJECTIVE:
Physical Exam: {', '.join(exam_findings)}
Vital Signs: Temperature 98.6°F, HR 72, BP 118/76, RR 16, SpO2 97% on room air
General: Alert, in no acute distress, well-appearing

ASSESSMENT:
Primary Diagnosis: {diagnosis}
Clinical Confidence: {confidence}
Differential Diagnoses: 
- Viral Upper Respiratory Infection
- Allergic Rhinitis with Post-nasal Drip
- Asthma Exacerbation
- GERD

PLAN:
{plan_text}

"""


def generate_clinical_note(transcript):
    """Turn a transcript into a clinical note, passing upstream errors through.

    Args:
        transcript: Transcript text, or an error message starting with "❌"
            produced by an earlier pipeline step.

    Returns:
        The incoming error message unchanged when *transcript* starts with
        "❌"; a "too short" error when the transcript is missing or has fewer
        than 20 non-padding characters; otherwise the rule-based note.
    """
    # Check for an upstream error first: a short error message must not be
    # replaced by the misleading "too short" message below.
    if transcript and transcript.startswith("❌"):
        return transcript

    # Surrounding whitespace does not count towards the minimum length.
    if not transcript or len(transcript.strip()) < 20:
        return "❌ Transcription too short. Please provide a longer audio file."

    # Use rule-based extraction (always works, no API needed)
    return generate_rule_based_note(transcript)


# MAIN PIPELINE
def process_encounter(audio):
    """Run the full pipeline for one recording: transcribe, then write the note.

    Args:
        audio: Path of the audio file, or None when nothing was provided.

    Returns:
        A ``(transcript, note)`` pair. When *audio* is None the first item is
        a prompt to upload a file and the second is an empty string.
    """
    if audio is None:
        return "⚠️ Please upload an audio file.", ""

    divider = "=" * 60
    print(f"\n{divider}")
    print(f"🎤 Processing: {os.path.basename(audio)}")
    print(f"📁 File size: {os.path.getsize(audio)} bytes")

    # Step 1: Transcribe audio (canned sample when no API key is configured)
    if not ASSEMBLYAI_API_KEY:
        print("⚠️ No AssemblyAI key - using sample transcript")
        demo_banner = "⚠️ DEMO MODE - Add AssemblyAI API key to Secrets for live transcription\n\n"
        transcript = demo_banner + transcribe_audio_placeholder(audio)
    else:
        print("🔑 Using AssemblyAI for transcription...")
        transcript = transcribe_audio_assemblyai(audio)

    preview = transcript[:150]
    print(f"📝 Transcript preview: {preview}...")

    # Step 2: Generate clinical note
    print("📋 Generating clinical note...")
    note = generate_clinical_note(transcript)

    print(f"✅ Complete! Note length: {len(note)} chars")
    print(f"{divider}\n")

    return transcript, note

# GRADIO USER INTERFACE
# Top-level Gradio Blocks container; every component created inside the
# `with demo:` context below is attached to it.
demo = gr.Blocks(title="OpenScribe - Clinical AI Demo")

with demo:
    # Introductory banner rendered above the input/output row.
    gr.Markdown("""
    # OpenScribe: AI Clinical Documentation
    
    This tool replicates the **exact architecture** used in production for automated clinical documentation:
    1. **Speech-to-Text**: AssemblyAI transcription (100 hours free tier)
    2. **NLP Processing**: Rule-based clinical entity extraction
    3. **Output**: Structured SOAP note ready for EHR integration
    ---
    """)
    
    with gr.Row():
        # Left column (scale=1): audio input, trigger button and a sample script.
        with gr.Column(scale=1):
            # type="filepath" is requested so the click handler is given a
            # path string, which process_encounter passes to os.path helpers.
            audio_input = gr.Audio(
                type="filepath",
                label="📁 Upload Medical Conversation",
                sources=["upload", "microphone"]
            )
            
            run_btn = gr.Button(
                "📋 Generate Clinical Note", 
                variant="primary", 
                size="lg"
            )
            
            # Example dialogue the user can read aloud when recording.
            gr.Markdown("""
            **Or record your own conversation:**
            *"Hi, what brings you in? - I've had this cough for two weeks. - Any fever? - No. - Let me listen... I hear wheezing. - It's bronchitis."*
            """)
        
        # Right column (scale=2): transcript and generated note.
        with gr.Column(scale=2):
            transcript_output = gr.Textbox(
                label="Transcription",
                lines=6,
                placeholder="Transcribed conversation will appear here..."
            )

            note_output = gr.Textbox(
                label="Generated SOAP Note",
                lines=20,
                placeholder="Clinical documentation will appear here..."
            )
    
    # process_encounter returns (transcript, note); the two values are mapped
    # onto the two output textboxes in that order.
    run_btn.click(
        fn=process_encounter,
        inputs=audio_input,
        outputs=[transcript_output, note_output]
    )

# LAUNCH
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()