"""OpenScribe demo: audio upload -> AssemblyAI transcription -> rule-based SOAP note."""

import os
import time

import gradio as gr
import requests

# --- CONFIGURATION ---
ASSEMBLYAI_API_KEY = os.environ.get("ASSEMBLYAI_API_KEY")
# Read from the environment but not referenced anywhere else in this file.
HF_TOKEN = os.environ.get("HF_TOKEN")

ASSEMBLYAI_BASE_URL = "https://api.assemblyai.com/v2"
UPLOAD_CHUNK_BYTES = 5242880  # 5MB chunks
# (connect, read) timeouts so a stalled connection cannot hang the UI worker.
HTTP_TIMEOUT_SECONDS = (10, 60)
# Every failure in this file is reported as a string starting with this marker;
# generate_clinical_note() relies on it to pass errors through untouched.
ERROR_PREFIX = "❌"

_SAMPLE_TRANSCRIPT = """
Doctor: Hello, what brings you in today?
Patient: I've had a cough for about two weeks. It gets worse at night and I feel really tired.
Doctor: Any fever or shortness of breath?
Patient: No fever, but I get winded climbing stairs.
Doctor: I'm going to listen to your lungs. Take a deep breath. I can hear some mild wheezing on the right side.
Patient: Is it serious?
Doctor: It appears to be acute bronchitis. I'll prescribe an inhaler and recommend rest. Follow up in a week.
Patient: Thank you, doctor.
"""

_INTRO_MARKDOWN = """
# OpenScribe: AI Clinical Documentation

This tool replicates the **exact architecture** used in production for automated clinical documentation:

1. **Speech-to-Text**: AssemblyAI transcription (100 hours free tier)
2. **NLP Processing**: Rule-based clinical entity extraction
3. **Output**: Structured SOAP note ready for EHR integration

---
"""

_RECORDING_HINT_MARKDOWN = """
**Or record your own conversation:**

*"Hi, what brings you in? - I've had this cough for two weeks. - Any fever? - No. - Let me listen... I hear wheezing. - It's bronchitis."*
"""

# Ordered (keywords, diagnosis, confidence) rules; the first rule with any
# keyword present in the transcript wins.
_KEYWORD_DIAGNOSES = (
    (("bronchitis",), "Acute Bronchitis", "High"),
    (("pneumonia",), "Community-Acquired Pneumonia", "Moderate"),
    (("asthma",), "Asthma Exacerbation", "Moderate"),
    (("covid", "coronavirus"), "COVID-19 Infection", "Moderate"),
)


# SPEECH-TO-TEXT: AssemblyAI Integration
def _iter_file_chunks(filename):
    """Yield the file's bytes in UPLOAD_CHUNK_BYTES pieces for a streamed upload."""
    with open(filename, "rb") as f:
        yield from iter(lambda: f.read(UPLOAD_CHUNK_BYTES), b"")


def _run_assemblyai_transcription(audio_file_path, timeout_seconds, poll_interval):
    """Upload, request and poll a transcript; may raise on network/parse errors."""
    headers = {"authorization": ASSEMBLYAI_API_KEY}

    # Step 1: Upload audio
    print("📤 Uploading to AssemblyAI...")
    upload_response = requests.post(
        f"{ASSEMBLYAI_BASE_URL}/upload",
        headers=headers,
        data=_iter_file_chunks(audio_file_path),
        timeout=HTTP_TIMEOUT_SECONDS,
    )
    if upload_response.status_code != 200:
        return f"❌ Upload failed: {upload_response.text}"

    audio_url = upload_response.json()["upload_url"]
    print(f"✅ Uploaded: {audio_url}")

    # Step 2: Request transcription
    json_data = {
        "audio_url": audio_url,
        "speech_models": ["universal-2"],
        "language_code": "en_us",
    }
    transcript_response = requests.post(
        f"{ASSEMBLYAI_BASE_URL}/transcript",
        json=json_data,
        headers=headers,
        timeout=HTTP_TIMEOUT_SECONDS,
    )
    if transcript_response.status_code != 200:
        error_msg = transcript_response.json().get("error", "Unknown error")
        return f"❌ Transcription request failed: {error_msg}"

    transcript_id = transcript_response.json()["id"]
    print(f"📝 Transcript ID: {transcript_id}")

    # Step 3: Poll for results until a terminal status or the deadline.
    polling_endpoint = f"{ASSEMBLYAI_BASE_URL}/transcript/{transcript_id}"
    # Monotonic clock: the budget is real elapsed time (including the time
    # spent inside each request) and is immune to wall-clock adjustments.
    deadline = time.monotonic() + timeout_seconds
    while True:
        polling_response = requests.get(
            polling_endpoint, headers=headers, timeout=HTTP_TIMEOUT_SECONDS
        )
        if polling_response.status_code != 200:
            return f"❌ Polling failed: {polling_response.text}"

        polling_data = polling_response.json()
        status = polling_data.get("status")
        print(f"⏳ Status: {status}")

        if status == "completed":
            print("✅ Transcription complete!")
            # "text" may be null when nothing was recognised; callers slice
            # and measure the result, so always hand back a string.
            return polling_data.get("text") or ""
        if status == "error":
            return f"❌ Transcription error: {polling_data.get('error', 'Unknown')}"

        if time.monotonic() >= deadline:
            break
        time.sleep(poll_interval)

    return f"❌ Transcription timed out after {timeout_seconds:g} seconds"


def transcribe_audio_assemblyai(audio_file_path, timeout_seconds=30, poll_interval=1.0):
    """Transcribe an audio file with AssemblyAI's free tier (100 hours free).

    Args:
        audio_file_path: Path of the local audio file to upload.
        timeout_seconds: Maximum time to wait for the transcript once the job
            has been submitted.
        poll_interval: Seconds to sleep between status checks.

    Returns:
        The transcript text on success (possibly empty), otherwise a
        human-readable message starting with "❌". This function does not
        raise for network, file or response-format problems; they are
        converted into such messages so the UI can display them.
    """
    if not ASSEMBLYAI_API_KEY:
        return "❌ AssemblyAI API key not set. Add to Secrets."

    try:
        return _run_assemblyai_transcription(
            audio_file_path, timeout_seconds, poll_interval
        )
    except (ValueError, KeyError) as exc:
        # Non-JSON body or a payload missing the field we need. Listed first
        # because requests' JSON decode error is also a ValueError.
        return f"❌ Unexpected response from AssemblyAI: {exc!r}"
    except (requests.RequestException, OSError) as exc:
        # Connection problems, timeouts, or an unreadable audio file.
        return f"❌ Could not complete AssemblyAI request: {exc}"


def transcribe_audio_placeholder(audio_file_path):
    """Return a fixed sample conversation; fallback when no API keys are available.

    The argument is accepted for signature parity with the real transcriber
    and is ignored.
    """
    return _SAMPLE_TRANSCRIPT


# CLINICAL NOTE GENERATION: Rule-Based NLP
def _pick_diagnosis(t, symptoms):
    """Return (diagnosis, confidence) for lower-cased transcript ``t``."""
    for keywords, diagnosis, confidence in _KEYWORD_DIAGNOSES:
        if any(keyword in t for keyword in keywords):
            return diagnosis, confidence

    if "cough" not in t:
        return "Pending Further Workup", "Low"
    if "wheez" in t:
        return "Acute Bronchitis with Reactive Airway Disease", "Moderate"
    if len(symptoms) >= 2:
        return "Upper Respiratory Infection", "Moderate"
    return "Acute Cough, Etiology Pending", "Low"


def generate_rule_based_note(transcript):
    """Build a SOAP-style note from a transcript using keyword matching.

    NOTE(review): matching is plain substring search on the lower-cased text,
    so negated mentions ("no fever") and embedded words ("retired" contains
    "tired") still trigger the corresponding rule.

    Args:
        transcript: Conversation text.

    Returns:
        A multi-line string with SUBJECTIVE / OBJECTIVE / ASSESSMENT / PLAN
        sections.
    """
    t = transcript.lower()
    has_cough = "cough" in t
    two_weeks = "two week" in t or "2 week" in t

    # Extract symptoms (order matters: the first one becomes the chief complaint)
    symptoms = []
    if has_cough:
        symptoms.append("Cough (2 weeks duration)" if two_weeks else "Cough")
    if "fever" in t:
        symptoms.append("Fever")
    if "tired" in t or "fatigue" in t:
        symptoms.append("Fatigue")
    if "wheez" in t:
        symptoms.append("Wheezing")
    if "breath" in t or "winded" in t:
        symptoms.append("Dyspnea on exertion")
    if "night" in t and has_cough:
        symptoms.append("Nocturnal cough")
    if "chest" in t and "pain" in t:
        symptoms.append("Chest pain")
    if "headache" in t:
        symptoms.append("Headache")

    # Determine diagnosis
    diagnosis, confidence = _pick_diagnosis(t, symptoms)

    # Extract physical exam findings
    exam_findings = []
    if "wheez" in t:
        exam_findings.append("Mild expiratory wheezing on auscultation")
    if "rhonchi" in t:
        exam_findings.append("Rhonchi noted")
    if "crackle" in t or "rale" in t:
        exam_findings.append("Fine crackles at bases")
    if "lung" in t and "clear" in t:
        exam_findings.append("Lungs clear bilaterally")
    if not exam_findings:
        exam_findings.append("Unremarkable")

    # Build treatment plan
    plan = []
    if "inhaler" in t or "wheez" in t:
        plan.append("- Albuterol HFA 90mcg, 2 puffs q4-6h PRN for wheezing")
    if "bronchitis" in t:
        plan.append("- Supportive care (acute bronchitis typically viral, antibiotics not indicated)")
    if "antibiotic" in t:
        plan.append("- Consider antibiotic therapy if bacterial infection suspected")
    if "rest" in t or "tired" in t:
        plan.append("- Recommend rest and increased fluid intake")
    if has_cough:
        plan.append("- OTC dextromethorphan or guaifenesin for symptomatic cough relief")
    if not plan:
        plan.append("- Symptomatic management")
    plan.extend([
        "- Avoid respiratory irritants and smoking",
        "- Follow up in 7 days if symptoms persist or worsen",
        "- Return to clinic sooner if fever develops or shortness of breath increases",
    ])

    chief_complaint = symptoms[0] if symptoms else "Not specified"
    associated = ", ".join(symptoms[1:]) if len(symptoms) > 1 else "None reported"
    duration = "2 weeks" if two_weeks else "Not specified"
    onset = "Gradual" if "week" in t else "Not specified"
    aggravating = "Nighttime, exertion" if "night" in t or "breath" in t else "None reported"
    plan_text = "\n".join(plan)

    # NOTE(review): Severity, Vital Signs, General and the differential list
    # are fixed text in this template, not values derived from the transcript.
    return f"""
SUBJECTIVE:
Chief Complaint: {chief_complaint}
Associated Symptoms: {associated}
Duration: {duration}
Onset: {onset}
Severity: Moderate
Aggravating Factors: {aggravating}

OBJECTIVE:
Physical Exam: {', '.join(exam_findings)}
Vital Signs: Temperature 98.6°F, HR 72, BP 118/76, RR 16, SpO2 97% on room air
General: Alert, in no acute distress, well-appearing

ASSESSMENT:
Primary Diagnosis: {diagnosis}
Clinical Confidence: {confidence}

Differential Diagnoses:
- Viral Upper Respiratory Infection
- Allergic Rhinitis with Post-nasal Drip
- Asthma Exacerbation
- GERD

PLAN:
{plan_text}
"""


def generate_clinical_note(transcript):
    """Main clinical note generation function.

    Args:
        transcript: Transcript text, or an error message starting with "❌".

    Returns:
        The transcript unchanged when it is an error message, a "too short"
        message when there is not enough text, otherwise the generated note.
    """
    # Check for an upstream error first: a short error message must be shown
    # as-is rather than being misreported as a too-short transcription.
    if transcript and transcript.startswith(ERROR_PREFIX):
        return transcript

    if not transcript or len(transcript.strip()) < 20:
        return "❌ Transcription too short. Please provide a longer audio file."

    # Use rule-based extraction (always works, no API needed)
    return generate_rule_based_note(transcript)


# MAIN PIPELINE
def process_encounter(audio):
    """Main workflow: Audio → Transcription → SOAP Note.

    Args:
        audio: Path of the uploaded/recorded audio file, or None when the
            user has not provided one.

    Returns:
        A ``(transcript, note)`` tuple of strings for the two output boxes.
    """
    if audio is None:
        return "⚠️ Please upload an audio file.", ""

    print(f"\n{'='*60}")
    print(f"🎤 Processing: {os.path.basename(audio)}")
    print(f"📁 File size: {os.path.getsize(audio)} bytes")

    # Step 1: Transcribe audio
    if ASSEMBLYAI_API_KEY:
        print("🔑 Using AssemblyAI for transcription...")
        transcript = transcribe_audio_assemblyai(audio)
    else:
        print("⚠️ No AssemblyAI key - using sample transcript")
        transcript = "⚠️ DEMO MODE - Add AssemblyAI API key to Secrets for live transcription\n\n"
        transcript += transcribe_audio_placeholder(audio)

    print(f"📝 Transcript preview: {transcript[:150]}...")

    # Step 2: Generate clinical note
    print("📋 Generating clinical note...")
    note = generate_clinical_note(transcript)

    print(f"✅ Complete! Note length: {len(note)} chars")
    print(f"{'='*60}\n")

    return transcript, note


# GRADIO USER INTERFACE
demo = gr.Blocks(title="OpenScribe - Clinical AI Demo")

with demo:
    gr.Markdown(_INTRO_MARKDOWN)

    with gr.Row():
        with gr.Column(scale=1):
            audio_input = gr.Audio(
                type="filepath",
                label="📁 Upload Medical Conversation",
                sources=["upload", "microphone"],
            )
            run_btn = gr.Button(
                "📋 Generate Clinical Note",
                variant="primary",
                size="lg",
            )
            gr.Markdown(_RECORDING_HINT_MARKDOWN)

        with gr.Column(scale=2):
            transcript_output = gr.Textbox(
                label="Transcription",
                lines=6,
                placeholder="Transcribed conversation will appear here...",
            )
            note_output = gr.Textbox(
                label="Generated SOAP Note",
                lines=20,
                placeholder="Clinical documentation will appear here...",
            )

    run_btn.click(
        fn=process_encounter,
        inputs=audio_input,
        outputs=[transcript_output, note_output],
    )

# LAUNCH
if __name__ == "__main__":
    demo.launch()