# Clinical-Scribe / app.py
# arafatanam's picture
# Update app.py
# f51dba5 verified
import gradio as gr
import os
import requests
import time
# --- CONFIGURATION ---
# Credentials are looked up once, at import time, from the process
# environment. Each name is None when its variable is not set.
ASSEMBLYAI_API_KEY = os.getenv("ASSEMBLYAI_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")
# SPEECH-TO-TEXT: AssemblyAI Integration
def transcribe_audio_assemblyai(
    audio_file_path,
    *,
    max_polls=30,
    poll_interval=1,
    request_timeout=60,
):
    """Transcribe a local audio file through the AssemblyAI REST API.

    The file is streamed to the upload endpoint, a transcription job is
    created for the uploaded URL, and the job is polled until it completes,
    fails, or the polling budget is exhausted. Failures are never raised:
    they are returned as a message starting with "❌".

    Args:
        audio_file_path: Path of the audio file to upload.
        max_polls: Maximum number of status checks before giving up.
        poll_interval: Seconds to sleep after each unsuccessful status check.
        request_timeout: Timeout, in seconds, passed to every HTTP request.

    Returns:
        The transcript text on success (an empty string when the service
        returns no text), otherwise an error message prefixed with "❌".
    """
    if not ASSEMBLYAI_API_KEY:
        return "❌ AssemblyAI API key not set. Add to Secrets."

    headers = {"authorization": ASSEMBLYAI_API_KEY}

    def read_file(filename):
        """Yield the file in 5 MB chunks so it is streamed, not loaded whole."""
        with open(filename, "rb") as f:
            while True:
                data = f.read(5242880)  # 5MB chunks
                if not data:
                    break
                yield data

    def parse_json(response):
        """Return the response body as a dict, or {} if it is not a JSON object."""
        try:
            payload = response.json()
        except ValueError:
            return {}
        return payload if isinstance(payload, dict) else {}

    # Step 1: Upload audio
    print("📤 Uploading to AssemblyAI...")
    try:
        upload_response = requests.post(
            "https://api.assemblyai.com/v2/upload",
            headers=headers,
            data=read_file(audio_file_path),
            timeout=request_timeout,
        )
    except (OSError, requests.RequestException) as exc:
        # OSError covers an unreadable audio file; the generator is consumed
        # while the request body is being sent.
        return f"❌ Upload failed: {exc}"
    if upload_response.status_code != 200:
        return f"❌ Upload failed: {upload_response.text}"
    audio_url = parse_json(upload_response).get("upload_url")
    if not audio_url:
        return f"❌ Upload failed: {upload_response.text}"
    print(f"✅ Uploaded: {audio_url}")

    # Step 2: Request transcription
    json_data = {
        "audio_url": audio_url,
        "speech_models": ["universal-2"],
        "language_code": "en_us",
    }
    try:
        transcript_response = requests.post(
            "https://api.assemblyai.com/v2/transcript",
            json=json_data,
            headers=headers,
            timeout=request_timeout,
        )
    except requests.RequestException as exc:
        return f"❌ Transcription request failed: {exc}"
    transcript_data = parse_json(transcript_response)
    if transcript_response.status_code != 200:
        error_msg = transcript_data.get("error", "Unknown error")
        return f"❌ Transcription request failed: {error_msg}"
    transcript_id = transcript_data.get("id")
    if not transcript_id:
        return "❌ Transcription request failed: Unknown error"
    print(f"📝 Transcript ID: {transcript_id}")

    # Step 3: Poll for results
    polling_endpoint = f"https://api.assemblyai.com/v2/transcript/{transcript_id}"
    for _ in range(max_polls):
        try:
            polling_response = requests.get(
                polling_endpoint,
                headers=headers,
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            return f"❌ Transcription error: {exc}"
        polling_data = parse_json(polling_response)
        # Error payloads (e.g. auth failures) carry no "status" key, so read
        # it defensively instead of indexing.
        status = polling_data.get("status")
        print(f"⏳ Status: {status}")
        if status == "completed":
            print("✅ Transcription complete!")
            # The text may be null; callers slice the result, so return "".
            return polling_data.get("text") or ""
        if status == "error" or polling_response.status_code != 200:
            return f"❌ Transcription error: {polling_data.get('error', 'Unknown')}"
        time.sleep(poll_interval)

    waited = max_polls * poll_interval
    return f"❌ Transcription timed out after {waited:g} seconds"
def transcribe_audio_placeholder(audio_file_path):
    """Return a canned doctor/patient dialogue; the path argument is ignored."""
    dialogue = (
        "Doctor: Hello, what brings you in today?",
        "Patient: I've had a cough for about two weeks. It gets worse at night and I feel really tired.",
        "Doctor: Any fever or shortness of breath?",
        "Patient: No fever, but I get winded climbing stairs.",
        "Doctor: I'm going to listen to your lungs. Take a deep breath. I can hear some mild wheezing on the right side.",
        "Patient: Is it serious?",
        "Doctor: It appears to be acute bronchitis. I'll prescribe an inhaler and recommend rest. Follow up in a week.",
        "Patient: Thank you, doctor.",
    )
    # The sample starts with a newline and every line ends with one.
    return "\n" + "".join(f"{line}\n" for line in dialogue)
# CLINICAL NOTE GENERATION: Rule-Based NLP
def _fever_is_denied(text):
    """Return True when the lower-cased transcript explicitly negates fever."""
    import re  # local import: the file's import block does not include re

    pattern = (
        r"(?:\bno|\bnot|n['\u2019]t|\bwithout|\bden(?:y|ies|ied))"
        r"(?:\s+(?:have|had|having|any|a))*\s+fevers?\b"
    )
    return re.search(pattern, text) is not None


def _extract_symptoms(t):
    """Return symptom labels found in the lower-cased transcript, in fixed order."""
    symptoms = []
    if "cough" in t:
        lasted_two_weeks = "two week" in t or "2 week" in t
        symptoms.append("Cough (2 weeks duration)" if lasted_two_weeks else "Cough")
    # A denial such as "No fever" mentions the keyword without reporting it.
    if "fever" in t and not _fever_is_denied(t):
        symptoms.append("Fever")
    if "tired" in t or "fatigue" in t:
        symptoms.append("Fatigue")
    if "wheez" in t:
        symptoms.append("Wheezing")
    if "breath" in t or "winded" in t:
        symptoms.append("Dyspnea on exertion")
    if "night" in t and "cough" in t:
        symptoms.append("Nocturnal cough")
    if "chest" in t and "pain" in t:
        symptoms.append("Chest pain")
    if "headache" in t:
        symptoms.append("Headache")
    return symptoms


def _infer_diagnosis(t, symptom_count):
    """Return (diagnosis, confidence) from the first matching keyword rule."""
    # Conditions named in the transcript take precedence, in this order,
    # over guesses derived from symptoms.
    named_conditions = (
        (("bronchitis",), "Acute Bronchitis", "High"),
        (("pneumonia",), "Community-Acquired Pneumonia", "Moderate"),
        (("asthma",), "Asthma Exacerbation", "Moderate"),
        (("covid", "coronavirus"), "COVID-19 Infection", "Moderate"),
    )
    for keywords, diagnosis, confidence in named_conditions:
        if any(keyword in t for keyword in keywords):
            return diagnosis, confidence
    if "cough" in t:
        if "wheez" in t:
            return "Acute Bronchitis with Reactive Airway Disease", "Moderate"
        if symptom_count >= 2:
            return "Upper Respiratory Infection", "Moderate"
        return "Acute Cough, Etiology Pending", "Low"
    return "Pending Further Workup", "Low"


def _extract_exam_findings(t):
    """Return physical-exam findings mentioned in the lower-cased transcript."""
    findings = []
    if "wheez" in t:
        findings.append("Mild expiratory wheezing on auscultation")
    if "rhonchi" in t:
        findings.append("Rhonchi noted")
    if "crackle" in t or "rale" in t:
        findings.append("Fine crackles at bases")
    if "lung" in t and "clear" in t:
        findings.append("Lungs clear bilaterally")
    return findings or ["Unremarkable"]


def _build_plan(t):
    """Return the treatment-plan bullet lines for the lower-cased transcript."""
    plan = []
    if "inhaler" in t or "wheez" in t:
        plan.append("- Albuterol HFA 90mcg, 2 puffs q4-6h PRN for wheezing")
    if "bronchitis" in t:
        plan.append("- Supportive care (acute bronchitis typically viral, antibiotics not indicated)")
    if "antibiotic" in t:
        plan.append("- Consider antibiotic therapy if bacterial infection suspected")
    if "rest" in t or "tired" in t:
        plan.append("- Recommend rest and increased fluid intake")
    if "cough" in t:
        plan.append("- OTC dextromethorphan or guaifenesin for symptomatic cough relief")
    if not plan:
        plan.append("- Symptomatic management")
    # Standing advice that is appended to every plan.
    plan.extend([
        "- Avoid respiratory irritants and smoking",
        "- Follow up in 7 days if symptoms persist or worsen",
        "- Return to clinic sooner if fever develops or shortness of breath increases",
    ])
    return plan


def generate_rule_based_note(transcript):
    """Build a SOAP-style note from a transcript using keyword matching.

    Matching is case-insensitive substring search, so results are only as
    good as the keywords; the only negation handled is an explicit denial
    of fever (e.g. "no fever"). Severity, vital signs, general appearance
    and the differential list are fixed template text, not values extracted
    from the transcript.

    Args:
        transcript: Conversation text. None is treated as an empty string.

    Returns:
        The note as a single string that starts and ends with a newline.
    """
    t = (transcript or "").lower()

    symptoms = _extract_symptoms(t)
    diagnosis, confidence = _infer_diagnosis(t, len(symptoms))
    exam_findings = _extract_exam_findings(t)
    plan = _build_plan(t)

    chief_complaint = symptoms[0] if symptoms else "Not specified"
    associated = ", ".join(symptoms[1:]) if len(symptoms) > 1 else "None reported"
    duration = "2 weeks" if "two week" in t or "2 week" in t else "Not specified"
    onset = "Gradual" if "week" in t else "Not specified"
    aggravating = "Nighttime, exertion" if "night" in t or "breath" in t else "None reported"

    lines = [
        "SUBJECTIVE:",
        f"Chief Complaint: {chief_complaint}",
        f"Associated Symptoms: {associated}",
        f"Duration: {duration}",
        f"Onset: {onset}",
        "Severity: Moderate",
        f"Aggravating Factors: {aggravating}",
        "OBJECTIVE:",
        f"Physical Exam: {', '.join(exam_findings)}",
        "Vital Signs: Temperature 98.6°F, HR 72, BP 118/76, RR 16, SpO2 97% on room air",
        "General: Alert, in no acute distress, well-appearing",
        "ASSESSMENT:",
        f"Primary Diagnosis: {diagnosis}",
        f"Clinical Confidence: {confidence}",
        "Differential Diagnoses:",
        "- Viral Upper Respiratory Infection",
        "- Allergic Rhinitis with Post-nasal Drip",
        "- Asthma Exacerbation",
        "- GERD",
        "PLAN:",
        *plan,
    ]
    return "\n" + "\n".join(lines) + "\n"
def generate_clinical_note(transcript):
    """Turn a transcript into a clinical note, or return an error string.

    Args:
        transcript: Transcript text, or an error message starting with "❌"
            from an earlier step. May be None or empty.

    Returns:
        The transcript unchanged when it is itself an error message; a
        "too short" error when it has fewer than 20 characters after
        stripping surrounding whitespace; otherwise the note produced by
        generate_rule_based_note.
    """
    # Pass upstream errors through first, so a short error message is not
    # masked by the generic "too short" response below.
    if transcript and transcript.startswith("❌"):
        return transcript
    # Surrounding whitespace carries no content, so measure the stripped text.
    if not transcript or len(transcript.strip()) < 20:
        return "❌ Transcription too short. Please provide a longer audio file."
    # Use rule-based extraction (always works, no API needed)
    return generate_rule_based_note(transcript)
# MAIN PIPELINE
def process_encounter(audio):
    """Run the full pipeline: Audio → Transcription → SOAP Note.

    Args:
        audio: Path of the audio file to process, or None when no audio
            was provided.

    Returns:
        A (transcript, note) tuple of strings. When audio is None the first
        item is a prompt to upload a file and the second is empty. When no
        AssemblyAI key is configured, the transcript is the built-in sample
        dialogue preceded by a demo-mode banner.
    """
    if audio is None:
        return "⚠️ Please upload an audio file.", ""

    print(f"\n{'='*60}")
    print(f"🎤 Processing: {os.path.basename(audio)}")
    try:
        print(f"📏 File size: {os.path.getsize(audio)} bytes")
    except OSError as exc:
        # The size is diagnostic only; failing to read it must not abort
        # the request.
        print(f"📏 File size: unavailable ({exc})")

    # Step 1: Transcribe audio
    if ASSEMBLYAI_API_KEY:
        print("🔑 Using AssemblyAI for transcription...")
        # Guard against a missing result so the slicing below cannot fail.
        transcript = transcribe_audio_assemblyai(audio) or ""
        shown_transcript = transcript
    else:
        print("⚠️ No AssemblyAI key - using sample transcript")
        transcript = transcribe_audio_placeholder(audio)
        # The banner is for display only; the note is generated from the
        # dialogue itself so the banner text is never keyword-matched.
        shown_transcript = (
            "⚠️ DEMO MODE - Add AssemblyAI API key to Secrets for live transcription\n\n"
            + transcript
        )
    print(f"📝 Transcript preview: {shown_transcript[:150]}...")

    # Step 2: Generate clinical note
    print("📋 Generating clinical note...")
    note = generate_clinical_note(transcript)
    print(f"✅ Complete! Note length: {len(note)} chars")
    print(f"{'='*60}\n")
    return shown_transcript, note
# GRADIO USER INTERFACE
# Two-column layout: audio input and the trigger button on the left,
# transcript and generated note on the right.
demo = gr.Blocks(title="OpenScribe - Clinical AI Demo")
with demo:
    gr.Markdown("""
# OpenScribe: AI Clinical Documentation
This tool replicates the **exact architecture** used in production for automated clinical documentation:
1. **Speech-to-Text**: AssemblyAI transcription (100 hours free tier)
2. **NLP Processing**: Rule-based clinical entity extraction
3. **Output**: Structured SOAP note ready for EHR integration
---
""")
    with gr.Row():
        with gr.Column(scale=1):
            # type="filepath" hands process_encounter a path on disk.
            audio_input = gr.Audio(
                type="filepath",
                label="📁 Upload Medical Conversation",
                sources=["upload", "microphone"]
            )
            run_btn = gr.Button(
                "📋 Generate Clinical Note",
                variant="primary",
                size="lg"
            )
            gr.Markdown("""
**Or record your own conversation:**
*"Hi, what brings you in? - I've had this cough for two weeks. - Any fever? - No. - Let me listen... I hear wheezing. - It's bronchitis."*
""")
        with gr.Column(scale=2):
            transcript_output = gr.Textbox(
                label="Transcription",
                lines=6,
                placeholder="Transcribed conversation will appear here..."
            )
            note_output = gr.Textbox(
                label="Generated SOAP Note",
                lines=20,
                placeholder="Clinical documentation will appear here..."
            )
    # process_encounter returns (transcript, note), matching the two outputs.
    run_btn.click(
        fn=process_encounter,
        inputs=audio_input,
        outputs=[transcript_output, note_output]
    )
# LAUNCH
# Start the server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()