""" MedASR - Medical Speech Recognition API Based on Google's MedASR model for medical dictation and transcription. See: https://developers.google.com/health-ai-developer-foundations/medasr """ import gradio as gr from transformers import pipeline import librosa import numpy as np import tempfile import os # Load MedASR model # Note: This model requires accepting the license at https://huggingface.co/google/medasr # The Space needs HF_TOKEN secret with access to the model model_id = "google/medasr" pipe = pipeline("automatic-speech-recognition", model=model_id) def transcribe(audio_path): """ Transcribe audio file using MedASR. MedASR requires: mono-channel audio, 16kHz, int16 waveform This function handles resampling if needed. Args: audio_path: Path to audio file (any format supported by librosa) Returns: str: Transcribed text """ if audio_path is None: return "Error: No audio file provided" try: # Load and resample audio to 16kHz mono (as required by MedASR) speech, sample_rate = librosa.load(audio_path, sr=16000, mono=True) # Process audio with recommended parameters from docs # chunk_length_s: how long in seconds MedASR batches audio # stride_length_s: overlap between chunks result = pipe( {"raw": speech, "sampling_rate": 16000}, chunk_length_s=20, stride_length_s=2 ) return result['text'] except Exception as e: return f"Error during transcription: {str(e)}" # Create Gradio interface demo = gr.Interface( fn=transcribe, inputs=gr.Audio(type="filepath", label="Upload Medical Audio"), outputs=gr.Textbox(label="Transcription", lines=10), title="MedASR - Medical Speech Recognition", description=""" Medical dictation and transcription powered by Google's MedASR model. **Supported audio formats:** WAV, MP3, FLAC, OGG, WebM **Best results with:** Clear speech, medical terminology Note: Audio is automatically resampled to 16kHz mono for optimal performance. """, api_name="predict", # Explicitly naming the endpoint for the API examples=[], # Add example audio files if available ) # Launch with queue for handling concurrent requests demo.queue() demo.launch()