"""
MedASR - Medical Speech Recognition API
Based on Google's MedASR model for medical dictation and transcription.
See: https://developers.google.com/health-ai-developer-foundations/medasr
"""
import gradio as gr
from transformers import pipeline
import librosa
import numpy as np
import tempfile
import os

# MedASR checkpoint identifier on the Hugging Face Hub.
# The model is gated: its license has to be accepted at
# https://huggingface.co/google/medasr, and the Space needs an HF_TOKEN
# secret with access to the model.
model_id = "google/medasr"

# Speech-recognition pipeline, created once when the module is loaded.
pipe = pipeline(task="automatic-speech-recognition", model=model_id)


def transcribe(audio_path):
    """
    Transcribe an audio file using MedASR.

    The file is decoded with librosa, downmixed to mono and resampled to
    16 kHz, then passed to the module-level ASR pipeline (``pipe``).
    Problems are reported as text instead of being raised, so the caller
    always receives a string.

    NOTE(review): earlier documentation mentioned an int16 waveform; the
    samples returned by ``librosa.load`` are passed on unconverted here.
    Confirm the pipeline accepts them as-is.

    Args:
        audio_path: Path to the audio file (any format librosa can decode).
            ``None`` or an empty string means no file was provided.

    Returns:
        str: The transcribed text, or a message starting with "Error" when
        no audio was provided, the file holds no samples, or decoding or
        transcription failed.
    """
    # An empty path is as unusable as a missing one; report it clearly
    # instead of letting the decoder fail with an opaque message.
    if not audio_path:
        return "Error: No audio file provided"

    try:
        # Decode, downmix to mono and resample to 16 kHz in one step.
        speech, sample_rate = librosa.load(audio_path, sr=16000, mono=True)

        # Nothing to transcribe; skip the model call entirely.
        if speech.size == 0:
            return "Error: Audio file contains no audio data"

        # Pass the rate reported by the loader so the value handed to the
        # pipeline always matches how the samples were actually produced.
        # chunk_length_s: length in seconds of each chunk of audio
        # stride_length_s: overlap in seconds between chunks
        result = pipe(
            {"raw": speech, "sampling_rate": sample_rate},
            chunk_length_s=20,
            stride_length_s=2,
        )
        return result["text"]

    except Exception as e:
        # Top-level boundary for the UI/API: surface the failure as text.
        return f"Error during transcription: {e}"


# Gradio UI: one uploaded audio file in, one transcription textbox out.
_DESCRIPTION = """
    Medical dictation and transcription powered by Google's MedASR model.
    
    **Supported audio formats:** WAV, MP3, FLAC, OGG, WebM
    **Best results with:** Clear speech, medical terminology
    
    Note: Audio is automatically resampled to 16kHz mono for optimal performance.
    """

# The audio component hands the handler a file path rather than raw samples.
_audio_input = gr.Audio(type="filepath", label="Upload Medical Audio")
_transcript_output = gr.Textbox(label="Transcription", lines=10)

demo = gr.Interface(
    fn=transcribe,
    inputs=_audio_input,
    outputs=_transcript_output,
    title="MedASR - Medical Speech Recognition",
    description=_DESCRIPTION,
    api_name="predict",  # explicit endpoint name for API clients
    examples=[],  # no example audio files are bundled yet
)

# Enable the request queue (for concurrent requests), then start the app.
demo.queue().launch()