import warnings

import gradio as gr
from faster_whisper import WhisperModel

# Whisper checkpoint to load. Smaller checkpoints (try "tiny" or "small")
# are the ones to use for speed on an M1 Pro.
MODEL_SIZE = "small"

# Load the model once at import time so every request reuses it.
# compute_type="int8" selects quantized weights for faster CPU inference.
model = WhisperModel(
    MODEL_SIZE,
    compute_type="int8",
    device="cpu",
)

def transcribe(audio_path):
    """Transcribe the audio file at ``audio_path`` and return its text.

    Args:
        audio_path: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``) short-circuits to an empty string without
            touching the model.

    Returns:
        The ``text`` of every segment produced by the model, concatenated
        in order with no separator; ``""`` when no path was given.
    """
    if audio_path:
        # The second element returned by the model (metadata) is not used.
        segment_iter, _ = model.transcribe(
            audio_path,
            beam_size=1,
            vad_filter=True,
        )
        return "".join(piece.text for piece in segment_iter)
    return ""

# Single-function UI: an uploaded audio file is handed to transcribe() as a
# file path, and the returned text is shown in a textbox.
iface = gr.Interface(
    title="Fast Local Transcription",
    description=(
        f"faster-whisper backend; pick model_size={MODEL_SIZE}"
        " for a balance of speed/accuracy."
    ),
    fn=transcribe,
    inputs=gr.Audio(label="Upload audio file", type="filepath", sources="upload"),
    outputs=gr.Textbox(label="Transcription"),
)

# Expose the Gradio app under a module-level name so Spaces can detect API
# endpoints.
gradio_app = iface

# Enable the request queue so the Space exposes queue-based API endpoints.
# This remains best-effort: if queue() is unavailable or fails, the un-queued
# app is kept, but the failure is reported instead of being silently dropped.
try:
    gradio_app = gradio_app.queue()
except Exception as exc:  # older gradio versions may not support queue()
    warnings.warn(
        f"Could not enable the Gradio request queue; continuing without it: {exc!r}",
        RuntimeWarning,
    )

# Script entry point: start the web server only when this file is run directly.
if __name__ == "__main__":
    iface.launch(
        share=False,  # do not request a public share link
        server_name="0.0.0.0",  # listen on all network interfaces
    )