"""Gradio app that transcribes an uploaded audio file with faster-whisper.

The module loads one Whisper model at import time, wraps ``transcribe`` in a
``gr.Interface``, and exposes the resulting app as ``gradio_app``.
"""

import logging
from typing import Optional

import gradio as gr
from faster_whisper import WhisperModel

logger = logging.getLogger(__name__)

# Model choice: use smaller models for speed on M1 Pro (try "tiny" or "small").
MODEL_SIZE = "small"

# compute_type="int8" uses quantized weights for faster CPU inference.
model = WhisperModel(MODEL_SIZE, device="cpu", compute_type="int8")


def transcribe(audio_path: Optional[str]) -> str:
    """Return the transcription of the audio file at *audio_path*.

    Args:
        audio_path: Filesystem path handed over by the Gradio audio input
            (configured with ``type="filepath"``). A falsy value (``None``
            or an empty string) means nothing was uploaded.

    Returns:
        The text of every segment concatenated in order, with leading and
        trailing whitespace removed. An empty string when *audio_path* is
        falsy.
    """
    if not audio_path:
        return ""

    # The second element of the result (transcription info) is not needed.
    segments, _info = model.transcribe(audio_path, beam_size=1, vad_filter=True)

    # NOTE(review): segments are presumably produced lazily by faster-whisper,
    # so the decoding work would happen while this join consumes them - confirm.
    # Segment texts are joined without a separator, as before; strip() only
    # removes stray whitespace at the very start/end of the full transcript.
    return "".join(segment.text for segment in segments).strip()


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="upload", type="filepath", label="Upload audio file"),
    outputs=gr.Textbox(label="Transcription"),
    title="Fast Local Transcription",
    description=(
        f"faster-whisper backend; pick model_size={MODEL_SIZE} "
        "for a balance of speed/accuracy."
    ),
)

# Expose the Gradio app variable so Spaces can detect API endpoints.
gradio_app = iface

# Enable the request queue so the Space exposes queue-based API endpoints.
try:
    gradio_app = gradio_app.queue()
except Exception:
    # Older gradio versions may not support queue(). Stay best-effort and keep
    # serving the un-queued interface, but leave a trace instead of failing
    # silently.
    logger.warning(
        "Could not enable the Gradio request queue; continuing without it.",
        exc_info=True,
    )


if __name__ == "__main__":
    # Launch the same object that is exposed as ``gradio_app`` so the queue
    # configuration above (when it succeeded) applies to the local server too.
    gradio_app.launch(server_name="0.0.0.0", share=False)