"""Gradio microservice that transcribes Arabic audio with OpenAI Whisper.

The Whisper model is loaded once at import time and reused for every request.
Running this file as a script starts the Gradio server.
"""

from typing import Any, Dict, Optional

import gradio as gr
import whisper

# Whisper checkpoint to load. Larger checkpoints ("medium", "large") are
# generally more accurate but slower and need more memory than the smaller
# ones ("base", "small"); pick a smaller one if CPU inference is too slow.
MODEL_NAME = "medium"

# 1. Load the model into memory (runs once on startup, not per request)
print("Loading Whisper model...")
model = whisper.load_model(MODEL_NAME)


# 2. Define the inference function
def transcribe_audio(audio_filepath: Optional[str]) -> Dict[str, Any]:
    """Transcribe an Arabic audio file and return its text and segments.

    Args:
        audio_filepath: Path to the audio file on disk, as supplied by the
            ``gr.Audio(type="filepath")`` input. Empty/``None`` when the
            request was submitted without any audio.

    Returns:
        A dictionary with two keys:
        ``"text"`` - the ``"text"`` entry of Whisper's result, and
        ``"segments"`` - the ``"segments"`` entry of Whisper's result
        (requested with ``word_timestamps=True``).

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Fail early with a readable message instead of letting Whisper crash
    # on a missing path deep inside its audio loader.
    if not audio_filepath:
        raise gr.Error("No audio provided. Please upload an audio file.")

    print(f"Processing audio: {audio_filepath}")

    # Run inference, forcing Arabic and extracting timestamps.
    result = model.transcribe(
        audio_filepath,
        language="ar",
        word_timestamps=True,
        # Decode each window independently of the previously decoded text
        # (see Whisper's `transcribe` documentation for the trade-offs).
        condition_on_previous_text=False,
    )

    # Return a clean dictionary containing only the fields the client needs.
    return {
        "text": result["text"],
        "segments": result["segments"],
    }


# 3. Create the API routing interface
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload Arabic Audio"),
    outputs=gr.JSON(label="Transcription Data"),
    title="Arabic Speech-to-Text Microservice",
)

# 4. Launch the server (only when executed as a script, so importing this
# module does not start a blocking web server).
if __name__ == "__main__":
    demo.launch()