Spaces:
Sleeping
Sleeping
File size: 1,031 Bytes
import gradio as gr
import whisper
# 1. Load the Whisper model into memory. This is module-level code, so it runs
#    once when the file is executed, and the resulting `model` object is reused
#    by every call to the inference function.
# NOTE(review): previous revisions of this comment described "base" as a good
# speed/accuracy balance for a free CPU and then suggested switching to
# "small", but the code loads "medium" — confirm which size is intended.
print("Loading Whisper model...")
model = whisper.load_model("medium")
# 2. Define the inference function
def transcribe_audio(audio_filepath):
    """Transcribe an Arabic audio file using the module-level Whisper model.

    Args:
        audio_filepath: Path to the audio file on disk. May be ``None`` or
            empty when no audio was supplied (e.g. the form was submitted
            without an upload).

    Returns:
        A dict with two keys: ``"text"`` (the ``"text"`` entry of Whisper's
        result) and ``"segments"`` (the ``"segments"`` entry of Whisper's
        result, requested here with word-level timestamps).

    Raises:
        ValueError: If ``audio_filepath`` is ``None`` or an empty string.
    """
    # Fail fast with a readable message instead of handing a missing path on
    # to Whisper, where the failure would be much harder to interpret.
    if not audio_filepath:
        raise ValueError(
            "No audio file was provided. Please upload or record audio."
        )

    print(f"Processing audio: {audio_filepath}")

    # Run inference, forcing Arabic and extracting timestamps.
    result = model.transcribe(
        audio_filepath,
        language="ar",
        word_timestamps=True,
        # Do not feed the previous window's output back in as a prompt. Per
        # Whisper's documentation this makes the model less prone to getting
        # stuck in repetition loops, at the cost of some cross-window
        # consistency.
        condition_on_previous_text=False,
    )

    # Return only the two fields the client needs, as a plain dictionary.
    return {
        "text": result["text"],
        "segments": result["segments"],
    }
# 3. Create the API routing interface.
# The input and output components are built as named values first and then
# wired to the inference function.
_audio_input = gr.Audio(type="filepath", label="Upload Arabic Audio")
_json_output = gr.JSON(label="Transcription Data")
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=_audio_input,
    outputs=_json_output,
    title="Arabic Speech-to-Text Microservice",
)
# 4. Launch the server with Gradio's default launch settings.
demo.launch()