Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import whisper | |
# 1. Load the model into memory (runs once on startup).
# The checkpoint size is named in one place so the comment and the code cannot
# drift apart. Per the Whisper README, larger checkpoints are more accurate but
# slower and need more memory; drop to "small" or "base" if startup time or
# memory is a problem on the host.
MODEL_SIZE = "medium"

print("Loading Whisper model...")
model = whisper.load_model(MODEL_SIZE)
# 2. Define the inference function
def transcribe_audio(audio_filepath):
    """Transcribe an audio file as Arabic using the module-level Whisper model.

    Args:
        audio_filepath: Path of the audio file to transcribe. May be ``None``
            or empty when the request carries no audio.

    Returns:
        A dict with two keys copied from the Whisper result: ``"text"`` and
        ``"segments"``.

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Fail early with a readable message instead of letting a missing upload
    # surface as a low-level error from inside the audio loader.
    if not audio_filepath:
        raise gr.Error("No audio file was provided. Please upload an audio file.")

    print(f"Processing audio: {audio_filepath}")

    # Run inference, forcing Arabic and requesting word-level timestamps.
    result = model.transcribe(
        audio_filepath,
        language="ar",
        word_timestamps=True,
        # Do not feed the previous window's output back in as a prompt.
        condition_on_previous_text=False,
    )

    # Return only the fields the API exposes.
    return {
        "text": result["text"],
        "segments": result["segments"],
    }
# 3. Create the API routing interface
# The input hands the handler a file path; the output renders its dict as JSON.
_audio_input = gr.Audio(type="filepath", label="Upload Arabic Audio")
_json_output = gr.JSON(label="Transcription Data")

demo = gr.Interface(
    fn=transcribe_audio,
    inputs=_audio_input,
    outputs=_json_output,
    title="Arabic Speech-to-Text Microservice",
)

# 4. Launch the server
demo.launch()