import gradio as gr
import whisper

# 1. Load the model into memory (runs once, at import/startup time).
# The checkpoint size is the speed/accuracy knob: "medium" is more accurate
# than "base" or "small" but needs noticeably more memory and time per request.
# On a CPU-only host, switch to "small" or "base" if latency matters more.
MODEL_SIZE = "medium"

print("Loading Whisper model...")
model = whisper.load_model(MODEL_SIZE)
# 2. Define the inference function
def transcribe_audio(audio_filepath):
    """Transcribe an Arabic audio file with the module-level Whisper model.

    Args:
        audio_filepath: Path to the audio file to transcribe. May be ``None``
            (or empty) when the request was submitted without any audio.

    Returns:
        A dict with two keys: ``"text"`` (the full transcription string) and
        ``"segments"`` (the segment list from Whisper's result, unmodified).

    Raises:
        ValueError: If ``audio_filepath`` is ``None`` or an empty string.
    """
    # Fail fast with a clear message instead of letting a missing upload
    # surface as an opaque error from deep inside the transcription call.
    if not audio_filepath:
        raise ValueError("No audio file provided.")

    print(f"Processing audio: {audio_filepath}")

    result = model.transcribe(
        audio_filepath,
        language="ar",  # force Arabic rather than auto-detecting the language
        word_timestamps=True,  # request per-word timing inside each segment
        # Do not feed each window's output to the next window as a prompt;
        # this makes the model less prone to repetition loops.
        condition_on_previous_text=False,
    )

    # Expose only the fields the caller needs, as a plain dictionary.
    return {
        "text": result["text"],
        "segments": result["segments"],
    }

# 3. Build the Gradio interface that exposes transcribe_audio.
# The audio component is configured with type="filepath", so the handler
# receives a path string; the handler's dict result is rendered as JSON.
_audio_input = gr.Audio(type="filepath", label="Upload Arabic Audio")
_json_output = gr.JSON(label="Transcription Data")

demo = gr.Interface(
    title="Arabic Speech-to-Text Microservice",
    fn=transcribe_audio,
    inputs=_audio_input,
    outputs=_json_output,
)

# 4. Start the server
demo.launch()