import gradio as gr
import torch
from transformers import pipeline
import spaces

# =========================================
# LOAD MODEL
# =========================================
# Build the speech-recognition pipeline once, at import time. It is created
# on the CPU in bfloat16; transcribe_audio() moves the model to the GPU for
# the duration of each request.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="uzair0/Katib-ASR",
    device="cpu",
    torch_dtype=torch.bfloat16,
)

@spaces.GPU(duration=60)
def transcribe_audio(audio_filepath):
    """Transcribe a recorded audio file with the module-level ASR pipeline.

    The model is moved to the GPU for the duration of the call and is always
    moved back to the CPU afterwards, including when transcription fails.

    Args:
        audio_filepath: Path of the audio file to transcribe, or ``None``
            when no recording was provided.

    Returns:
        The ``"text"`` field of the pipeline result, or a warning message
        when ``audio_filepath`` is ``None``.

    Raises:
        Exception: Any error raised while moving the model or running the
            pipeline is propagated after the CPU placement is restored.
    """
    if audio_filepath is None:
        return "⚠️ Please record some audio first!"

    gpu = torch.device("cuda")
    cpu = torch.device("cpu")

    try:
        # Move the weights and point the pipeline at the GPU so that inputs
        # are placed on the same device as the model for this call.
        pipe.model.to(gpu)
        pipe.device = gpu

        result = pipe(
            audio_filepath,
            chunk_length_s=30,  # Helps with longer recordings
            generate_kwargs={
                "language": "pashto",
                "task": "transcribe",
            },
        )
    finally:
        # Restore the CPU placement even if the pipeline raised; otherwise a
        # single failed request would leave the shared pipeline on the GPU
        # for every later call.
        pipe.model.to(cpu)
        pipe.device = cpu

    return result["text"]

# =========================================
# UI DESIGN (Dark Reference Layout)
# =========================================

# Stylesheet handed to gr.Blocks(css=...) below. The class selectors
# (.transcription-box, .submit-btn, .clear-btn) match the elem_classes values
# assigned to the textbox and the two buttons in the layout; the remaining
# rules target the Gradio container, headings/paragraphs and <audio> elements.
custom_css = """
.gradio-container { background-color: #0b0f19 !important; border: none !important; }
h2, p { color: white !important; }

/* Transcription box styling */
.transcription-box textarea { 
    direction: rtl !important; 
    text-align: right !important; 
    font-size: 1.2em !important;
    background-color: #161b22 !important;
    color: white !important;
    border: 1px solid #30363d !important;
}

/* Orange Submit Button */
.submit-btn {
    background: #ff5722 !important;
    color: white !important;
    font-weight: bold !important;
    border: none !important;
}

.clear-btn {
    background-color: #21262d !important;
    color: white !important;
    border: 1px solid #30363d !important;
}

/* Make audio player look better in dark mode */
audio { filter: invert(1) hue-rotate(180deg); }
"""

# Page layout: a heading, then one row split into two equal columns with the
# recorder and its buttons on the left and the transcription box on the right.
with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
    with gr.Column():
        gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
        gr.Markdown("Click the Record button below, speak Pashto into your microphone, and see the result!")

    with gr.Row():
        # Left column: microphone capture and the action buttons.
        with gr.Column(scale=1):
            audio_input = gr.Audio(label="Record Pashto", type="filepath", sources=["microphone"])
            with gr.Row():
                clear_btn = gr.Button("Clear", elem_classes="clear-btn")
                submit_btn = gr.Button("Submit", elem_classes="submit-btn")

        # Right column: where the transcription is displayed.
        with gr.Column(scale=1):
            output_text = gr.Textbox(elem_classes="transcription-box", lines=8, label="Katib ASR Transcription")

    # Event wiring: Submit runs the transcription on the recording; Clear
    # resets the recorder to None and the textbox to an empty string.
    submit_btn.click(transcribe_audio, inputs=[audio_input], outputs=[output_text])
    clear_btn.click(lambda: [None, ""], inputs=None, outputs=[audio_input, output_text])

# Start the app with server-side rendering turned off.
demo.launch(ssr_mode=False)