"""Gradio demo for the Katib ASR Pashto speech-recognition model.

The ASR pipeline is created once at import time on the CPU. Each
transcription request moves the model to the GPU inside a
``spaces.GPU``-decorated function, runs the pipeline, and moves the model
back to the CPU afterwards.
"""

import gradio as gr
import torch
from transformers import pipeline
import spaces

# =========================================
# LOAD MODEL
# =========================================
MODEL_ID = "uzair0/Katib-ASR"
CHUNK_LENGTH_S = 30  # Helps with longer recordings
GENERATE_KWARGS = {"language": "pashto", "task": "transcribe"}
NO_AUDIO_MESSAGE = "⚠️ Please record some audio first!"

# Load on CPU with bfloat16 to save memory during startup
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_ID,
    torch_dtype=torch.bfloat16,
    device="cpu",
)


def _place_pipeline(device_name: str) -> None:
    """Move the shared pipeline's model to *device_name* and record it."""
    pipe.model.to(device_name)
    # The pipeline keeps its own ``device`` attribute, so it is updated
    # together with the model.
    pipe.device = torch.device(device_name)


@spaces.GPU(duration=60)
def transcribe_audio(audio_filepath: str | None) -> str:
    """Transcribe a recorded audio file with the Katib ASR pipeline.

    Args:
        audio_filepath: Path to the recorded audio file, or ``None`` when
            nothing has been recorded.

    Returns:
        The ``"text"`` entry of the pipeline result, or a warning message
        when ``audio_filepath`` is ``None``.
    """
    if audio_filepath is None:
        return NO_AUDIO_MESSAGE

    try:
        # Move model and pipeline to the GPU for this call only.
        _place_pipeline("cuda")
        result = pipe(
            audio_filepath,
            chunk_length_s=CHUNK_LENGTH_S,
            generate_kwargs=dict(GENERATE_KWARGS),
        )
    finally:
        # Cleanup: move back to CPU so ZeroGPU can release the hook. Doing
        # this in ``finally`` keeps the shared pipeline from being left on
        # the GPU when transcription raises.
        _place_pipeline("cpu")

    return result["text"]


def _reset_fields():
    """Return the cleared values for the audio input and the output box."""
    return [None, ""]


# =========================================
# UI DESIGN (Dark Reference Layout)
# =========================================
custom_css = """
.gradio-container {
    background-color: #0b0f19 !important;
    border: none !important;
}
h2, p {
    color: white !important;
}
/* Transcription box styling */
.transcription-box textarea {
    direction: rtl !important;
    text-align: right !important;
    font-size: 1.2em !important;
    background-color: #161b22 !important;
    color: white !important;
    border: 1px solid #30363d !important;
}
/* Orange Submit Button */
.submit-btn {
    background: #ff5722 !important;
    color: white !important;
    font-weight: bold !important;
    border: none !important;
}
.clear-btn {
    background-color: #21262d !important;
    color: white !important;
    border: 1px solid #30363d !important;
}
/* Make audio player look better in dark mode */
audio {
    filter: invert(1) hue-rotate(180deg);
}
"""

with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
    with gr.Column():
        gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
        gr.Markdown(
            "Click the Record button below, speak Pashto into your "
            "microphone, and see the result!"
        )

        with gr.Row():
            # Left column: microphone input and action buttons.
            with gr.Column(scale=1):
                audio_input = gr.Audio(
                    sources=["microphone"],
                    type="filepath",
                    label="Record Pashto",
                )
                with gr.Row():
                    clear_btn = gr.Button("Clear", elem_classes="clear-btn")
                    submit_btn = gr.Button("Submit", elem_classes="submit-btn")

            # Right column: transcription output (styled right-to-left via CSS).
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="Katib ASR Transcription",
                    lines=8,
                    elem_classes="transcription-box",
                )

    # Logic
    submit_btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=output_text,
    )
    clear_btn.click(
        fn=_reset_fields,
        inputs=None,
        outputs=[audio_input, output_text],
    )

demo.launch(ssr_mode=False)