# Katib-ASR / app.py
# Uploaded by junaid008 -- commit "Update app.py" (83e303c, verified)
import gradio as gr
import torch
from transformers import pipeline
import spaces
# =========================================
# LOAD MODEL
# =========================================
# Build the speech-recognition pipeline once, at import time. It is created on
# the CPU in bfloat16 to save memory during startup; transcribe_audio moves the
# model to the GPU for the duration of each request.
ASR_MODEL_ID = "uzair0/Katib-ASR"

pipe = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL_ID,
    torch_dtype=torch.bfloat16,
    device="cpu",
)
@spaces.GPU(duration=60)
def transcribe_audio(audio_filepath):
    """Transcribe a recorded audio file into Pashto text.

    The module-level ``pipe`` lives on the CPU between requests. For each call
    the model is moved to CUDA, the transcription is run, and the model is
    moved back to the CPU afterwards.

    Args:
        audio_filepath: Path to the recorded audio file, or ``None`` when the
            user has not recorded anything yet.

    Returns:
        The transcribed text, or a warning message when ``audio_filepath`` is
        ``None``.

    Raises:
        Any exception raised by the pipeline is propagated unchanged; the
        pipeline is still restored to the CPU before it propagates.
    """
    if audio_filepath is None:
        return "⚠️ Please record some audio first!"

    try:
        # 1. Move model to GPU
        pipe.model.to("cuda")
        # 2. Re-assign the device to the pipeline for this call
        pipe.device = torch.device("cuda")
        # 3. Run transcription
        result = pipe(
            audio_filepath,
            chunk_length_s=30,  # Helps with longer recordings
            generate_kwargs={
                "language": "pashto",
                "task": "transcribe",
            },
        )
    finally:
        # 4. Cleanup: always move back to CPU, even when transcription fails.
        #    Without the ``finally`` an exception would leave the shared
        #    pipeline on CUDA for the next request.
        pipe.model.to("cpu")
        pipe.device = torch.device("cpu")

    return result["text"]
# =========================================
# UI DESIGN (Dark Reference Layout)
# =========================================
# Stylesheet passed to gr.Blocks(css=...) below. The .transcription-box,
# .submit-btn and .clear-btn selectors correspond to the elem_classes values
# given to the output textbox and the two buttons in the layout; the
# transcription box is set to right-to-left, right-aligned text.
custom_css = """
.gradio-container { background-color: #0b0f19 !important; border: none !important; }
h2, p { color: white !important; }
/* Transcription box styling */
.transcription-box textarea {
direction: rtl !important;
text-align: right !important;
font-size: 1.2em !important;
background-color: #161b22 !important;
color: white !important;
border: 1px solid #30363d !important;
}
/* Orange Submit Button */
.submit-btn {
background: #ff5722 !important;
color: white !important;
font-weight: bold !important;
border: none !important;
}
.clear-btn {
background-color: #21262d !important;
color: white !important;
border: 1px solid #30363d !important;
}
/* Make audio player look better in dark mode */
audio { filter: invert(1) hue-rotate(180deg); }
"""
# Two-column layout: microphone recorder with Clear/Submit buttons on the
# left, transcription textbox on the right.
with gr.Blocks(css=custom_css, theme=gr.themes.Default()) as demo:
    with gr.Column():
        gr.Markdown(value="## 🎙️ Katib ASR: Pashto Speech Recognition")
        gr.Markdown(
            value="Click the Record button below, speak Pashto into your microphone, and see the result!"
        )

        with gr.Row():
            # Input side: recorder plus its action buttons.
            with gr.Column(scale=1):
                audio_input = gr.Audio(
                    label="Record Pashto",
                    sources=["microphone"],
                    type="filepath",
                )
                with gr.Row():
                    clear_btn = gr.Button(value="Clear", elem_classes="clear-btn")
                    submit_btn = gr.Button(value="Submit", elem_classes="submit-btn")

            # Output side: the transcription result.
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="Katib ASR Transcription",
                    elem_classes="transcription-box",
                    lines=8,
                )

    # Event wiring: Submit runs the transcription; Clear resets both the
    # recorded audio and the transcription text.
    submit_btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=output_text,
    )
    clear_btn.click(
        fn=lambda: [None, ""],
        inputs=None,
        outputs=[audio_input, output_text],
    )

demo.launch(ssr_mode=False)