File size: 3,141 Bytes
7708e2e 364e491 7708e2e 364e491 7708e2e 364e491 024de1d 364e491 83e303c 364e491 abd1c03 364e491 7708e2e 364e491 83e303c 364e491 abd1c03 83e303c 7708e2e 83e303c abd1c03 7708e2e abd1c03 83e303c abd1c03 7708e2e 364e491 83e303c 364e491 83e303c abd1c03 024de1d 364e491 024de1d 83e303c 024de1d 83e303c 024de1d abd1c03 83e303c 024de1d 83e303c 024de1d abd1c03 024de1d abd1c03 024de1d 83e303c 024de1d 83e303c 364e491 abd1c03 83e303c 364e491 83e303c abd1c03 024de1d 364e491 024de1d 364e491 83e303c 7708e2e 83e303c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | import gradio as gr
import torch
from transformers import pipeline
import spaces
# =========================================
# LOAD MODEL
# =========================================
# The speech-recognition pipeline is built once, at import time. It is created
# on the CPU with bfloat16 weights to keep memory usage low during startup;
# the request handler moves the model onto the GPU for each transcription.
ASR_MODEL_ID = "uzair0/Katib-ASR"
ASR_TASK = "automatic-speech-recognition"

pipe = pipeline(
    task=ASR_TASK,
    model=ASR_MODEL_ID,
    torch_dtype=torch.bfloat16,
    device="cpu",
)
@spaces.GPU(duration=60)
def transcribe_audio(audio_filepath):
    """Transcribe a recorded audio file to Pashto text.

    The module-level ``pipe`` is moved to the GPU for the duration of the
    call and is always moved back to the CPU afterwards, even when the
    transcription itself raises.

    Args:
        audio_filepath: Path of the recording to transcribe, or ``None``
            when no audio was provided.

    Returns:
        The transcribed text, or a warning message when ``audio_filepath``
        is ``None``.
    """
    if audio_filepath is None:
        return "⚠️ Please record some audio first!"

    try:
        # 1. Move the model to the GPU and point the pipeline at it, so the
        #    inputs it prepares end up on the same device as the weights.
        pipe.model.to("cuda")
        pipe.device = torch.device("cuda")

        # 2. Run transcription.
        result = pipe(
            audio_filepath,
            chunk_length_s=30,  # Helps with longer recordings
            generate_kwargs={
                "language": "pashto",
                "task": "transcribe",
            },
        )
    finally:
        # 3. Cleanup: always return the shared pipeline to the CPU. Without
        #    the ``finally`` a failed transcription would leave the model and
        #    ``pipe.device`` pointing at CUDA for every later call.
        pipe.model.to("cpu")
        pipe.device = torch.device("cpu")

    return result["text"]
# =========================================
# UI DESIGN (Dark Reference Layout)
# =========================================
# Stylesheet passed to gr.Blocks(css=...) when the interface is built.
# The class selectors below (.transcription-box, .submit-btn, .clear-btn) are
# attached to components through their `elem_classes` arguments.
# The transcription textarea is rendered right-to-left and right-aligned.
# NOTE: the text inside the triple-quoted string is sent to the browser as-is,
# so the /* ... */ comments in it are CSS comments, not Python comments.
custom_css = """
.gradio-container { background-color: #0b0f19 !important; border: none !important; }
h2, p { color: white !important; }
/* Transcription box styling */
.transcription-box textarea {
direction: rtl !important;
text-align: right !important;
font-size: 1.2em !important;
background-color: #161b22 !important;
color: white !important;
border: 1px solid #30363d !important;
}
/* Orange Submit Button */
.submit-btn {
background: #ff5722 !important;
color: white !important;
font-weight: bold !important;
border: none !important;
}
.clear-btn {
background-color: #21262d !important;
color: white !important;
border: 1px solid #30363d !important;
}
/* Make audio player look better in dark mode */
audio { filter: invert(1) hue-rotate(180deg); }
"""
def _reset_fields():
    """Return the values that empty the audio input and the output textbox."""
    return [None, ""]


with gr.Blocks(theme=gr.themes.Default(), css=custom_css) as demo:
    with gr.Column():
        # Page heading and short usage hint.
        gr.Markdown("## 🎙️ Katib ASR: Pashto Speech Recognition")
        gr.Markdown("Click the Record button below, speak Pashto into your microphone, and see the result!")

        with gr.Row():
            # Left column: microphone recorder with its action buttons.
            with gr.Column(scale=1):
                audio_input = gr.Audio(
                    label="Record Pashto",
                    sources=["microphone"],
                    type="filepath",
                )
                with gr.Row():
                    clear_btn = gr.Button("Clear", elem_classes="clear-btn")
                    submit_btn = gr.Button("Submit", elem_classes="submit-btn")

            # Right column: transcription result.
            with gr.Column(scale=1):
                output_text = gr.Textbox(
                    label="Katib ASR Transcription",
                    elem_classes="transcription-box",
                    lines=8,
                )

    # Event wiring: Submit runs the transcription, Clear resets both widgets.
    submit_btn.click(
        fn=transcribe_audio,
        inputs=audio_input,
        outputs=output_text,
    )
    clear_btn.click(
        fn=_reset_fields,
        inputs=None,
        outputs=[audio_input, output_text],
    )

demo.launch(ssr_mode=False)