# Abid Ali Awan
# Add Urdu full stop handling in transcribe function of app.py
# 549eccc
import os
import warnings
import gradio as gr
import numpy as np
import torch
from transformers import (
AutoModelForSpeechSeq2Seq,
AutoProcessor,
logging,
pipeline,
)
# Suppress FutureWarning messages for the whole process.
warnings.simplefilter("ignore", category=FutureWarning)

# —— CPU performance tweaks ——
# Request four threads from OpenMP/MKL and cap PyTorch at four threads.
# NOTE(review): the environment variables are assigned after torch/numpy have
# already been imported; confirm the native libraries still honour them.
os.environ.update(OMP_NUM_THREADS="4", MKL_NUM_THREADS="4")
torch.set_num_threads(4)

# Keep transformers' own logger at error level.
logging.set_verbosity_error()

# —— Model setup ——
model_id = "kingabzpro/whisper-base-urdu-full"

# Load the checkpoint and dynamically quantize its Linear layers to int8.
model = torch.quantization.quantize_dynamic(
    AutoModelForSpeechSeq2Seq.from_pretrained(model_id, use_safetensors=True),
    {torch.nn.Linear},
    dtype=torch.qint8,
)
processor = AutoProcessor.from_pretrained(model_id)

# CPU speech-recognition pipeline; audio is processed in 30 s chunks with a
# 5 s stride on each side.
transcriber = pipeline(
    "automatic-speech-recognition",
    model=model,
    feature_extractor=processor.feature_extractor,
    tokenizer=processor.tokenizer,
    chunk_length_s=30,
    stride_length_s=(5, 5),
    device=-1,  # CPU
)
# Urdu sentence terminator (ARABIC FULL STOP). Written as an escape so the
# character cannot be corrupted by a wrong file encoding.
_URDU_FULL_STOP = "\u06d4"


def _ensure_urdu_full_stop(text):
    """Return *text* stripped on the right and ending with the Urdu full stop.

    A trailing ASCII "." is replaced by the Urdu full stop; text that already
    ends with it is left alone; empty or whitespace-only text yields "".
    """
    text = text.rstrip()
    if not text:
        # Nothing was transcribed: do not emit a lone punctuation mark.
        return text
    if text.endswith("."):
        return text[:-1] + _URDU_FULL_STOP
    if text.endswith(_URDU_FULL_STOP):
        return text
    return text + _URDU_FULL_STOP


def transcribe(audio):
    """Transcribe Urdu speech passed in as a Gradio ``numpy`` audio value.

    Args:
        audio: ``None`` or a ``(sample_rate, samples)`` tuple. When ``samples``
            has more than one dimension, axis 1 is averaged to get a mono
            signal.

    Returns:
        The transcription terminated by the Urdu full stop (U+06D4), or a
        short English message when no audio, an empty array, or silence was
        supplied. An empty transcription is returned as an empty string.
    """
    if audio is None:
        return "No audio provided. Please record or upload an audio file."

    sr, y = audio
    y = np.asarray(y)

    # An empty recording has no peak (np.max raises on a zero-size array), so
    # report it the same way as silence.
    if y.size == 0:
        return "Audio appears to be silent. Please try again."

    # Down-mix to mono, then peak-normalize to [-1, 1].
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if not peak > 0:
        return "Audio appears to be silent. Please try again."
    y /= peak

    # Inference only: no gradients are needed.
    with torch.no_grad():
        result = transcriber({"sampling_rate": sr, "raw": y})

    text = result.get("text", "")
    if not text:
        return text
    return _ensure_urdu_full_stop(text)
# —— Gradio UI ——
# HTML blurb passed to the interface as its description.
description = (
    "\n"
    "<p style='text-align: center'>\n"
    "Record or upload audio in Urdu and get the transcribed text using the Whisper Base Urdu model.\n"
    "</p>\n"
)

# One single-input example row per bundled sample clip.
examples = [[f"samples/audio{index}.mp3"] for index in range(1, 4)]

# Single-function interface: audio in (microphone or upload), text out.
demo = gr.Interface(
    title="⚑Fast Urdu Speech Recognition",
    description=description,
    fn=transcribe,
    inputs=gr.Audio(
        label="Record or Upload Audio (Urdu)",
        type="numpy",
        sources=["microphone", "upload"],
    ),
    outputs=gr.Textbox(
        placeholder="Transcribed Urdu text will appear here...",
        label="Transcribed Text (Urdu)",
    ),
    examples=examples,
    theme=gr.themes.Soft(),
    allow_flagging="never",
)

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()