Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| import librosa | |
| from transformers import WhisperProcessor, WhisperForConditionalGeneration | |
| # ========================= | |
| # CONFIG | |
| # ========================= | |
# Model identifier passed to `from_pretrained` for both the processor and the
# model weights.
MODEL_ID = "afaqalinagra/PASHTO-ASR-MODEL"
# Device the model is moved to and that input features are sent to.
DEVICE = "cpu"
# Sample rate (Hz) that incoming audio is resampled to before it is handed to
# the processor.
TARGET_SR = 16000

# =========================
# LOAD MODEL
# =========================
# Both objects are created once at import time and reused by every request.
processor = WhisperProcessor.from_pretrained(MODEL_ID, language="pashto", task="transcribe")

model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
model = model.to(DEVICE)
model.eval()  # evaluation mode: this app only runs inference
| # ========================= | |
| # SAFE AUDIO HANDLER | |
| # ========================= | |
def _to_mono_float32(waveform):
    """Return *waveform* as a mono float32 array, scaling integer PCM to [-1, 1]."""
    samples = np.asarray(waveform)
    kind = samples.dtype.kind

    if kind == "i":
        # Signed PCM (e.g. int16): divide by the magnitude of the most
        # negative representable value so full scale maps to [-1, 1).
        full_scale = -float(np.iinfo(samples.dtype).min)
        samples = samples.astype(np.float32) / full_scale
    elif kind == "u":
        # Unsigned PCM is centred on the midpoint of its range.
        midpoint = (float(np.iinfo(samples.dtype).max) + 1.0) / 2.0
        samples = (samples.astype(np.float32) - midpoint) / midpoint
    else:
        samples = samples.astype(np.float32)

    # Convert stereo -> mono by averaging across the channel axis
    # (assumes a (samples, channels) layout, as the original code did).
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    return samples


def transcribe_audio(audio):
    """Transcribe a Gradio audio payload with the module-level Whisper model.

    Args:
        audio: Either ``None``, a ``(sample_rate, waveform)`` tuple, or a dict
            with ``"data"`` and ``"sampling_rate"`` keys. ``waveform`` may be
            a NumPy array or any array-like of samples.

    Returns:
        The stripped transcription text, or one of the messages
        ``"No audio provided."``, ``"Invalid audio input."``,
        ``"Empty audio."`` or ``"No speech detected."``.
    """
    if audio is None:
        return "No audio provided."

    # ---- HANDLE BOTH GRADIO FORMATS ----
    if isinstance(audio, dict):
        waveform = audio.get("data")
        sample_rate = audio.get("sampling_rate")
    else:
        sample_rate, waveform = audio

    if waveform is None or sample_rate is None:
        return "Invalid audio input."

    # np.asarray lets list payloads through; the original called `.ndim`
    # directly and crashed on anything that was not already an ndarray.
    samples = np.asarray(waveform)
    if samples.size == 0:
        return "Empty audio."

    # Integer PCM must be rescaled to floats in [-1, 1] before feature
    # extraction. Previously this was skipped entirely, and audio already at
    # TARGET_SR was not even cast to float.
    samples = _to_mono_float32(samples)

    # Resample to 16kHz
    if sample_rate != TARGET_SR:
        samples = librosa.resample(
            samples,
            orig_sr=sample_rate,
            target_sr=TARGET_SR,
        )

    inputs = processor(
        samples,
        sampling_rate=TARGET_SR,
        return_tensors="pt",
    )

    with torch.no_grad():
        predicted_ids = model.generate(
            inputs.input_features.to(DEVICE),
            max_length=448,
        )

    transcription = processor.batch_decode(
        predicted_ids,
        skip_special_tokens=True,
    )[0]

    # Strip first so a whitespace-only decode is also reported as "no speech".
    text = transcription.strip() if transcription else ""
    return text or "No speech detected."
| # ========================= | |
| # GLASSMORPHISM CSS | |
| # ========================= | |
# Stylesheet passed to `gr.Blocks(css=...)`. It sets a dark gradient page
# background, a centred max-width container, the translucent blurred `.glass`
# card, white centred headings/paragraphs, orange gradient buttons and a
# 16px textarea font.
CUSTOM_CSS = """
body {
background: linear-gradient(135deg, #0f2027, #203a43, #2c5364);
font-family: Inter, sans-serif;
}
.gradio-container {
max-width: 1100px !important;
margin: auto;
}
.glass {
background: rgba(255, 255, 255, 0.12);
backdrop-filter: blur(18px);
-webkit-backdrop-filter: blur(18px);
border-radius: 18px;
border: 1px solid rgba(255, 255, 255, 0.25);
box-shadow: 0 8px 32px rgba(0, 0, 0, 0.35);
padding: 24px;
}
h1, h3, p {
color: white !important;
text-align: center;
}
button {
background: linear-gradient(135deg, #ff8008, #ffc837) !important;
color: black !important;
font-weight: 600 !important;
border-radius: 10px !important;
}
textarea {
font-size: 16px !important;
}
"""
| # ========================= | |
| # UI | |
| # ========================= | |
# Single-page layout: one `.glass` card holding the heading, the audio input,
# the trigger button, the output box and the footer.
# NOTE(review): the nesting was reconstructed from a copy whose indentation
# was lost - confirm that the footer belongs inside the `.glass` column.
with gr.Blocks(css=CUSTOM_CSS) as demo:
    with gr.Column(elem_classes="glass"):
        # The heading emoji was mojibake ("๐๏ธ") in the source text; it is
        # restored here as the studio-microphone emoji - TODO confirm.
        gr.Markdown(
            """
            # 🎙️ Pashto Speech-to-Text
            ### Powered by Whisper ASR
            Upload or record Pashto audio and get accurate transcription.
            """
        )

        # type="numpy" makes Gradio hand the callback raw samples plus a
        # sample rate instead of a file path.
        audio_input = gr.Audio(
            sources=["upload", "microphone"],
            type="numpy",
            label="Upload or Record Pashto Audio",
        )

        transcribe_btn = gr.Button("Transcribe")

        output_text = gr.Textbox(
            label="Transcription Output",
            lines=6,
            placeholder="Pashto transcription will appear here...",
        )

        # Clicking the button runs the transcription and shows its result.
        transcribe_btn.click(
            fn=transcribe_audio,
            inputs=audio_input,
            outputs=output_text,
        )

        gr.Markdown(
            """
            <hr>
            <p>
            Developed for low-resource Pashto ASR using Whisper fine-tuning.<br>
            Runs entirely on Hugging Face free infrastructure.
            </p>
            """
        )

# =========================
# LAUNCH
# =========================
# Start the server only when this file is executed directly, not on import.
if __name__ == "__main__":
    demo.launch()