Spaces:
Runtime error
Runtime error
import os

import gradio as gr
import torchaudio
from transformers import WhisperProcessor, WhisperFeatureExtractor, WhisperForConditionalGeneration
# Where to load the fine-tuned Whisper checkpoint from.
#
# "models/amithm3/whisper-medium" contains two slashes, so it can only be
# resolved as a local directory; the Hugging Face Hub accepts repo ids of the
# form "name" or "namespace/name" and rejects anything deeper. Prefer the
# local copy when it is present and otherwise fall back to the Hub repo.
# NOTE(review): the Hub id below is derived from the original path by dropping
# the "models/" prefix -- confirm it is the intended repository.
_LOCAL_MODEL_DIR = "models/amithm3/whisper-medium"
_HUB_REPO_ID = "amithm3/whisper-medium"
mdl = _LOCAL_MODEL_DIR if os.path.isdir(_LOCAL_MODEL_DIR) else _HUB_REPO_ID

# Processor (feature extractor + tokenizer), stand-alone feature extractor and
# the model weights are all loaded once at import time and shared by requests.
processor = WhisperProcessor.from_pretrained(mdl, task="transcribe")
feature_extractor = WhisperFeatureExtractor.from_pretrained(mdl, task="transcribe")
model = WhisperForConditionalGeneration.from_pretrained(mdl)

# Sample rate (Hz) every clip is resampled to before feature extraction.
sampling_rate = 16000
def transcribe(audio, language):
    """Transcribe one audio file with the module-level Whisper model.

    Args:
        audio: Path of the audio file to transcribe, or ``None`` when the
            request was submitted without any audio.
        language: Value assigned to ``model.generation_config.language``
            before generation (e.g. ``"kannada"``); may be ``None``.
            NOTE(review): ``None`` presumably leaves language selection to
            the model -- confirm against the installed transformers version.

    Returns:
        The decoded transcription text, with special tokens stripped.

    Raises:
        gr.Error: If ``audio`` is ``None``.
    """
    if audio is None:
        # Surface a readable message in the UI instead of an opaque failure
        # from torchaudio.load(None).
        raise gr.Error("Please record or upload an audio clip first.")

    waveform, orig_freq = torchaudio.load(audio)

    # Down-mix to a single 1-D channel. A plain squeeze() leaves stereo input
    # as a 2-D array, which the feature extractor would interpret as a batch
    # of independent clips, so only the first channel would be transcribed.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    waveform = torchaudio.functional.resample(
        waveform, orig_freq=orig_freq, new_freq=sampling_rate
    )

    input_features = processor(
        waveform.numpy(), sampling_rate=sampling_rate, return_tensors="pt"
    ).input_features

    model.generation_config.language = language
    predicted_ids = model.generate(input_features)

    # A single clip was submitted, so the batch has exactly one entry.
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
# Input widgets: the audio clip is handed to the callback as a file path, and
# the dropdown supplies the language value (defaulting to "kannada").
audio_input = gr.Audio(type="filepath")
language_input = gr.Dropdown(
    ["kannada", "english", None],
    label="Language",
    value="kannada",
)

# Wire the widgets to transcribe() and show its return value as plain text.
iface = gr.Interface(
    fn=transcribe,
    inputs=[audio_input, language_input],
    outputs="text",
    title="Whisper Medium Indic",
    description="Realtime demo for Indic speech recognition using a fine-tuned Whisper Medium model.",
)

# Start the web server for the demo.
iface.launch()