Spaces: Hugging Face Space page — build status: Runtime error
| import gradio as gr | |
| from transformers import WhisperProcessor, WhisperForConditionalGeneration | |
| import torchaudio | |
# Lazily-initialized ASR components, cached at module level so the Whisper
# model is loaded exactly once instead of on every request (re-loading the
# model per call is very slow on a CPU Space and can cause request timeouts).
_ASR_CACHE = {}


def translate(audio):
    """Transcribe-and-translate a Tamil audio clip into English text.

    Parameters
    ----------
    audio : dict
        Expected shape: ``{"audio": {"array": <1-D float waveform>,
        "sampling_rate": <int>}}``.  The waveform should be mono at
        16 kHz (Whisper's native input rate).

    Returns
    -------
    str
        The English translation decoded by Whisper.
    """
    if not _ASR_CACHE:
        model_id_asr = "openai/whisper-small"
        _ASR_CACHE["processor"] = WhisperProcessor.from_pretrained(model_id_asr)
        _ASR_CACHE["model"] = WhisperForConditionalGeneration.from_pretrained(model_id_asr)
    processor_asr = _ASR_CACHE["processor"]
    model_asr = _ASR_CACHE["model"]

    # Force the decoder into "translate from Tamil" mode instead of relying
    # on Whisper's automatic language detection.
    forced_decoder_ids = processor_asr.get_decoder_prompt_ids(
        language="tamil", task="translate"
    )

    input_features = processor_asr(
        audio["audio"]["array"],
        sampling_rate=audio["audio"]["sampling_rate"],
        return_tensors="pt",
    ).input_features
    predicted_ids = model_asr.generate(
        input_features, forced_decoder_ids=forced_decoder_ids
    )
    transcription = processor_asr.batch_decode(predicted_ids, skip_special_tokens=True)
    return transcription[0]
def speech_to_speech_translation(audio_filepath):
    """Load a recorded audio file, normalize it, and translate it to English.

    Parameters
    ----------
    audio_filepath : str
        Path to the audio file produced by the Gradio microphone widget.

    Returns
    -------
    str
        English translation of the spoken Tamil audio.
    """
    waveform, sampling_rate = torchaudio.load(audio_filepath)

    # Whisper expects 16 kHz input; resample anything else.
    if sampling_rate != 16000:
        resampler = torchaudio.transforms.Resample(
            orig_freq=sampling_rate, new_freq=16000
        )
        waveform = resampler(waveform)
        sampling_rate = 16000

    # torchaudio.load returns a (channels, samples) tensor, but the Whisper
    # feature extractor expects a 1-D mono waveform.  The original code passed
    # the 2-D array through unchanged, which makes the processor mis-interpret
    # the input; down-mix multi-channel audio and drop the channel dimension.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=0)

    audio_dict = {
        "audio": {
            "array": waveform.numpy(),
            "sampling_rate": sampling_rate,
        }
    }
    return translate(audio_dict)
title = "Tamil Translator / Whisper AI"
description = """
This is just a simple gradio app that can record spoken tamil audio and translates it into english language
"""

mic_translate = gr.Interface(
    fn=speech_to_speech_translation,
    # NOTE(review): Gradio 4.x renamed `source=` to `sources=[...]`; passing
    # the old keyword raises a TypeError at app startup, which matches this
    # Space's "Runtime error" status.  If the Space pins Gradio 3.x instead,
    # revert to source="microphone".
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=gr.Textbox(label="Translation"),
    allow_flagging="never",
    title=title,
    description=description,
)

# Wrap the single interface in a tabbed layout inside a Blocks container,
# mirroring the original app structure.
demo = gr.Blocks()
with demo:
    gr.TabbedInterface([mic_translate], [" "])

demo.launch(debug=True, share=False)