Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torchaudio | |
| from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC, MarianMTModel, MarianTokenizer | |
# Hub checkpoint identifiers for the two pipelines used by this app.
translation_model_name = "Helsinki-NLP/opus-mt-en-ml"
asr_model_name = "facebook/wav2vec2-large-960h"

# Translation pipeline: Marian tokenizer plus the matching seq2seq model.
translation_tokenizer = MarianTokenizer.from_pretrained(translation_model_name)
translation_model = MarianMTModel.from_pretrained(translation_model_name)

# Speech-recognition pipeline: Wav2Vec2 processor plus the CTC model.
asr_processor = Wav2Vec2Processor.from_pretrained(asr_model_name)
asr_model = Wav2Vec2ForCTC.from_pretrained(asr_model_name)
# Translation function
def translate_text(text):
    """Translate a piece of text with the module-level Marian model.

    Args:
        text: Source text to translate. ``None``, empty, or whitespace-only
            input is treated as "nothing to translate".

    Returns:
        The decoded translation of ``text`` with special tokens removed, or
        ``""`` when there was no input to translate.
    """
    # A blank textbox reaches this function as "" (or None). Running beam
    # search on an empty prompt would decode whatever the model happens to
    # generate from nothing, so short-circuit instead.
    if text is None or not text.strip():
        return ""

    # truncation=True clips over-long input to the tokenizer's configured
    # maximum instead of letting it overflow the model's position limit.
    inputs = translation_tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )
    outputs = translation_model.generate(
        **inputs,
        max_length=128,
        num_beams=4,
        early_stopping=True,
    )
    # A single input string yields a single sequence; decode that one.
    return translation_tokenizer.decode(outputs[0], skip_special_tokens=True)
# Speech-to-text function
def speech_to_text(audio_path):
    """Transcribe an audio file with the module-level Wav2Vec2 CTC model.

    The waveform is mixed down to one channel and resampled to the rate the
    feature extractor was configured with before it is handed to the model.

    Args:
        audio_path: Path to an audio file readable by ``torchaudio.load``.

    Returns:
        The greedy (argmax) CTC transcription of the recording as a string.
    """
    # Local import: the file's import header does not bring ``torch`` into
    # scope, and it is only needed here to disable autograd.
    import torch

    speech, rate = torchaudio.load(audio_path)

    # torchaudio returns (channels, frames). Average the channels so stereo
    # input becomes a single 1-D waveform rather than a batch of two clips.
    if speech.dim() > 1:
        speech = speech.mean(dim=0)

    # The feature extractor rejects audio whose sampling rate differs from
    # the one it was configured with, so resample to that rate up front.
    target_rate = asr_processor.feature_extractor.sampling_rate
    if rate != target_rate:
        speech = torchaudio.functional.resample(
            speech, orig_freq=rate, new_freq=target_rate
        )

    input_values = asr_processor(
        speech.numpy(),
        sampling_rate=target_rate,
        return_tensors="pt",
    ).input_values

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = asr_model(input_values).logits

    predicted_ids = logits.argmax(dim=-1)
    return asr_processor.batch_decode(predicted_ids)[0]
# Combined function for Gradio interface
def translate_speech(audio_path):
    """Transcribe a recording and translate the transcript.

    Args:
        audio_path: Path to the audio file to transcribe.

    Returns:
        The translation of the transcript, or ``""`` when the recording
        produced no transcript (e.g. silence).
    """
    transcript = speech_to_text(audio_path).strip()

    # Nothing was recognised: do not send an empty prompt to the translator.
    if not transcript:
        return ""

    # NOTE(review): the wav2vec2 "960h" checkpoints decode to upper-case
    # letters only, while the Marian tokenizer is case-sensitive. Lower-casing
    # gives the translator ordinary-looking English instead of all-caps text.
    return translate_text(transcript.lower())
# Gradio interface
def _translate_inputs(text, audio_path):
    """Run whichever of the two translation paths actually received input.

    Args:
        text: Contents of the English textbox; may be empty or ``None``.
        audio_path: File path supplied by the audio component, or ``None``
            when no audio was provided.

    Returns:
        A ``(from_text, from_speech)`` tuple. An entry is ``None`` when the
        corresponding input was not provided, which leaves that output
        textbox blank.
    """
    # The description promises "either text or speech", so a missing input
    # must be skipped rather than pushed through the translation model.
    has_text = bool(text and text.strip())
    from_text = translate_text(text) if has_text else None
    from_speech = translate_speech(audio_path) if audio_path else None
    return from_text, from_speech


iface = gr.Interface(
    fn=_translate_inputs,
    inputs=[gr.Textbox(label="Input English Text"), gr.Audio(type="filepath")],
    outputs=[
        gr.Textbox(label="Translated Malayalam Text (from Text)"),
        gr.Textbox(label="Translated Malayalam Text (from Speech)"),
    ],
    title="English to Malayalam Translator",
    description="Translate English text or speech to Malayalam. Either enter text or speak into the microphone.",
)

if __name__ == "__main__":
    iface.launch()