"""Streamlit voice-to-voice translator.

Flow: upload a WAV file -> transcribe it with the Google Web Speech API
(via ``speech_recognition``) -> translate with a Helsinki-NLP MarianMT
checkpoint -> synthesize the translation with gTTS -> play it back and
offer it for download.
"""

import base64
import os
import tempfile

import speech_recognition as sr
import streamlit as st
import whisper
from gtts import gTTS
from transformers import pipeline

# Language codes offered in both sidebar selectors.
LANGUAGES = ["en", "fr", "es", "de", "hi"]

# Locale tags handed to the speech recognizer for each UI language code.
# NOTE(review): region choices (e.g. es-ES rather than es-MX) are a guess at
# the most common variant -- adjust to the target audience.
SPEECH_LOCALES = {
    "en": "en-US",
    "fr": "fr-FR",
    "es": "es-ES",
    "de": "de-DE",
    "hi": "hi-IN",
}


# Load Whisper model for Speech-to-Text
@st.cache_resource
def load_whisper_model():
    """Return the Whisper "base" model (cached across Streamlit reruns)."""
    # You can use other models like "large" for better accuracy
    return whisper.load_model("base")


# Load translation model from Hugging Face
@st.cache_resource
def load_translation_model(input_lang, output_lang):
    """Return a translation pipeline for the given language pair.

    The checkpoint name is built as
    ``Helsinki-NLP/opus-mt-{input_lang}-{output_lang}``; the call fails if no
    such checkpoint can be loaded.
    """
    model_name = f"Helsinki-NLP/opus-mt-{input_lang}-{output_lang}"
    return pipeline("translation", model=model_name)


# Initialize speech recognition
recognizer = sr.Recognizer()


def speech_to_text(audio_file, language="en-US"):
    """Transcribe a WAV file (path or file-like object) to text.

    Args:
        audio_file: Anything ``sr.AudioFile`` accepts.
        language: Locale tag of the spoken language. Defaults to "en-US" so
            existing single-argument callers keep their behavior.

    Raises:
        sr.UnknownValueError: The speech could not be understood.
        sr.RequestError: The recognition service could not be reached.
    """
    with sr.AudioFile(audio_file) as source:
        audio_data = recognizer.record(source)
    return recognizer.recognize_google(audio_data, language=language)


def translate_text(input_text, translation_pipeline):
    """Return the translation of ``input_text`` produced by the pipeline."""
    return translation_pipeline(input_text)[0]["translation_text"]


def text_to_speech(text, language):
    """Synthesize ``text`` to an MP3 file and return the file's path.

    The file is created with ``delete=False``, so the caller owns it and is
    responsible for removing it once the audio has been read.
    """
    tts = gTTS(text=text, lang=language)
    # Reserve a unique path, then close our handle before gTTS writes to it:
    # writing to a path that is still held open fails on some platforms.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        mp3_path = temp_file.name
    tts.save(mp3_path)
    return mp3_path


# Streamlit UI
st.title("Voice-to-Voice Translator")

# Sidebar for user input
st.sidebar.header("Settings")
input_lang = st.sidebar.selectbox("Select Input Language", LANGUAGES)
output_lang = st.sidebar.selectbox("Select Output Language", LANGUAGES)

# Initialize models
# NOTE(review): the Whisper model is loaded here but nothing below uses it
# (transcription goes through recognize_google). Either switch
# speech_to_text over to it or drop this load to save startup time/memory.
whisper_model = load_whisper_model()

# Both selectors default to the same language, and there is no
# "opus-mt-en-en" style checkpoint, so only load a translator when the
# languages actually differ.
translation_pipeline = None
if input_lang != output_lang:
    try:
        translation_pipeline = load_translation_model(input_lang, output_lang)
    except OSError:
        # Presumably the pair has no published checkpoint (or the download
        # failed); report it instead of crashing the app with a traceback.
        st.error(
            f"No translation model could be loaded for {input_lang} -> {output_lang}. "
            "Please choose a different language pair."
        )
        st.stop()

# Input options
st.header("Input your voice or text")

# Option to upload an audio file
audio_file = st.file_uploader("Upload an audio file (WAV format)", type=["wav"])

if audio_file is not None:
    # Convert speech to text, recognizing in the selected input language.
    try:
        input_text = speech_to_text(
            audio_file, SPEECH_LOCALES.get(input_lang, input_lang)
        )
    except sr.UnknownValueError:
        st.error("Could not understand the audio. Please try a clearer recording.")
        st.stop()
    except sr.RequestError as exc:
        st.error(f"Speech recognition service is unavailable: {exc}")
        st.stop()
    st.write(f"Recognized Text: {input_text}")

    # Translate the text (identity when input and output languages match).
    if translation_pipeline is None:
        translated_text = input_text
    else:
        translated_text = translate_text(input_text, translation_pipeline)
    st.success(f"Translated Text: {translated_text}")

    # Convert translated text to speech, then remove the temp file as soon
    # as its bytes are in memory so reruns do not pile up MP3s on disk.
    output_audio = text_to_speech(translated_text, output_lang)
    try:
        with open(output_audio, "rb") as mp3_file:
            audio_bytes = mp3_file.read()
    finally:
        os.remove(output_audio)

    # Play the audio
    st.audio(audio_bytes, format="audio/mp3")

    # Provide download link for audio: embed the MP3 as a base64 data URI.
    b64 = base64.b64encode(audio_bytes).decode()
    href = (
        f'<a href="data:audio/mp3;base64,{b64}" '
        'download="translated_audio.mp3">Download Translated Audio</a>'
    )
    st.markdown(href, unsafe_allow_html=True)
else:
    st.write("Please upload an audio file to get started.")