Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import whisper | |
| from transformers import pipeline | |
| import speech_recognition as sr | |
| from gtts import gTTS | |
| import tempfile | |
| import os | |
| import base64 | |
# Load Whisper model for Speech-to-Text
@st.cache_resource
def load_whisper_model():
    """Load the Whisper "base" checkpoint, reusing it across script reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    without caching the model would be loaded again each time. The
    ``st.cache_resource`` decorator keeps a single shared instance alive.

    Returns:
        The model object returned by ``whisper.load_model("base")``.
    """
    return whisper.load_model("base")  # You can use other models like "large" for better accuracy
# Load translation model from Hugging Face
@st.cache_resource
def load_translation_model(input_lang, output_lang):
    """Build a translation pipeline for one language pair, cached per pair.

    The checkpoint name is derived as
    ``Helsinki-NLP/opus-mt-{input_lang}-{output_lang}``. Streamlit re-executes
    the script on every interaction, so the pipeline is cached with
    ``st.cache_resource`` (keyed on both arguments) instead of being rebuilt
    on each rerun.

    Args:
        input_lang: Source language code used in the checkpoint name.
        output_lang: Target language code used in the checkpoint name.

    Returns:
        The object returned by ``pipeline("translation", model=...)``.

    Note:
        Whatever ``pipeline`` raises when the derived checkpoint cannot be
        loaded (for example, because no model is published for the pair)
        propagates to the caller.
    """
    model_name = f"Helsinki-NLP/opus-mt-{input_lang}-{output_lang}"
    return pipeline("translation", model=model_name)
# Initialize speech recognition
recognizer = sr.Recognizer()


def speech_to_text(audio_file, language="en-US"):
    """Transcribe an audio file with ``Recognizer.recognize_google``.

    Args:
        audio_file: Path or file-like object accepted by ``sr.AudioFile``.
        language: Language tag forwarded to ``recognize_google``. Defaults to
            ``"en-US"``, which is the library's own default, so existing
            single-argument callers behave exactly as before. Pass the tag of
            the spoken language to transcribe non-English audio.

    Returns:
        The transcription returned by ``recognize_google``.

    Raises:
        sr.UnknownValueError: The speech could not be understood.
        sr.RequestError: The recognition service could not be reached or
            rejected the request.
        ValueError: ``sr.AudioFile`` could not read the input as a supported
            audio format.
    """
    with sr.AudioFile(audio_file) as source:
        # record() reads the whole file into an in-memory AudioData object,
        # so the source can be released before the network request is made.
        audio_data = recognizer.record(source)
    return recognizer.recognize_google(audio_data, language=language)
def translate_text(input_text, translation_pipeline):
    """Translate ``input_text`` using the supplied pipeline callable.

    Args:
        input_text: Text handed to the pipeline unchanged.
        translation_pipeline: Callable invoked with ``input_text``; its result
            is indexed with ``[0]["translation_text"]``.

    Returns:
        The ``"translation_text"`` value of the first result.
    """
    results = translation_pipeline(input_text)
    first_result = results[0]
    return first_result["translation_text"]
def text_to_speech(text, language):
    """Synthesize ``text`` to an MP3 file with gTTS and return its path.

    Args:
        text: Text to speak.
        language: Language code passed to gTTS as ``lang``.

    Returns:
        Path of a newly created ``.mp3`` temporary file. The file is created
        with ``delete=False``, so the caller is responsible for removing it.
    """
    tts = gTTS(text=text, lang=language)
    # Only the unique path is needed; close the handle immediately so it is
    # not leaked and so gTTS can open the path itself for writing (an open
    # handle blocks a second open on some platforms).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
        output_path = temp_file.name
    tts.save(output_path)
    return output_path
# Streamlit UI
st.title("Voice-to-Voice Translator")

# Sidebar for user input
st.sidebar.header("Settings")
LANGUAGE_CODES = ["en", "fr", "es", "de", "hi"]
input_lang = st.sidebar.selectbox("Select Input Language", LANGUAGE_CODES)
output_lang = st.sidebar.selectbox("Select Output Language", LANGUAGE_CODES)

# Initialize models
# NOTE(review): whisper_model is not referenced anywhere in the visible
# script (transcription goes through speech_to_text); kept in case code
# outside this view relies on it -- confirm and drop the load if unused.
whisper_model = load_whisper_model()

# Both selectboxes default to the first entry, so on first load the two
# languages are identical. That is not a translation direction, so skip
# loading a model and pass the recognized text through unchanged instead.
if input_lang == output_lang:
    translation_pipeline = None
else:
    try:
        translation_pipeline = load_translation_model(input_lang, output_lang)
    except OSError as err:
        # Model loading failed, e.g. no checkpoint exists for this pair.
        st.error(
            f"Could not load a translation model for "
            f"{input_lang} -> {output_lang}: {err}"
        )
        st.stop()

# Input options
st.header("Input your voice or text")

# Option to upload an audio file
audio_file = st.file_uploader("Upload an audio file (WAV format)", type=["wav"])

if audio_file is not None:
    # Convert speech to text; report recognition failures in the UI instead
    # of crashing the script with a traceback.
    try:
        input_text = speech_to_text(audio_file)
    except sr.UnknownValueError:
        st.error("Could not understand the audio. Please try a clearer recording.")
        st.stop()
    except sr.RequestError as err:
        st.error(f"Speech recognition service request failed: {err}")
        st.stop()
    except ValueError as err:
        st.error(f"Could not read the uploaded audio file: {err}")
        st.stop()
    st.write(f"Recognized Text: {input_text}")

    # Translate the text (or pass it through when no translation is needed)
    if translation_pipeline is None:
        st.info("Input and output languages are the same; skipping translation.")
        translated_text = input_text
    else:
        translated_text = translate_text(input_text, translation_pipeline)
    st.success(f"Translated Text: {translated_text}")

    # Convert translated text to speech
    output_audio = text_to_speech(translated_text, output_lang)
    try:
        with open(output_audio, "rb") as audio_handle:
            audio_bytes = audio_handle.read()
    finally:
        # The audio is served from memory below, so the temporary file is no
        # longer needed; remove it so files do not pile up across reruns.
        os.remove(output_audio)

    # Play the audio
    st.audio(audio_bytes, format="audio/mp3")

    # Provide download link for audio
    b64 = base64.b64encode(audio_bytes).decode()
    href = f'<a href="data:audio/mp3;base64,{b64}" download="translated_audio.mp3">Download Translated Audio</a>'
    st.markdown(href, unsafe_allow_html=True)
else:
    st.write("Please upload an audio file to get started.")