Spaces:
No application file
No application file
| import streamlit as st | |
| import librosa | |
| import tempfile | |
| from transformers import pipeline | |
| from TTS.api import TTS | |
| from ui import render_header, render_sidebar ,render_status | |
# Page configuration is issued first, ahead of the shared UI helpers
# imported from the local `ui` module and the page title.
st.set_page_config(layout="wide", page_title="Voice Clone Translator")
for _render in (render_header, render_sidebar, render_status):
    _render()
st.title("ποΈ Voice Cloning Translator (English β Hindi / French / Japanese)")
| # -------- Load models -------- | |
@st.cache_resource
def load_asr():
    """Return the Whisper speech-recognition pipeline, built once per process.

    Streamlit re-executes the whole script on every widget interaction, so
    the loader is wrapped in ``st.cache_resource``: the model is constructed
    on the first call and the same pipeline object is reused afterwards
    instead of being rebuilt on each rerun.

    Returns:
        A ``transformers`` "automatic-speech-recognition" pipeline for
        ``openai/whisper-small`` running on CPU (``device=-1``).
    """
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-small",
        device=-1,
    )
@st.cache_resource
def load_translator(model_name, target_lang):
    """Return a CPU translation pipeline for ``model_name``, cached per arguments.

    ``st.cache_resource`` keeps one pipeline per ``(model_name, target_lang)``
    pair, so repeated conversions do not reload the model.

    Args:
        model_name: Hugging Face model identifier.
        target_lang: Target language code; only forwarded to the pipeline
            for ``facebook/m2m100*`` models.

    Returns:
        A ``transformers`` "translation" pipeline running on CPU.
    """
    pipeline_kwargs = {"model": model_name, "device": -1}
    if model_name.startswith("facebook/m2m100"):
        # M2M100 checkpoints get an explicit English source and the requested
        # target; every other model is loaded without language arguments
        # (presumably because its direction is fixed by the checkpoint).
        pipeline_kwargs.update(src_lang="en", tgt_lang=target_lang)
    return pipeline("translation", **pipeline_kwargs)
@st.cache_resource
def load_xtts():
    """Return the XTTS v2 text-to-speech model, built once per process.

    Wrapped in ``st.cache_resource`` so that Streamlit's script reruns reuse
    the already-loaded model instead of constructing it again on every
    interaction.

    Returns:
        A ``TTS`` instance for
        ``tts_models/multilingual/multi-dataset/xtts_v2`` with ``gpu=False``.
    """
    return TTS(
        "tts_models/multilingual/multi-dataset/xtts_v2",
        gpu=False,
    )
# Module-level handles used by the processing step below: the speech
# recogniser first, then the voice-cloning synthesiser.
asr, xtts = load_asr(), load_xtts()
| # -------- Language config -------- | |
# Per-language settings keyed by the display name shown in the selector:
#   translator - Hugging Face model id used for English -> target translation
#   code       - language code passed to the translator and to XTTS
#   file       - file name offered for the generated audio download
LANGS = dict(
    Hindi=dict(
        translator="Helsinki-NLP/opus-mt-en-hi",
        code="hi",
        file="hindi_my_voice.wav",
    ),
    French=dict(
        translator="Helsinki-NLP/opus-mt-en-fr",
        code="fr",
        file="french_my_voice.wav",
    ),
    Japanese=dict(
        translator="facebook/m2m100_418M",
        code="ja",
        file="japanese_my_voice.wav",
    ),
)
| # -------- UI -------- | |
# Input widgets: target language, an English WAV upload, a free-text box,
# and the button that triggers the conversion.
target_lang = st.selectbox("Select Target Language", options=list(LANGS))
uploaded = st.file_uploader("Upload English voice (WAV)", type=["wav"])
text_input = st.text_area("Or type English text")
convert = st.button("Convert to Voice")

# Output tabs, filled in by the processing step.
_tab_labels = ["π Text", "π Translation", "π Voice"]
tab1, tab2, tab3 = st.tabs(_tab_labels)
| # -------- Processing -------- | |
import os

# Conversion pipeline, run when the button is pressed:
#   uploaded WAV -> Whisper transcript -> translation -> XTTS synthesis
#   in the uploaded speaker's voice.
# All intermediate files live in a temporary directory that is removed when
# the run finishes (or stops early), so nothing accumulates on disk and
# concurrent sessions never share an output path.
if convert:
    typed_text = text_input.strip()
    if not uploaded and not typed_text:
        st.warning("Upload audio or type text.")
    elif not uploaded:
        # Text alone is not enough: XTTS needs a reference recording of the
        # speaker to clone.
        st.warning("β οΈ Upload a voice sample to clone your voice.")
        st.stop()
    else:
        # NOTE(review): typed text is never used for synthesis here; when
        # audio is uploaded its transcript is always the English source.
        # Confirm whether typed text should take precedence.
        lang_cfg = LANGS[target_lang]
        with st.spinner("Processing (CPU β slow but working)..."):
            with tempfile.TemporaryDirectory() as workdir:
                # -------- Get English text --------
                speaker_path = os.path.join(workdir, "speaker.wav")
                with open(speaker_path, "wb") as speaker_file:
                    # getvalue() returns the full upload regardless of the
                    # stream's current read position.
                    speaker_file.write(uploaded.getvalue())

                # Load at 16 kHz before handing the samples to the ASR pipeline.
                audio, _sample_rate = librosa.load(speaker_path, sr=16000)
                english_text = asr(audio)["text"].strip()
                if not english_text:
                    # Nothing to translate or speak; stop before the
                    # translator and XTTS are invoked with empty text.
                    st.warning("No speech was recognized in the uploaded audio.")
                    st.stop()

                with tab1:
                    st.subheader("Recognized English")
                    st.write(english_text)

                # -------- Translation --------
                translator = load_translator(
                    lang_cfg["translator"],
                    lang_cfg["code"],
                )
                translated_text = translator(english_text)[0]["translation_text"]

                with tab2:
                    st.subheader(f"{target_lang} Text")
                    st.write(translated_text)

                # -------- XTTS (Real Voice Cloning) --------
                out_path = os.path.join(workdir, "out.wav")
                xtts.tts_to_file(
                    text=translated_text,
                    speaker_wav=speaker_path,
                    language=lang_cfg["code"],
                    file_path=out_path,
                    split_sentences=False,
                )

                # Read the result into memory so the widgets below do not
                # depend on a file that is about to be deleted, and so no
                # file handle is left open.
                with open(out_path, "rb") as synthesized:
                    audio_bytes = synthesized.read()

            with tab3:
                st.subheader(f"{target_lang} Voice (Your Voice)")
                st.audio(audio_bytes, format="audio/wav")
                st.download_button(
                    "β¬ Download Audio",
                    audio_bytes,
                    file_name=lang_cfg["file"],
                    mime="audio/wav",
                )