GaneshSarode committed on
Commit
cc0170c
·
verified ·
1 Parent(s): 4a4d0c5

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +135 -2
src/app.py CHANGED
@@ -1,4 +1,137 @@
1
  import streamlit as st
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
- st.title("Voice Clone XTTS")
4
- st.write("App is running")
 
1
  import streamlit as st
2
+ import librosa
3
+ import tempfile
4
+ from transformers import pipeline
5
+ from TTS.api import TTS
6
+
7
+ from ui import render_header, render_sidebar ,render_status
8
+
9
# -------- Page setup --------
# The page configuration is issued first, ahead of every other Streamlit
# call in this script; the shared UI chrome from ui.py follows it.
st.set_page_config(page_title="Voice Clone Translator", layout="wide")
render_header()
render_sidebar()
render_status()
# Title restored from mis-decoded UTF-8 (microphone emoji and arrow).
st.title("🎙️ Voice Cloning Translator (English → Hindi / French / Japanese)")
14
+
15
+ # -------- Load models --------
16
@st.cache_resource
def load_asr():
    """Create the speech-recognition pipeline.

    Returns:
        The ``automatic-speech-recognition`` pipeline built for the
        ``openai/whisper-small`` checkpoint with ``device=-1``.
    """
    asr_settings = {"model": "openai/whisper-small", "device": -1}
    return pipeline("automatic-speech-recognition", **asr_settings)
23
+
24
@st.cache_resource
def load_translator(model_name, target_lang):
    """Create the translation pipeline for ``model_name``.

    Args:
        model_name: Checkpoint identifier handed to ``pipeline``.
        target_lang: Target language code. It is only forwarded (as
            ``tgt_lang``) when ``model_name`` starts with
            ``"facebook/m2m100"``; other checkpoints are built without
            explicit language arguments.

    Returns:
        A ``"translation"`` pipeline created with ``device=-1``.
    """
    options = {"model": model_name, "device": -1}
    if model_name.startswith("facebook/m2m100"):
        # This checkpoint family is given the language pair explicitly;
        # the source side is always English here.
        options.update(src_lang="en", tgt_lang=target_lang)
    return pipeline("translation", **options)
40
+
41
+
42
@st.cache_resource
def load_xtts():
    """Create the XTTS v2 text-to-speech model with ``gpu=False``.

    Returns:
        The ``TTS`` instance for
        ``tts_models/multilingual/multi-dataset/xtts_v2``.
    """
    model_id = "tts_models/multilingual/multi-dataset/xtts_v2"
    return TTS(model_id, gpu=False)
48
+
49
# Module-level handles used by the processing step below: the speech
# recognizer first, then the voice-cloning synthesizer.
asr, xtts = load_asr(), load_xtts()
51
+
52
+ # -------- Language config --------
53
# Per-language settings keyed by the display name shown in the selector:
#   translator - checkpoint name passed to load_translator
#   code       - language code passed to the translator and to XTTS
#   file       - file name offered for the generated audio download
LANGS = {
    display_name: {"translator": checkpoint, "code": lang_code, "file": wav_name}
    for display_name, checkpoint, lang_code, wav_name in (
        ("Hindi", "Helsinki-NLP/opus-mt-en-hi", "hi", "hindi_my_voice.wav"),
        ("French", "Helsinki-NLP/opus-mt-en-fr", "fr", "french_my_voice.wav"),
        ("Japanese", "facebook/m2m100_418M", "ja", "japanese_my_voice.wav"),
    )
}
70
+
71
+ # -------- UI --------
72
# Input widgets: target language, the English recording to transcribe and
# clone, an optional text box, and the button that triggers processing.
target_lang = st.selectbox("Select Target Language", list(LANGS.keys()))
uploaded = st.file_uploader("Upload English voice (WAV)", type=["wav"])
text_input = st.text_area("Or type English text")
convert = st.button("Convert to Voice")

# Result tabs. The emoji were mis-decoded UTF-8 in the original labels;
# NOTE(review): the exact glyphs are a best-effort reconstruction - confirm.
tab1, tab2, tab3 = st.tabs(["📝 Text", "🌍 Translation", "🔊 Voice"])
77
+ # -------- Processing --------
78
# Pipeline run on button press:
#   uploaded WAV -> English transcript -> translated text -> cloned speech.
if convert:
    if not uploaded and not text_input.strip():
        st.warning("Upload audio or type text.")
    elif not uploaded:
        # Typed text alone is not enough: the synthesis step below needs the
        # uploaded recording as its speaker reference (speaker_wav).
        st.warning("⚠️ Upload a voice sample to clone your voice.")
        st.stop()
    else:
        lang_cfg = LANGS[target_lang]

        with st.spinner("Processing (CPU – slow but working)..."):
            # All scratch files live in a private directory that is removed
            # when this block exits, so nothing is left behind on disk and
            # concurrent sessions never write to the same output path.
            with tempfile.TemporaryDirectory() as workdir:

                # -------- Get English text --------
                with tempfile.NamedTemporaryFile(
                    dir=workdir, suffix=".wav", delete=False
                ) as speaker_tmp:
                    # getvalue() yields the whole upload regardless of the
                    # buffer's current read position.
                    speaker_tmp.write(uploaded.getvalue())
                    speaker_path = speaker_tmp.name

                audio, _ = librosa.load(speaker_path, sr=16000)
                english_text = asr(audio)["text"]

                with tab1:
                    st.subheader("Recognized English")
                    st.write(english_text)

                # -------- Translation --------
                translator = load_translator(
                    lang_cfg["translator"],
                    lang_cfg["code"],
                )
                translated_text = translator(english_text)[0]["translation_text"]

                with tab2:
                    st.subheader(f"{target_lang} Text")
                    st.write(translated_text)

                # -------- XTTS (Real Voice Cloning) --------
                # Reserve a unique output name inside the scratch directory.
                with tempfile.NamedTemporaryFile(
                    dir=workdir, suffix=".wav", delete=False
                ) as out_tmp:
                    out_path = out_tmp.name

                xtts.tts_to_file(
                    text=translated_text,
                    speaker_wav=speaker_path,
                    language=lang_cfg["code"],
                    file_path=out_path,
                    split_sentences=False,
                )

                # Read the result into memory so the file handle is closed
                # and the widgets do not depend on the scratch file.
                with open(out_path, "rb") as out_file:
                    audio_bytes = out_file.read()

            with tab3:
                st.subheader(f"{target_lang} Voice (Your Voice)")
                st.audio(audio_bytes, format="audio/wav")
                st.download_button(
                    "⬇ Download Audio",
                    audio_bytes,
                    file_name=lang_cfg["file"],
                )
137