Spaces:
Runtime error
Runtime error
Pawan Kumar Pradhan committed on
Commit ·
814a890
1
Parent(s): 7fe9462
update lang dropdown
Browse files
app.py
CHANGED
|
@@ -16,8 +16,6 @@ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
|
|
| 16 |
# Directory used for generated audio; created up front (no error if it already exists).
output_dir = "output_audio"
|
| 17 |
os.makedirs(output_dir, exist_ok=True)
|
| 18 |
|
| 19 |
-
|
| 20 |
-
|
| 21 |
def transcribeaudio(audiofile):
|
| 22 |
print("Transcribing audio...")
|
| 23 |
tresult = model.transcribe(audiofile)
|
|
@@ -36,102 +34,124 @@ def transcribeaudio(audiofile):
|
|
| 36 |
|
| 37 |
return {"text": tresult["text"], "language": detected_language}
|
| 38 |
|
| 39 |
-
def translatetext(text, source_lang):
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
print(f"{lang_name} Translation: {translated_text}")
|
| 49 |
-
except Exception as e:
|
| 50 |
-
print(f"Error translating to {lang_name}: {str(e)}")
|
| 51 |
-
translations[lang_code] = f"Error: Could not translate to {lang_name}"
|
| 52 |
-
|
| 53 |
-
return [translations[lang] for lang in ["es", "fr", "hi"]]
|
| 54 |
|
| 55 |
def readtranslation(text, audiofile, language):
    """Synthesise ``text`` to a uniquely named .wav file and return its path.

    Args:
        text: The text to speak.
        audiofile: Audio file handed to the TTS engine as ``speaker_wav``
            (presumably the reference voice -- confirm against the TTS docs).
        language: Language code; also used as the output file-name prefix.

    Returns:
        Path of the generated file inside ``output_dir``.
    """
    # A random UUID keeps successive runs from overwriting each other.
    wav_name = f"{language}_{uuid.uuid4()}.wav"
    destination = os.path.join(output_dir, wav_name)
    print(f"Generating TTS for text: {text}")
    tts.tts_to_file(text=text, file_path=destination, speaker_wav=audiofile, language=language)
    print(f"Generated audio file at: {destination}")
    return destination
|
| 64 |
|
| 65 |
-
def
|
|
|
|
|
|
|
|
|
|
| 66 |
progress(0, desc="Starting process...")
|
| 67 |
try:
|
| 68 |
progress(0.2, desc="Transcribing audio...")
|
| 69 |
transcription_result = transcribeaudio(audiofile)
|
| 70 |
-
|
| 71 |
if isinstance(transcription_result, dict) and transcription_result.get("status") == "error":
|
| 72 |
raise gr.Error(transcription_result["error"])
|
| 73 |
-
|
| 74 |
text = transcription_result["text"]
|
| 75 |
detected_language = transcription_result["language"]
|
| 76 |
-
|
| 77 |
progress(0.4, desc="Translating text...")
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
progress((i + 1) * 0.1 + 0.5, desc=f"Generating {lang} audio...")
|
| 84 |
-
try:
|
| 85 |
-
audio_path = readtranslation(translation, audiofile, lang)
|
| 86 |
-
audio_paths.append(audio_path)
|
| 87 |
-
except Exception as e:
|
| 88 |
-
print(f"Error generating audio for {lang}: {str(e)}")
|
| 89 |
-
audio_paths.append(None)
|
| 90 |
-
|
| 91 |
progress(1.0, desc="Process complete!")
|
| 92 |
-
return
|
| 93 |
except Exception as e:
|
| 94 |
raise gr.Error(f"An error occurred: {str(e)}")
|
| 95 |
finally:
|
| 96 |
cleanup_memory()
|
| 97 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
with gr.Blocks() as demo:
|
| 99 |
gr.Markdown("## Record yourself in any language and immediately receive voice translations.")
|
|
|
|
| 100 |
with gr.Row():
|
| 101 |
with gr.Column():
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
with gr.Row():
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)
|
| 129 |
-
|
| 130 |
-
def cleanup_memory():
    """Run a garbage-collection pass, empty the CUDA cache, and log completion."""
    # Collect Python garbage first, then ask PyTorch to drop its cached CUDA blocks.
    for release in (gc.collect, torch.cuda.empty_cache):
        release()
    print("Memory cleaned up")
|
| 134 |
|
| 135 |
# Script entry point: start the Gradio app only when this file is run directly.
# NOTE(review): the last line below is shown as removed by this commit; its text may be partial.
if __name__ == "__main__":
|
| 136 |
demo.launch()
|
| 137 |
-
cleanup_memory()
|
|
|
|
| 16 |
# Directory that receives the generated .wav files; created up front
# (exist_ok=True makes this a no-op when it is already there).
output_dir = "output_audio"
|
| 17 |
os.makedirs(output_dir, exist_ok=True)
|
| 18 |
|
|
|
|
|
|
|
| 19 |
def transcribeaudio(audiofile):
|
| 20 |
print("Transcribing audio...")
|
| 21 |
tresult = model.transcribe(audiofile)
|
|
|
|
| 34 |
|
| 35 |
return {"text": tresult["text"], "language": detected_language}
|
| 36 |
|
| 37 |
+
def translatetext(text, source_lang, target_lang):
    """Translate ``text`` from ``source_lang`` into ``target_lang``.

    Args:
        text: The text to translate.
        source_lang: Language code of ``text``.
        target_lang: Language code to translate into.

    Returns:
        The translated text. On any failure the exception is logged and the
        string ``"Error: Could not translate to <target_lang>"`` is returned
        instead of raising, so callers must check for that value.
    """
    try:
        outcome = Translator(from_lang=source_lang, to_lang=target_lang).translate(text)
        print(f"Translated text: {outcome}")
    except Exception as err:
        # Best-effort contract: report the failure through the return value.
        print(f"Error translating to {target_lang}: {err!s}")
        return f"Error: Could not translate to {target_lang}"
    return outcome
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
def readtranslation(text, audiofile, language):
    """Synthesise ``text`` to a uniquely named .wav file and return its path.

    Args:
        text: The text to speak.
        audiofile: Audio file handed to the TTS engine as ``speaker_wav``
            (presumably the reference voice -- confirm against the TTS docs).
        language: Language code; also used as the output file-name prefix.

    Returns:
        Path of the generated file inside ``output_dir``.
    """
    # A random UUID keeps successive runs from overwriting each other.
    wav_name = f"{language}_{uuid.uuid4()}.wav"
    destination = os.path.join(output_dir, wav_name)
    print(f"Generating TTS for text: {text}")
    tts.tts_to_file(
        text=text,
        file_path=destination,
        speaker_wav=audiofile,
        language=language,
    )
    print(f"Generated audio file at: {destination}")
    return destination
|
| 53 |
|
| 54 |
+
def v2vtranslate(audiofile, selected_lang, COQUI_TOS_AGREED, progress=gr.Progress()):
    """Transcribe a recording, translate it, and speak the translation.

    Args:
        audiofile: Path of the recorded audio. It is transcribed and also
            passed on to ``readtranslation`` for the synthesis step.
        selected_lang: Target language code chosen in the UI.
        COQUI_TOS_AGREED: Whether the user ticked the terms checkbox.
        progress: Gradio progress tracker (default supplied by Gradio).

    Returns:
        ``(audio_path, translated_text)`` on success, or ``(None, None)``
        when the terms have not been accepted. Exactly two values are
        returned in both cases because the handler feeds two output
        components.

    Raises:
        gr.Error: If no audio was supplied, or if transcription,
            translation or synthesis fails.
    """
    if not COQUI_TOS_AGREED:
        gr.Warning("Please accept the Terms & Condition!")
        # One value per wired output component (audio, text).
        return None, None

    if not audiofile:
        # Submitting without a recording would otherwise surface as an
        # opaque transcription failure.
        raise gr.Error("No audio was recorded. Please record your voice first.")

    progress(0, desc="Starting process...")
    try:
        progress(0.2, desc="Transcribing audio...")
        transcription_result = transcribeaudio(audiofile)

        if isinstance(transcription_result, dict) and transcription_result.get("status") == "error":
            raise gr.Error(transcription_result["error"])

        text = transcription_result["text"]
        detected_language = transcription_result["language"]

        progress(0.4, desc="Translating text...")
        translated_text = translatetext(text, detected_language, selected_lang)
        # translatetext signals failure by returning this exact string rather
        # than raising; stop here so the error message is not synthesised
        # as speech.
        if translated_text == f"Error: Could not translate to {selected_lang}":
            raise gr.Error(translated_text)

        progress(0.7, desc="Generating audio...")
        audio_path = readtranslation(translated_text, audiofile, selected_lang)

        progress(1.0, desc="Process complete!")
        return audio_path, translated_text
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        raise gr.Error(f"An error occurred: {str(e)}") from e
    finally:
        cleanup_memory()
|
| 90 |
+
|
| 91 |
# Gradio UI: a recorder, a target-language dropdown and a terms checkbox feed
# v2vtranslate, which fills the translated-audio player and the text panel.
# NOTE(review): the original indentation is not visible in this view; the
# nesting of the inputs under the Column and of the second Row is
# reconstructed -- confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## Record yourself in any language and immediately receive voice translations.")

    with gr.Row():
        with gr.Column():
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                show_download_button=True,
                max_length=15,
                label="Record your voice",
                waveform_options=gr.WaveformOptions(
                    waveform_color="#01C6FF",
                    waveform_progress_color="#0066B4",
                    skip_length=2,
                    show_controls=False,
                ),
            )
            language_gr = gr.Dropdown(
                label="Language",
                info="Select an output language for the synthesised speech",
                choices=[
                    "en",
                    "es",
                    "fr",
                    "de",
                    "it",
                    "pt",
                    "pl",
                    "tr",
                    "ru",
                    "nl",
                    "cs",
                    "ar",
                    "zh-cn",
                    "ja",
                    "ko",
                    "hu",
                    "hi",
                ],
                max_choices=1,
                value="es",
            )
            tos_gr = gr.Checkbox(
                label="Agree",
                value=False,
                info="I agree to the terms of the CPML: https://coqui.ai/cpml",
            )
            submit = gr.Button("Submit", variant="primary")
            reset = gr.Button("Reset")

    with gr.Row():
        output_audio = gr.Audio(label="Translated Audio", interactive=False)
        output_text = gr.Markdown()

    output_components = [output_audio, output_text]

    submit.click(
        fn=v2vtranslate,
        inputs=[audio_input, language_gr, tos_gr],
        outputs=output_components,
        show_progress=True,
    )

    # The reset handler must return one value per output component; a single
    # bare None for three components is reported as too few output values.
    reset_targets = output_components + [audio_input]
    reset.click(
        fn=lambda: [None] * len(reset_targets),
        inputs=None,
        outputs=reset_targets,
    )
|
| 150 |
+
|
| 151 |
+
def cleanup_memory():
    """Run a garbage-collection pass, empty the CUDA cache, and log completion."""
    # Collect Python garbage first, then ask PyTorch to drop its cached CUDA blocks.
    for release in (gc.collect, torch.cuda.empty_cache):
        release()
    print("Memory cleaned up")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
|
| 156 |
# Script entry point: start the Gradio app only when this file is run directly.
if __name__ == "__main__":
|
| 157 |
demo.launch()
|
|
|