# Echo — voice translation Gradio app (app.py)
# NOTE: the original paste included Hugging Face Space page residue
# ("Afeezee's picture / Create app.py / 18ab614 verified"), removed here
# so the module is valid Python.
import os
import numpy as np
import gradio as gr
import assemblyai as aai
from translate import Translator
import uuid
from gtts import gTTS
import tempfile
from pathlib import Path
def voice_to_voice(audio_file):
    """End-to-end pipeline: transcribe English speech, translate it, and voice it.

    Args:
        audio_file: Filesystem path to the recorded audio (Gradio ``type="filepath"``).

    Returns:
        A 12-tuple consumed by the Gradio outputs: six audio file paths
        (ru, tr, sv, de, es, ja — the order produced by ``translate_text``)
        followed by the six translated strings in the same order.

    Raises:
        gr.Error: If AssemblyAI reports a failed transcription.
    """
    transcript = transcribe_audio(audio_file)
    if transcript.status == aai.TranscriptStatus.error:
        raise gr.Error(transcript.error)

    # Keep the transcript object and its text in separate names; the original
    # rebound `transcript` from object to string, which obscured intent.
    text = transcript.text

    list_translations = translate_text(text)

    # One synthesized audio file per translation, wrapped as Path for Gradio.
    generated_audio_paths = [Path(text_to_speech(t)) for t in list_translations]

    # Unpack instead of hard-coded indexing — stays correct if the language
    # list ever changes length (the UI wiring must change in lockstep anyway).
    return (*generated_audio_paths, *list_translations)
# Function to transcribe audio using AssemblyAI
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with AssemblyAI.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The AssemblyAI ``Transcript`` object (caller checks ``.status``/``.text``).
    """
    # SECURITY: this API key was previously hard-coded in source. Prefer the
    # ASSEMBLYAI_API_KEY environment variable; the literal fallback keeps
    # existing deployments working, but the exposed key should be rotated
    # and the fallback removed.
    aai.settings.api_key = os.getenv(
        "ASSEMBLYAI_API_KEY", "21f30361d02543cca65707e8f71721d8"
    )
    transcriber = aai.Transcriber()
    return transcriber.transcribe(audio_file)
# Function to translate text
def translate_text(text: str) -> list[str]:
    """Translate English text into six target languages.

    Args:
        text: English source text.

    Returns:
        Translations in fixed order: Russian, Turkish, Swedish, German,
        Spanish, Japanese. (Original annotation said ``str`` — it has
        always returned a list; annotation corrected.)
    """
    languages = ["ru", "tr", "sv", "de", "es", "ja"]
    # Comprehension replaces the manual append loop; one Translator per language.
    return [
        Translator(from_lang="en", to_lang=lan).translate(text)
        for lan in languages
    ]
# Function to generate speech with gTTS (Google Text-to-Speech)
def text_to_speech(text: str, lang: str = "en", slow: bool = True) -> str:
    """Synthesize speech from text with gTTS and save it to a temp MP3.

    Args:
        text: Text to speak.
        lang: gTTS language code. Defaults to ``"en"`` for backward
            compatibility — note callers passing translated (e.g. Russian or
            Japanese) text should supply the matching code, since an English
            voice mispronounces non-English text.
        slow: Speak slowly (the original hard-coded ``slow=True``).

    Returns:
        Path to the generated MP3 file. The file is NOT deleted
        automatically (``delete=False``); callers own cleanup.
    """
    tts = gTTS(text=text, lang=lang, slow=slow)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tts.save(tmp_file.name)
        audio_path = tmp_file.name
    return audio_path
# NOTE(review): removed dead code — a module-level `input_audio = gr.Audio(...)`
# component was built here but never referenced; the UI constructs its own
# identical `audio_input` inside the gr.Blocks context below.
# UI layout. Component creation order matters only insofar as
# `output_components` below must match the tuple order returned by
# voice_to_voice: six audio paths then six translations, both in
# (ru, tr, sv, de, es, ja) order.
with gr.Blocks() as demo:
    gr.Markdown("## Echo: Voice Translation App")
    gr.Markdown("## Record yourself in English and immediately receive voice translations.")
    with gr.Row():
        with gr.Column():
            # Microphone input; "filepath" hands voice_to_voice a path string.
            audio_input = gr.Audio(sources=["microphone"],
                type="filepath",
                show_download_button=True,
                waveform_options=gr.WaveformOptions(
                    waveform_color="#01C6FF",
                    waveform_progress_color="#0066B4",
                    skip_length=2,
                    show_controls=False,
                ),)
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
                btn = gr.ClearButton(audio_input, "Clear")
    # First row of results: one group per language, each an audio player
    # plus a Markdown area for the translated text.
    with gr.Row():
        with gr.Group() as turkish:
            tr_output = gr.Audio(label="Turkish", interactive=False)
            tr_text = gr.Markdown()
        with gr.Group() as swedish:
            sv_output = gr.Audio(label="Swedish", interactive=False)
            sv_text = gr.Markdown()
        with gr.Group() as russian:
            ru_output = gr.Audio(label="Russian", interactive=False)
            ru_text = gr.Markdown()
    # Second row of results.
    with gr.Row():
        with gr.Group():
            de_output = gr.Audio(label="German", interactive=False)
            de_text = gr.Markdown()
        with gr.Group():
            es_output = gr.Audio(label="Spanish", interactive=False)
            es_text = gr.Markdown()
        with gr.Group():
            jp_output = gr.Audio(label="Japanese", interactive=False)
            jp_text = gr.Markdown()
    # Order must mirror voice_to_voice's return: audio paths first, texts second,
    # both in (ru, tr, sv, de, es, ja) order.
    output_components = [ru_output, tr_output, sv_output, de_output, es_output, jp_output, ru_text, tr_text, sv_text, de_text, es_text, jp_text]
    submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)

if __name__ == "__main__":
    demo.launch()