import os
import numpy as np
import gradio as gr
import assemblyai as aai
from translate import Translator
import uuid
from gtts import gTTS
import tempfile
from pathlib import Path
def voice_to_voice(audio_file):
    """End-to-end pipeline: transcribe English speech, translate it into
    six languages, and synthesize audio for each translation.

    Returns six audio file paths followed by six translated strings, in
    the order ru, tr, sv, de, es, ja (matching output_components).
    """
    # Speech -> text
    transcript = transcribe_audio(audio_file)
    if transcript.status == aai.TranscriptStatus.error:
        raise gr.Error(transcript.error)
    text = transcript.text

    # Text -> six translations
    list_translations = translate_text(text)

    # Each translation -> a synthesized audio file on disk
    generated_audio_paths = [
        Path(text_to_speech(translation)) for translation in list_translations
    ]

    # Flatten into the 12-tuple Gradio expects: 6 audios then 6 texts.
    return (*generated_audio_paths, *list_translations)
# Function to transcribe audio using AssemblyAI
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with AssemblyAI.

    Parameters:
        audio_file: Path to the recorded audio file (Gradio "filepath").

    Returns:
        The AssemblyAI Transcript object; caller must check
        ``.status`` / ``.error`` before using ``.text``.
    """
    # SECURITY: the API key was hard-coded in source. Read it from the
    # environment instead; the old literal remains only as a
    # backward-compatible fallback — rotate that key and delete it.
    aai.settings.api_key = os.environ.get(
        "ASSEMBLYAI_API_KEY", "21f30361d02543cca65707e8f71721d8"
    )
    transcriber = aai.Transcriber()
    return transcriber.transcribe(audio_file)
# Function to translate text
def translate_text(text: str) -> list[str]:
    """Translate English *text* into six languages.

    Note: the original annotation claimed ``-> str``, but this function
    has always returned a list of strings — the annotation is corrected.

    Parameters:
        text: English source text.

    Returns:
        Translations in fixed order: Russian, Turkish, Swedish, German,
        Spanish, Japanese.
    """
    languages = ["ru", "tr", "sv", "de", "es", "ja"]
    return [
        Translator(from_lang="en", to_lang=lan).translate(text)
        for lan in languages
    ]
# Function to generate speech with gTTS (Google Text-to-Speech)
def text_to_speech(text: str, lang: str = "en") -> str:
    """Synthesize *text* to an MP3 file with gTTS and return its path.

    Parameters:
        text: Text to speak.
        lang: gTTS language code. Defaults to "en" for backward
            compatibility. NOTE(review): callers pass non-English
            translations here, so they should supply the matching code
            (e.g. "ru", "ja") for a correct voice — the old hard-coded
            "en" voiced every language with an English accent.

    Returns:
        Path to a temporary .mp3 file; the caller owns its cleanup.
    """
    tts = gTTS(text=text, lang=lang, slow=True)
    # delete=False so the file outlives the context manager and Gradio
    # can serve it afterwards.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tts.save(tmp_file.name)
        audio_path = tmp_file.name
    return audio_path
# NOTE(review): this module-level widget appears unused — the UI below
# constructs its own identical `audio_input` inside the Blocks context.
# Candidate for removal; confirm nothing else imports it first.
input_audio = gr.Audio(
    sources=["microphone"],
    type="filepath",
    show_download_button=True,
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)
# Gradio UI. Statement order inside the Blocks context defines the page
# layout, so these lines must not be reordered.
with gr.Blocks() as demo:
    gr.Markdown("## Echo: Voice Translation App")
    gr.Markdown("## Record yourself in English and immediately receive voice translations.")
    with gr.Row():
        with gr.Column():
            # Microphone recorder; "filepath" hands voice_to_voice a path on disk.
            audio_input = gr.Audio(sources=["microphone"],
                                   type="filepath",
                                   show_download_button=True,
                                   waveform_options=gr.WaveformOptions(
                                       waveform_color="#01C6FF",
                                       waveform_progress_color="#0066B4",
                                       skip_length=2,
                                       show_controls=False,
                                   ),)
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
                btn = gr.ClearButton(audio_input, "Clear")
    # First row of outputs: one audio player + one text panel per language.
    with gr.Row():
        with gr.Group() as turkish:
            tr_output = gr.Audio(label="Turkish", interactive=False)
            tr_text = gr.Markdown()
        with gr.Group() as swedish:
            sv_output = gr.Audio(label="Swedish", interactive=False)
            sv_text = gr.Markdown()
        with gr.Group() as russian:
            ru_output = gr.Audio(label="Russian", interactive=False)
            ru_text = gr.Markdown()
    # Second row of outputs.
    with gr.Row():
        with gr.Group():
            de_output = gr.Audio(label="German", interactive=False)
            de_text = gr.Markdown()
        with gr.Group():
            es_output = gr.Audio(label="Spanish", interactive=False)
            es_text = gr.Markdown()
        with gr.Group():
            jp_output = gr.Audio(label="Japanese", interactive=False)
            jp_text = gr.Markdown()
    # Order here MUST match voice_to_voice's return order:
    # 6 audio paths (ru, tr, sv, de, es, ja) then 6 translation strings.
    output_components = [ru_output, tr_output, sv_output, de_output, es_output, jp_output, ru_text, tr_text, sv_text, de_text, es_text, jp_text]
    submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)

if __name__ == "__main__":
    demo.launch()