# Echo: voice-translation Gradio app (Hugging Face Space).
| import os | |
| import numpy as np | |
| import gradio as gr | |
| import assemblyai as aai | |
| from translate import Translator | |
| import uuid | |
| from gtts import gTTS | |
| import tempfile | |
| from pathlib import Path | |
def voice_to_voice(audio_file):
    """Transcribe English speech, translate it into six languages, and
    synthesize an audio clip for each translation.

    Args:
        audio_file: Filesystem path to the recorded audio (Gradio
            ``type="filepath"`` value).

    Returns:
        A flat 12-tuple consumed by the Gradio outputs list: six
        ``Path`` objects for the generated MP3 files, followed by the
        six translated strings, both in ru/tr/sv/de/es/ja order
        (the order produced by ``translate_text``).

    Raises:
        gr.Error: If the AssemblyAI transcription fails.
    """
    transcript = transcribe_audio(audio_file)
    if transcript.status == aai.TranscriptStatus.error:
        raise gr.Error(transcript.error)
    # Keep the transcript object and its text in separate names instead of
    # shadowing `transcript` as in the original.
    text = transcript.text

    list_translations = translate_text(text)

    # One synthesized MP3 per translation; order matches list_translations.
    generated_audio_paths = [Path(text_to_speech(t)) for t in list_translations]

    # Gradio unpacks this flat tuple into the 12 output components.
    return (*generated_audio_paths, *list_translations)
# Function to transcribe audio using AssemblyAI
def transcribe_audio(audio_file):
    """Transcribe an audio file with AssemblyAI.

    Args:
        audio_file: Path to the audio file to transcribe.

    Returns:
        The AssemblyAI transcript object; callers must check
        ``.status`` / ``.error`` before using ``.text``.
    """
    # SECURITY: never hard-code API keys in source. Prefer the environment;
    # the literal fallback only preserves existing behavior — this key is
    # already exposed in version control and should be revoked and removed.
    aai.settings.api_key = os.environ.get(
        "ASSEMBLYAI_API_KEY", "21f30361d02543cca65707e8f71721d8"
    )
    transcriber = aai.Transcriber()
    return transcriber.transcribe(audio_file)
# Function to translate text
def translate_text(text: str) -> list[str]:
    """Translate English text into six target languages.

    Args:
        text: English source text.

    Returns:
        Translations in fixed order — Russian, Turkish, Swedish,
        German, Spanish, Japanese — which must match the order of the
        UI output components.

    Note: the original annotated the return type as ``str`` although a
    list is returned; fixed to ``list[str]``.
    """
    languages = ["ru", "tr", "sv", "de", "es", "ja"]
    # A fresh Translator per target language, translating the same text.
    return [
        Translator(from_lang="en", to_lang=lang).translate(text)
        for lang in languages
    ]
# Function to generate speech with gTTS (Google Text-to-Speech)
def text_to_speech(text: str, lang: str = "en") -> str:
    """Synthesize speech for *text* with gTTS and save it to a temp MP3.

    Args:
        text: Text to speak.
        lang: gTTS language code. Defaults to ``"en"`` for backward
            compatibility, but callers synthesizing translated text
            should pass the target language so pronunciation is correct.

    Returns:
        Filesystem path of the generated ``.mp3`` (the file is NOT
        auto-deleted; the caller owns cleanup).
    """
    tts = gTTS(text=text, lang=lang, slow=True)
    # delete=False so the file outlives the context manager — Gradio reads
    # it later by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tts.save(tmp_file.name)
        audio_path = tmp_file.name
    return audio_path
# NOTE(review): `input_audio` is never attached to the layout or any event —
# the Blocks UI below constructs its own identical gr.Audio (`audio_input`).
# This looks like dead code; confirm no external import relies on it before
# removing.
input_audio = gr.Audio(
    sources=["microphone"],
    type="filepath",
    show_download_button=True,
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)
# UI definition: one microphone input plus six (audio, text) output groups.
with gr.Blocks() as demo:
    gr.Markdown("## Echo: Voice Translation App")
    gr.Markdown("## Record yourself in English and immediately receive voice translations.")
    with gr.Row():
        with gr.Column():
            # Microphone recorder; type="filepath" hands voice_to_voice a
            # path string rather than raw samples.
            audio_input = gr.Audio(sources=["microphone"],
                                   type="filepath",
                                   show_download_button=True,
                                   waveform_options=gr.WaveformOptions(
                                       waveform_color="#01C6FF",
                                       waveform_progress_color="#0066B4",
                                       skip_length=2,
                                       show_controls=False,
                                   ),)
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
                btn = gr.ClearButton(audio_input, "Clear")
    # First row of outputs: Turkish, Swedish, Russian.
    with gr.Row():
        with gr.Group() as turkish:
            tr_output = gr.Audio(label="Turkish", interactive=False)
            tr_text = gr.Markdown()
        with gr.Group() as swedish:
            sv_output = gr.Audio(label="Swedish", interactive=False)
            sv_text = gr.Markdown()
        with gr.Group() as russian:
            ru_output = gr.Audio(label="Russian", interactive=False)
            ru_text = gr.Markdown()
    # Second row of outputs: German, Spanish, Japanese.
    with gr.Row():
        with gr.Group():
            de_output = gr.Audio(label="German", interactive=False)
            de_text = gr.Markdown()
        with gr.Group():
            es_output = gr.Audio(label="Spanish", interactive=False)
            es_text = gr.Markdown()
        with gr.Group():
            jp_output = gr.Audio(label="Japanese", interactive=False)
            jp_text = gr.Markdown()
    # ORDER MATTERS: this list must match the order of values returned by
    # voice_to_voice — six audio paths then six texts, in ru/tr/sv/de/es/ja
    # order (the language order used by translate_text).
    output_components = [ru_output, tr_output, sv_output, de_output, es_output, jp_output, ru_text, tr_text, sv_text, de_text, es_text, jp_text]
    submit.click(fn=voice_to_voice, inputs=audio_input, outputs=output_components, show_progress=True)

if __name__ == "__main__":
    demo.launch()