File size: 1,950 Bytes
3f1a6f7
7f5173a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c2b4ff1
 
 
 
7f5173a
 
 
 
 
 
 
c2b4ff1
 
7f5173a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3597254
ab327f2
 
 
c2b4ff1
7f5173a
c2b4ff1
ab327f2
3f1a6f7
7f5173a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
import assemblyai as aai
from translate import Translator
from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs
import uuid
from pathlib import Path
import os

def voice_to_voice(audio_file):
    """Translate a spoken English recording into spoken Spanish.

    The recording is transcribed, the transcript is translated, and the
    translation is synthesized into an audio file.

    Args:
        audio_file: Path of the recording to process.

    Returns:
        A ``(audio_path, translation)`` tuple: the path of the generated
        speech file and the translated text.

    Raises:
        gr.Error: If no recording was supplied, if transcription reports
            an error, or if the transcript contains no text.
    """
    # Submitting without recording hands the callback an empty value; fail
    # with a readable message instead of an SDK traceback.
    if not audio_file:
        raise gr.Error("No audio was provided. Please record something and try again.")

    transcript = transcribe_audio(audio_file)
    if transcript.status == aai.TranscriptStatus.error:
        raise gr.Error(transcript.error)

    transcript_text = transcript.text
    # Silence or unintelligible audio yields no text; translating and
    # synthesizing nothing would only fail further down the pipeline.
    if not transcript_text or not transcript_text.strip():
        raise gr.Error("No speech was detected in the recording.")

    translation = translate_text(transcript_text)
    audio_path = text_to_speech(translation)

    return audio_path, translation

def transcribe_audio(audio_file):
    """Transcribe ``audio_file`` with AssemblyAI and return the transcript.

    The API key is read from the ``ASSEMBLYAI_API_KEY`` environment
    variable on every call and stored on the SDK's settings object.
    """
    aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
    return aai.Transcriber().transcribe(audio_file)

def translate_text(text):
    """Return ``text`` translated from English ("en") to Spanish ("es")."""
    english_to_spanish = Translator(from_lang="en", to_lang="es")
    spanish_text = english_to_spanish.translate(text)
    return spanish_text

def text_to_speech(text):
    """Synthesize ``text`` with ElevenLabs and save it as an MP3 file.

    The API key and voice are taken from the ``ELEVENLABS_API_KEY`` and
    ``ELEVENLABS_VOICE_ID`` environment variables.

    Returns:
        The name of the written file: a random UUID with an ``.mp3``
        suffix, relative to the current working directory.
    """
    api_client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
    speech_settings = VoiceSettings(
        stability=0.5,
        similarity_boost=0.8,
        style=0.5,
        use_speaker_boost=True,
    )
    audio_chunks = api_client.text_to_speech.convert(
        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=speech_settings,
    )

    output_name = f"{uuid.uuid4()}.mp3"
    with open(output_name, "wb") as mp3_file:
        # Skip empty chunks; write the rest in arrival order.
        mp3_file.writelines(piece for piece in audio_chunks if piece)
    return output_name

# Input component: microphone is the only listed source, and type="filepath"
# requests the recording as a path, which voice_to_voice forwards to the
# transcriber.
audio_input = gr.Audio(sources=["microphone"], type="filepath", show_download_button=True)

# Gradio interface wiring the translation pipeline to the UI. The two outputs
# line up with voice_to_voice's return tuple: (audio path, translated text).
demo = gr.Interface(
    fn=voice_to_voice,
    inputs=audio_input,
    outputs=[gr.Audio(label="Spanish"), gr.Textbox(label="Translated Text (Spanish)")],
    title="Voice-to-Voice Translator",
    description="Record yourself in English and get the translation in Spanish with voice output."
)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()