"""Gradio demo: record English speech and get Spanish text and audio back.

Pipeline: AssemblyAI transcription -> ``translate`` (en -> es) ->
ElevenLabs text-to-speech. Credentials are read from the environment
variables ``ASSEMBLYAI_API_KEY``, ``ELEVENLABS_API_KEY`` and
``ELEVENLABS_VOICE_ID``.
"""

import os
import uuid
from pathlib import Path

import assemblyai as aai
import gradio as gr
from elevenlabs import VoiceSettings
from elevenlabs.client import ElevenLabs
from translate import Translator


def voice_to_voice(audio_file):
    """Transcribe a recording, translate it to Spanish and synthesize speech.

    Args:
        audio_file: Path of the recorded audio as supplied by the Gradio
            audio input (``type="filepath"``); ``None`` when nothing was
            recorded.

    Returns:
        A ``(audio_path, translation)`` tuple: the path of the generated
        MP3 file and the translated text.

    Raises:
        gr.Error: If no audio was supplied, the transcription reports an
            error status, or the transcript contains no text.
    """
    # Gradio hands over None when the user submits without recording.
    if audio_file is None:
        raise gr.Error("No audio was provided. Please record something first.")

    transcript = transcribe_audio(audio_file)
    if transcript.status == aai.TranscriptStatus.error:
        raise gr.Error(transcript.error)

    transcript_text = transcript.text
    # A transcript without text (e.g. a silent recording) would otherwise be
    # forwarded to the translator and the speech API.
    if not transcript_text or not transcript_text.strip():
        raise gr.Error("No speech was detected in the recording. Please try again.")

    translation = translate_text(transcript_text)
    audio_path = text_to_speech(translation)
    return audio_path, translation


def transcribe_audio(audio_file):
    """Transcribe ``audio_file`` with AssemblyAI.

    The API key is taken from the ``ASSEMBLYAI_API_KEY`` environment
    variable.

    Returns:
        The object returned by ``aai.Transcriber().transcribe``; the caller
        reads its ``status``, ``error`` and ``text`` attributes.
    """
    aai.settings.api_key = os.getenv("ASSEMBLYAI_API_KEY")
    transcriber = aai.Transcriber()
    return transcriber.transcribe(audio_file)


def translate_text(text):
    """Translate ``text`` from English to Spanish and return the result."""
    translator = Translator(from_lang="en", to_lang="es")  # Translate to Spanish
    return translator.translate(text)


def text_to_speech(text):
    """Synthesize ``text`` with ElevenLabs and save it as an MP3 file.

    The API key and voice are taken from the ``ELEVENLABS_API_KEY`` and
    ``ELEVENLABS_VOICE_ID`` environment variables.

    Returns:
        The path of the written file: a UUID-named ``.mp3`` relative to the
        current working directory.

    Raises:
        Exception: Whatever the ElevenLabs client or the file write raises
            is propagated unchanged after the partial output file has been
            removed.
    """
    client = ElevenLabs(api_key=os.getenv("ELEVENLABS_API_KEY"))
    response = client.text_to_speech.convert(
        voice_id=os.getenv("ELEVENLABS_VOICE_ID"),
        optimize_streaming_latency="0",
        output_format="mp3_22050_32",
        text=text,
        model_id="eleven_multilingual_v2",
        voice_settings=VoiceSettings(
            stability=0.5,
            similarity_boost=0.8,
            style=0.5,
            use_speaker_boost=True,
        ),
    )

    file_path = f"{uuid.uuid4()}.mp3"
    try:
        with open(file_path, "wb") as f:
            # The response is consumed chunk by chunk, so a failure can occur
            # after the file has already been created.
            for chunk in response:
                if chunk:
                    f.write(chunk)
    except Exception:
        # Do not leave an empty or truncated MP3 behind on failure.
        Path(file_path).unlink(missing_ok=True)
        raise
    return file_path


# Microphone-only input; the recording is passed to the handler as a file path.
audio_input = gr.Audio(sources=["microphone"], type="filepath", show_download_button=True)

demo = gr.Interface(
    fn=voice_to_voice,
    inputs=audio_input,
    outputs=[gr.Audio(label="Spanish"), gr.Textbox(label="Translated Text (Spanish)")],
    title="Voice-to-Voice Translator",
    description="Record yourself in English and get the translation in Spanish with voice output.",
)

if __name__ == "__main__":
    demo.launch()