File size: 5,048 Bytes
e022d73 61ec040 e022d73 f5a70cf e022d73 c02529e 11a9152 cdd7f59 11a9152 e022d73 e947850 e022d73 e947850 e022d73 c02529e b4434ea 5ee45ef e947850 46fe82a 8456d62 46fe82a 129d330 e022d73 e947850 e022d73 4e6f40c e022d73 a5e9424 7e496f3 a5e9424 be998ff a5e9424 e022d73 d37b78f a5e9424 e947850 a5e9424 e022d73 aa3f0f2 7b81024 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 | # app.py - ElevenLabs Podcast Dubbing Project
"""
Voices Across Borders - A Multilingual Podcast Audio Dubbing Tool
This Streamlit-based app takes a Spanish podcast/audio file, dubs it in English via ElevenLabs API,
and runs a linguistic and semantic QA using LLMs (in this case, Llama-3-8-B via Groq) to evaluate
translation accuracy and tone. Ideal for showcasing real-world multilingual API deployment.
"""
import streamlit as st
from elevenlabs.client import ElevenLabs
from groq import Groq
from pydub import AudioSegment
from dotenv import load_dotenv
import os
import io
load_dotenv()
# ======CONFIGURATION========
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
print("GROQ_API_KEY loaded?", bool(GROQ_API_KEY))
print("ELEVENLABS_API_KEY loaded?", bool(ELEVENLABS_API_KEY))
# Initialize ElevenLabs client
client_el = ElevenLabs(api_key=ELEVENLABS_API_KEY)
# Initialize Groq client
client_gr = Groq(api_key=GROQ_API_KEY)
# ======APP LAYOUT========
def transcribe_audio(audio_file):
print("Transcribing audio...")
# Transcribe audio using ElevenLabs API
audio_data = AudioSegment.from_file(audio_file)
audio_data = audio_data.set_frame_rate(44100).set_channels(1).set_sample_width(2) # Convert to 2 bytes / 16-bit .wav
audio_buffer = io.BytesIO()
audio_data.export(audio_buffer, format="wav")
audio_buffer.seek(0)
print("Audio converted successfully!")
with print("✍ Transcribing audio... "):
transcript = client_el.speech_to_text.convert(
file = audio_buffer,
model_id = "scribe_v1"
)
return transcript.text
def translate_transcript(transcript):
print("Translating transcript using Llama-3-8B...")
completion = client_gr.chat.completions.create(
model = "llama3-8b-8192",
messages = [
{
"role": "user",
"content": f"""Translate the following Spanish text to professional English.
Only return the translation, nothing else. Do not provide any commentary afterward.\n\n{transcript}"""
}
]
)
response = completion.choices[0].message.content
return response
# Helper function for generate_dub
def convert_audio_response(audio, to_wav=False):
# Handle generator or bytes input
if isinstance(audio, (bytes, bytearray)):
audio_bytes = audio
elif hasattr(audio, '__iter__'):
audio_bytes = b''.join(audio)
else:
raise ValueError("Unsupported audio format.")
audio_stream = io.BytesIO(audio_bytes)
audio_stream.seek(0)
return audio_stream, "audio/mp3"
def generate_dub(translated_text, to_wav=False):
try:
audio_data = client_el.text_to_speech.convert(
text=translated_text,
voice_id="nPczCjzI2devNBz1zQrb",
model_id="eleven_multilingual_v2"
)
audio_stream, mime_type = convert_audio_response(audio_data, to_wav=to_wav)
return audio_stream, mime_type
except Exception as e:
print(f"ElevenLabs API error: {e}")
return None, None
def run_quality_check(spanish, english):
qa_prompt = f"""
Compare the following Spanish source with its English translation.
- Identify any mistranslations or tone shifts.
- Rate fluency and accuracy from 1 to 10.
Spanish: {spanish}
English: {english}
"""
completion = client_gr.chat.completions.create(
model = "llama3-8b-8192",
messages = [
{
"role": "user",
"content": qa_prompt
}
]
)
response = completion.choices[0].message.content
return response
# ======APP LOGIC========
st.title("Voices Across Borders - Multilingual Podcast Dubbing Tool")
st.write("Upload a Spanish podcast audio file for dubbing in English.")
uploaded_file = st.file_uploader("Choose a Spanish audio file", type=["mp3", "wav"])
if uploaded_file:
transcript = transcribe_audio(uploaded_file)
st.subheader("📜Transcription")
st.write(transcript)
translated = translate_transcript(transcript)
st.subheader("Translated Text")
st.write(translated)
with st.spinner("💿 Generating dub..."):
dubbed_audio, mime_type = generate_dub(translated)
if dubbed_audio:
st.success("Dubbing process completed successfully!")
dubbed_audio.seek(0)
st.audio(dubbed_audio, format=mime_type)
st.download_button(
label="Download dub",
data = dubbed_audio,
file_name = f"dubbed_audio.{mime_type.split('/')[-1]}",
mime=mime_type
)
else:
st.warning("No audio to play or download.")
with st.spinner("Running quality check... "):
qa_result = run_quality_check(transcript, translated)
if qa_result:
st.success("Quality check complete!")
st.markdown("### Quality Check Result")
st.write(qa_result) |