translators-will's picture
Update app.py
f5a70cf verified
# app.py - ElevenLabs Podcast Dubbing Project
"""
Voices Across Borders - A Multilingual Podcast Audio Dubbing Tool
This Streamlit-based app takes a Spanish podcast/audio file, dubs it in English via ElevenLabs API,
and runs a linguistic and semantic QA pass using an LLM (in this case, Llama-3-8B via Groq) to evaluate
translation accuracy and tone. Ideal for showcasing real-world multilingual API deployment.
"""
import streamlit as st
from elevenlabs.client import ElevenLabs
from groq import Groq
from pydub import AudioSegment
from dotenv import load_dotenv
import os
import io
load_dotenv()

# ======CONFIGURATION========
# Both API keys are read from the process environment; either may be None
# when the corresponding variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

# Report only whether each key is present -- the key values are never printed.
print("GROQ_API_KEY loaded?", bool(GROQ_API_KEY))
print("ELEVENLABS_API_KEY loaded?", bool(ELEVENLABS_API_KEY))

# API clients shared by the functions below:
# client_el -> ElevenLabs, client_gr -> Groq.
client_el = ElevenLabs(api_key=ELEVENLABS_API_KEY)
client_gr = Groq(api_key=GROQ_API_KEY)
# ======PIPELINE HELPER FUNCTIONS========
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the ElevenLabs speech-to-text API.

    The input is decoded with pydub, normalised to mono / 44.1 kHz / 16-bit
    samples, exported to an in-memory WAV buffer and sent to the
    ``scribe_v1`` model.

    Args:
        audio_file: Anything ``AudioSegment.from_file`` accepts; the app
            passes the object returned by ``st.file_uploader``.

    Returns:
        The ``text`` attribute of the speech-to-text response.
    """
    print("Transcribing audio...")
    audio_data = AudioSegment.from_file(audio_file)
    # Convert to mono, 44.1 kHz, 2 bytes per sample (16-bit) before exporting.
    audio_data = audio_data.set_frame_rate(44100).set_channels(1).set_sample_width(2)
    audio_buffer = io.BytesIO()
    audio_data.export(audio_buffer, format="wav")
    # Rewind so the upload reads the WAV data from its first byte.
    audio_buffer.seek(0)
    print("Audio converted successfully!")
    # print() returns None, which is not a context manager, so the previous
    # `with print(...)` raised before the API was ever called. A Streamlit
    # spinner shows the same status message while the request is in flight.
    with st.spinner("✍ Transcribing audio... "):
        transcript = client_el.speech_to_text.convert(
            file=audio_buffer,
            model_id="scribe_v1",
        )
    return transcript.text
def translate_transcript(transcript):
    """Translate a Spanish transcript into English through the Groq chat API.

    A single user message carrying the translation instructions and the
    transcript is sent to the ``llama3-8b-8192`` model.

    Args:
        transcript: Spanish text to translate.

    Returns:
        The message content of the first choice in the completion response.
    """
    print("Translating transcript using Llama-3-8B...")
    prompt = f"""Translate the following Spanish text to professional English.
Only return the translation, nothing else. Do not provide any commentary afterward.\n\n{transcript}"""
    chat_messages = [{"role": "user", "content": prompt}]
    result = client_gr.chat.completions.create(
        model="llama3-8b-8192",
        messages=chat_messages,
    )
    return result.choices[0].message.content
# Helper function for generate_dub
def convert_audio_response(audio, to_wav=False):
    """Collect a text-to-speech response into a rewound in-memory stream.

    Args:
        audio: A bytes-like object (``bytes``, ``bytearray``, ``memoryview``)
            or an iterable of byte chunks such as a generator.
        to_wav: When true, the collected audio is re-encoded to WAV with
            pydub and the WAV MIME type is returned. When false (default)
            the bytes are passed through unchanged.

    Returns:
        A ``(stream, mime_type)`` tuple: an ``io.BytesIO`` positioned at 0 and
        either ``"audio/mp3"`` or ``"audio/wav"``.

    Raises:
        ValueError: If ``audio`` is a ``str`` or is not iterable.
    """
    if isinstance(audio, (bytes, bytearray, memoryview)):
        audio_bytes = bytes(audio)
    elif isinstance(audio, str) or not hasattr(audio, "__iter__"):
        # str is iterable but joining it into bytes fails with a confusing
        # TypeError, so it is rejected together with non-iterables.
        raise ValueError("Unsupported audio format.")
    else:
        # Streaming responses arrive as an iterable of byte chunks.
        audio_bytes = b"".join(audio)

    if to_wav:
        # NOTE(review): assumes the payload is MP3, matching the "audio/mp3"
        # label used for the pass-through path -- confirm against the
        # output format requested from the TTS API.
        segment = AudioSegment.from_file(io.BytesIO(audio_bytes), format="mp3")
        wav_stream = io.BytesIO()
        segment.export(wav_stream, format="wav")
        wav_stream.seek(0)
        return wav_stream, "audio/wav"

    # A freshly constructed BytesIO already starts at position 0.
    return io.BytesIO(audio_bytes), "audio/mp3"
def generate_dub(
    translated_text,
    to_wav=False,
    *,
    voice_id="nPczCjzI2devNBz1zQrb",
    model_id="eleven_multilingual_v2",
):
    """Synthesize speech for the translated text with ElevenLabs text-to-speech.

    Args:
        translated_text: Text to be spoken.
        to_wav: Forwarded to ``convert_audio_response``.
        voice_id: ElevenLabs voice identifier; defaults to the voice the app
            has always used.
        model_id: ElevenLabs TTS model identifier; defaults to
            ``eleven_multilingual_v2``.

    Returns:
        ``(audio_stream, mime_type)`` as produced by
        ``convert_audio_response``, or ``(None, None)`` when there is no text
        to speak or the request/conversion fails.
    """
    # Nothing to synthesize: skip the API request and report "no audio"
    # through the same (None, None) result callers already handle.
    if not translated_text or (
        isinstance(translated_text, str) and not translated_text.strip()
    ):
        print("ElevenLabs API error: no text provided for dubbing")
        return None, None

    try:
        audio_data = client_el.text_to_speech.convert(
            text=translated_text,
            voice_id=voice_id,
            model_id=model_id,
        )
        return convert_audio_response(audio_data, to_wav=to_wav)
    except Exception as e:
        # Deliberate best-effort boundary: the UI shows a warning when no
        # audio comes back, so the failure is logged rather than re-raised.
        print(f"ElevenLabs API error: {e}")
        return None, None
def run_quality_check(spanish, english):
    """Ask the Groq-hosted model to review a translation against its source.

    The prompt asks for mistranslations or tone shifts and for a 1-10 rating
    of fluency and accuracy, and is sent as a single user message to the
    ``llama3-8b-8192`` model.

    Args:
        spanish: The Spanish source text.
        english: The English translation to evaluate.

    Returns:
        The message content of the first choice in the completion response.
    """
    review_request = f"""
Compare the following Spanish source with its English translation.
- Identify any mistranslations or tone shifts.
- Rate fluency and accuracy from 1 to 10.
Spanish: {spanish}
English: {english}
"""
    chat_messages = [{"role": "user", "content": review_request}]
    result = client_gr.chat.completions.create(
        model="llama3-8b-8192",
        messages=chat_messages,
    )
    return result.choices[0].message.content
# ======APP LOGIC========
st.title("Voices Across Borders - Multilingual Podcast Dubbing Tool")
st.write("Upload a Spanish podcast audio file for dubbing in English.")
uploaded_file = st.file_uploader("Choose a Spanish audio file", type=["mp3", "wav"])

if uploaded_file:
    # Streamlit re-runs this whole script on every widget interaction,
    # including a click on the download button. Each pipeline result is
    # therefore kept in session state per upload, so a rerun redraws the page
    # without repeating the transcription, translation, TTS and QA API calls.
    # The upload is identified by name and size; a different file sharing
    # both would reuse the cached results.
    upload_key = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("dub_upload_key") != upload_key:
        st.session_state["dub_upload_key"] = upload_key
        st.session_state["dub_results"] = {}
    results = st.session_state["dub_results"]

    # --- Transcription ---
    if "transcript" not in results:
        results["transcript"] = transcribe_audio(uploaded_file)
    transcript = results["transcript"]
    st.subheader("📜Transcription")
    st.write(transcript)

    # Without any transcribed text there is nothing to translate or dub.
    if not transcript or not transcript.strip():
        st.warning("No speech was detected in the uploaded audio.")
        st.stop()

    # --- Translation ---
    if "translated" not in results:
        results["translated"] = translate_transcript(transcript)
    translated = results["translated"]
    st.subheader("Translated Text")
    st.write(translated)

    # --- Dubbing ---
    if "dub_bytes" not in results:
        with st.spinner("💿 Generating dub..."):
            dubbed_audio, mime_type = generate_dub(translated)
        # Only successful dubs are cached, so a failed attempt is retried on
        # the next rerun.
        if dubbed_audio:
            results["dub_bytes"] = dubbed_audio.getvalue()
            results["dub_mime"] = mime_type

    if "dub_bytes" in results:
        dub_bytes = results["dub_bytes"]
        mime_type = results["dub_mime"]
        st.success("Dubbing process completed successfully!")
        st.audio(dub_bytes, format=mime_type)
        st.download_button(
            label="Download dub",
            data=dub_bytes,
            # The file extension is taken from the MIME subtype.
            file_name=f"dubbed_audio.{mime_type.split('/')[-1]}",
            mime=mime_type,
        )
    else:
        st.warning("No audio to play or download.")

    # --- Quality check ---
    if "qa_result" not in results:
        with st.spinner("Running quality check... "):
            results["qa_result"] = run_quality_check(transcript, translated)
    qa_result = results["qa_result"]
    if qa_result:
        st.success("Quality check complete!")
        st.markdown("### Quality Check Result")
        st.write(qa_result)