|
|
|
|
|
""" |
|
|
Voices Across Borders - A Multilingual Podcast Audio Dubbing Tool |
|
|
|
|
|
This Streamlit-based app takes a Spanish podcast/audio file, dubs it in English via ElevenLabs API, |
|
|
and runs a linguistic and semantic QA using LLMs (in this case, Llama-3-8-B via Groq) to evaluate |
|
|
translation accuracy and tone. Ideal for showcasing real-world multilingual API deployment. |
|
|
""" |
|
|
|
|
|
import streamlit as st |
|
|
from elevenlabs.client import ElevenLabs |
|
|
from groq import Groq |
|
|
from pydub import AudioSegment |
|
|
from dotenv import load_dotenv |
|
|
import os |
|
|
import io |
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
GROQ_API_KEY = os.getenv("GROQ_API_KEY") |
|
|
ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY") |
|
|
|
|
|
print("GROQ_API_KEY loaded?", bool(GROQ_API_KEY)) |
|
|
print("ELEVENLABS_API_KEY loaded?", bool(ELEVENLABS_API_KEY)) |
|
|
|
|
|
|
|
|
client_el = ElevenLabs(api_key=ELEVENLABS_API_KEY) |
|
|
|
|
|
client_gr = Groq(api_key=GROQ_API_KEY) |
|
|
|
|
|
|
|
|
def transcribe_audio(audio_file): |
|
|
print("Transcribing audio...") |
|
|
|
|
|
audio_data = AudioSegment.from_file(audio_file) |
|
|
audio_data = audio_data.set_frame_rate(44100).set_channels(1).set_sample_width(2) |
|
|
audio_buffer = io.BytesIO() |
|
|
audio_data.export(audio_buffer, format="wav") |
|
|
audio_buffer.seek(0) |
|
|
print("Audio converted successfully!") |
|
|
with print("✍ Transcribing audio... "): |
|
|
transcript = client_el.speech_to_text.convert( |
|
|
file = audio_buffer, |
|
|
model_id = "scribe_v1" |
|
|
) |
|
|
return transcript.text |
|
|
|
|
|
def translate_transcript(transcript): |
|
|
print("Translating transcript using Llama-3-8B...") |
|
|
completion = client_gr.chat.completions.create( |
|
|
model = "llama3-8b-8192", |
|
|
messages = [ |
|
|
{ |
|
|
"role": "user", |
|
|
"content": f"""Translate the following Spanish text to professional English. |
|
|
Only return the translation, nothing else. Do not provide any commentary afterward.\n\n{transcript}""" |
|
|
} |
|
|
] |
|
|
) |
|
|
|
|
|
response = completion.choices[0].message.content |
|
|
return response |
|
|
|
|
|
|
|
|
def convert_audio_response(audio, to_wav=False): |
|
|
|
|
|
if isinstance(audio, (bytes, bytearray)): |
|
|
audio_bytes = audio |
|
|
elif hasattr(audio, '__iter__'): |
|
|
audio_bytes = b''.join(audio) |
|
|
else: |
|
|
raise ValueError("Unsupported audio format.") |
|
|
|
|
|
audio_stream = io.BytesIO(audio_bytes) |
|
|
audio_stream.seek(0) |
|
|
return audio_stream, "audio/mp3" |
|
|
|
|
|
def generate_dub(translated_text, to_wav=False): |
|
|
try: |
|
|
audio_data = client_el.text_to_speech.convert( |
|
|
text=translated_text, |
|
|
voice_id="nPczCjzI2devNBz1zQrb", |
|
|
model_id="eleven_multilingual_v2" |
|
|
) |
|
|
|
|
|
audio_stream, mime_type = convert_audio_response(audio_data, to_wav=to_wav) |
|
|
return audio_stream, mime_type |
|
|
|
|
|
except Exception as e: |
|
|
print(f"ElevenLabs API error: {e}") |
|
|
return None, None |
|
|
|
|
|
def run_quality_check(spanish, english): |
|
|
qa_prompt = f""" |
|
|
Compare the following Spanish source with its English translation. |
|
|
- Identify any mistranslations or tone shifts. |
|
|
- Rate fluency and accuracy from 1 to 10. |
|
|
|
|
|
Spanish: {spanish} |
|
|
English: {english} |
|
|
|
|
|
""" |
|
|
|
|
|
completion = client_gr.chat.completions.create( |
|
|
model = "llama3-8b-8192", |
|
|
messages = [ |
|
|
{ |
|
|
"role": "user", |
|
|
"content": qa_prompt |
|
|
} |
|
|
] |
|
|
) |
|
|
|
|
|
response = completion.choices[0].message.content |
|
|
return response |
|
|
|
|
|
|
|
|
st.title("Voices Across Borders - Multilingual Podcast Dubbing Tool") |
|
|
st.write("Upload a Spanish podcast audio file for dubbing in English.") |
|
|
uploaded_file = st.file_uploader("Choose a Spanish audio file", type=["mp3", "wav"]) |
|
|
|
|
|
if uploaded_file: |
|
|
transcript = transcribe_audio(uploaded_file) |
|
|
st.subheader("📜Transcription") |
|
|
st.write(transcript) |
|
|
|
|
|
translated = translate_transcript(transcript) |
|
|
st.subheader("Translated Text") |
|
|
st.write(translated) |
|
|
|
|
|
with st.spinner("💿 Generating dub..."): |
|
|
dubbed_audio, mime_type = generate_dub(translated) |
|
|
if dubbed_audio: |
|
|
st.success("Dubbing process completed successfully!") |
|
|
dubbed_audio.seek(0) |
|
|
st.audio(dubbed_audio, format=mime_type) |
|
|
st.download_button( |
|
|
label="Download dub", |
|
|
data = dubbed_audio, |
|
|
file_name = f"dubbed_audio.{mime_type.split('/')[-1]}", |
|
|
mime=mime_type |
|
|
) |
|
|
else: |
|
|
st.warning("No audio to play or download.") |
|
|
|
|
|
with st.spinner("Running quality check... "): |
|
|
qa_result = run_quality_check(transcript, translated) |
|
|
if qa_result: |
|
|
st.success("Quality check complete!") |
|
|
st.markdown("### Quality Check Result") |
|
|
st.write(qa_result) |