File size: 5,048 Bytes
e022d73
 
 
 
 
61ec040
e022d73
 
 
f5a70cf
e022d73
 
c02529e
11a9152
 
cdd7f59
11a9152
 
e022d73
 
e947850
 
 
 
 
 
e022d73
 
 
 
 
 
 
e947850
e022d73
c02529e
b4434ea
5ee45ef
 
 
e947850
 
46fe82a
8456d62
46fe82a
 
129d330
e022d73
 
e947850
e022d73
 
 
 
 
4e6f40c
 
e022d73
 
 
 
 
 
 
a5e9424
 
 
 
 
 
 
 
 
 
7e496f3
 
 
 
a5e9424
 
 
 
be998ff
a5e9424
 
e022d73
d37b78f
 
a5e9424
 
e947850
a5e9424
 
e022d73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
aa3f0f2
7b81024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# app.py - ElevenLabs Podcast Dubbing Project
"""
Voices Across Borders - A Multilingual Podcast Audio Dubbing Tool

This Streamlit-based app takes a Spanish podcast/audio file, dubs it in English via the ElevenLabs API,
and runs a linguistic and semantic QA using an LLM (in this case, Llama-3-8B via Groq) to evaluate
translation accuracy and tone. Ideal for showcasing real-world multilingual API deployment.
"""

import streamlit as st
from elevenlabs.client import ElevenLabs
from groq import Groq
from pydub import AudioSegment
from dotenv import load_dotenv
import os
import io

load_dotenv()

# ======CONFIGURATION========
# API keys are read from the process environment (load_dotenv() above has
# already merged any .env file into it). A missing key yields None.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Report only whether each key is present; the key values are never printed.
print("GROQ_API_KEY loaded?", bool(GROQ_API_KEY))
print("ELEVENLABS_API_KEY loaded?", bool(ELEVENLABS_API_KEY))

# Module-level API clients shared by the functions below:
# client_el -> ElevenLabs (speech-to-text and text-to-speech)
# client_gr -> Groq (chat completions for translation and QA)
client_el = ElevenLabs(api_key=ELEVENLABS_API_KEY)
client_gr = Groq(api_key=GROQ_API_KEY)

# ======PIPELINE FUNCTIONS========
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the ElevenLabs speech-to-text API.

    The input is first normalised with pydub to a mono, 44.1 kHz, 16-bit WAV
    held in memory, then sent to the ``scribe_v1`` model.

    Args:
        audio_file: Anything ``AudioSegment.from_file`` accepts; in this app
            it is the object returned by ``st.file_uploader``.

    Returns:
        The ``text`` attribute of the speech-to-text response.
    """
    print("Transcribing audio...")
    audio_data = AudioSegment.from_file(audio_file)
    # Normalise to mono / 44.1 kHz / 2 bytes per sample (16-bit) before export.
    audio_data = audio_data.set_frame_rate(44100).set_channels(1).set_sample_width(2)
    audio_buffer = io.BytesIO()
    audio_data.export(audio_buffer, format="wav")
    # Rewind so the API client reads the WAV from its first byte.
    audio_buffer.seek(0)
    print("Audio converted successfully!")
    # `print()` returns None and cannot be used as a context manager; the
    # Streamlit spinner is the context manager that shows this status text.
    with st.spinner("✍ Transcribing audio... "):
        transcript = client_el.speech_to_text.convert(
            file=audio_buffer,
            model_id="scribe_v1",
        )
    return transcript.text

def translate_transcript(transcript):
    """Translate a Spanish transcript to English using Llama-3-8B on Groq.

    Args:
        transcript: The Spanish text to translate.

    Returns:
        The message content of the first completion choice.
    """
    print("Translating transcript using Llama-3-8B...")
    # NOTE: the second line's leading spaces are inside the literal and are
    # therefore sent to the model as part of the prompt.
    prompt = f"""Translate the following Spanish text to professional English.
                Only return the translation, nothing else. Do not provide any commentary afterward.\n\n{transcript}"""
    user_message = {"role": "user", "content": prompt}
    completion = client_gr.chat.completions.create(
        model="llama3-8b-8192",
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# Helper function for generate_dub
def convert_audio_response(audio, to_wav=False):
    """Collect a text-to-speech response into a seekable in-memory stream.

    Args:
        audio: A bytes-like object holding the whole clip, or an iterable
            yielding bytes chunks (for example a streamed response).
        to_wav: When True, re-encode the clip to WAV with pydub before
            returning it. When False (the default) the bytes are passed
            through unchanged.

    Returns:
        A ``(stream, mime_type)`` tuple: an ``io.BytesIO`` positioned at the
        start, and ``"audio/wav"`` if ``to_wav`` is set, else ``"audio/mp3"``.

    Raises:
        ValueError: If ``audio`` is neither bytes-like nor a non-string
            iterable.
    """
    if isinstance(audio, (bytes, bytearray, memoryview)):
        audio_bytes = bytes(audio)
    elif isinstance(audio, str) or not hasattr(audio, "__iter__"):
        # str is iterable but is not audio; reject it here instead of letting
        # bytes.join fail later with an unrelated-looking TypeError.
        raise ValueError("Unsupported audio format.")
    else:
        audio_bytes = b"".join(audio)

    if to_wav:
        # The pass-through branch labels the payload as MP3, so decode it as
        # MP3 here. NOTE(review): confirm the TTS output format stays MP3.
        wav_stream = io.BytesIO()
        AudioSegment.from_file(io.BytesIO(audio_bytes), format="mp3").export(
            wav_stream, format="wav"
        )
        wav_stream.seek(0)
        return wav_stream, "audio/wav"

    # A freshly constructed BytesIO is already positioned at offset 0.
    return io.BytesIO(audio_bytes), "audio/mp3"

def generate_dub(translated_text, to_wav=False):
    """Synthesize speech for the translated text with ElevenLabs.

    Args:
        translated_text: The text to speak.
        to_wav: Forwarded unchanged to ``convert_audio_response``.

    Returns:
        The ``(audio_stream, mime_type)`` pair produced by
        ``convert_audio_response``, or ``(None, None)`` if anything in the
        request or conversion raised.
    """
    tts_request = {
        "text": translated_text,
        "voice_id": "nPczCjzI2devNBz1zQrb",
        "model_id": "eleven_multilingual_v2",
    }
    try:
        speech = client_el.text_to_speech.convert(**tts_request)
        # Conversion stays inside the try: if the response is a lazy iterator
        # (presumably it is - verify against the SDK), API errors only appear
        # once it is consumed here.
        dubbed_stream, dubbed_mime = convert_audio_response(speech, to_wav=to_wav)
    except Exception as err:
        # Best-effort: report the failure and let the caller show a warning.
        print(f"ElevenLabs API error: {err}")
        return None, None
    return dubbed_stream, dubbed_mime
        
def run_quality_check(spanish, english):
    """Ask Llama-3-8B (via Groq) to review a translation against its source.

    Args:
        spanish: The Spanish source text.
        english: The English translation to evaluate.

    Returns:
        The message content of the first completion choice.
    """
    # The literal's indentation and blank lines are part of the prompt text.
    prompt_text = f"""
    Compare the following Spanish source with its English translation.
    - Identify any mistranslations or tone shifts.
    - Rate fluency and accuracy from 1 to 10.
    
    Spanish: {spanish}
    English: {english}

    """
    qa_messages = [{"role": "user", "content": prompt_text}]
    completion = client_gr.chat.completions.create(
        model="llama3-8b-8192",
        messages=qa_messages,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

# ======APP LOGIC========
st.title("Voices Across Borders - Multilingual Podcast Dubbing Tool")
st.write("Upload a Spanish podcast audio file for dubbing in English.")
uploaded_file = st.file_uploader("Choose a Spanish audio file", type=["mp3", "wav"])

if uploaded_file:
    # Streamlit re-executes this whole script on every widget interaction,
    # including a click on the download button below. Each pipeline stage is
    # cached per uploaded file in session state, so a rerun re-renders the
    # stored results instead of calling the transcription, translation, TTS
    # and QA APIs again.
    upload_key = (
        getattr(uploaded_file, "file_id", None),
        uploaded_file.name,
        uploaded_file.size,
    )
    if st.session_state.get("dub_upload_key") != upload_key:
        # New (or first) upload: drop results that belong to a previous file.
        st.session_state["dub_upload_key"] = upload_key
        st.session_state["dub_results"] = {}
    results = st.session_state["dub_results"]

    if "transcript" not in results:
        results["transcript"] = transcribe_audio(uploaded_file)
    transcript = results["transcript"]
    st.subheader("📜Transcription")
    st.write(transcript)

    if "translated" not in results:
        results["translated"] = translate_transcript(transcript)
    translated = results["translated"]
    st.subheader("Translated Text")
    st.write(translated)

    with st.spinner("💿 Generating dub..."):
        if "dub" not in results:
            dubbed_audio, mime_type = generate_dub(translated)
            if dubbed_audio is not None:
                # Keep raw bytes rather than the stream: bytes have no read
                # position, so the player and the download button both get
                # the full clip on every rerun. A failed dub is not cached,
                # so the next rerun retries it.
                results["dub"] = (dubbed_audio.getvalue(), mime_type)

        if "dub" in results:
            audio_bytes, mime_type = results["dub"]
            st.success("Dubbing process completed successfully!")
            st.audio(audio_bytes, format=mime_type)
            st.download_button(
                label="Download dub",
                data=audio_bytes,
                # File extension is the MIME subtype, e.g. "audio/mp3" -> "mp3".
                file_name=f"dubbed_audio.{mime_type.split('/')[-1]}",
                mime=mime_type,
            )
        else:
            st.warning("No audio to play or download.")

    with st.spinner("Running quality check... "):
        if "qa_result" not in results:
            results["qa_result"] = run_quality_check(transcript, translated)
        qa_result = results["qa_result"]
        if qa_result:
            st.success("Quality check complete!")
    st.markdown("### Quality Check Result")
    st.write(qa_result)