# umarabbas890's picture
# Update app.py
# a2ae3c3 verified
import os
# Accept the Coqui terms of service up front (presumably this suppresses the
# interactive prompt on model download -- confirm against the Coqui TTS docs).
# Set before any TTS module is imported.
os.environ["COQUI_TOS_AGREED"] = "1"

import torch.serialization
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig
from TTS.config.shared_configs import BaseDatasetConfig

# Register the XTTS config classes as safe globals so that torch's
# weights-only checkpoint loading is allowed to unpickle them.
torch.serialization.add_safe_globals(
    [XttsConfig, XttsAudioConfig, XttsArgs, BaseDatasetConfig]
)
import gradio as gr
import torch
import torchaudio
from TTS.api import TTS
from pydub import AudioSegment
import uuid
# Instantiate the multilingual XTTS v2 model once at import time
# (gpu=False keeps it on CPU; the download progress bar is disabled).
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(
    gpu=False,
    progress_bar=False,
    model_name=model_name,
)
# Speaker sample for each selectable emotion; every clip is stored as
# samples/<lower-cased emotion name>.wav.
emotion_to_file = {
    emotion: f"samples/{emotion.lower()}.wav"
    for emotion in ("Neutral", "Sad", "Happy", "Angry", "Excited")
}
# Voice generator
def generate_voice(text, emotion, language="en"):
    """Synthesize *text* with the speaker sample of *emotion* and return an MP3.

    Args:
        text: Script to speak; must contain a non-whitespace character.
        emotion: Key of ``emotion_to_file`` selecting the speaker sample.
        language: Language code handed to the TTS model. Defaults to ``"en"``.

    Returns:
        ``(mp3_path, mp3_path)`` -- the same path twice, one entry for each of
        the two Gradio outputs wired to this function.

    Raises:
        ValueError: If ``text`` is empty/blank or ``emotion`` is not a key of
            ``emotion_to_file``.
        FileNotFoundError: If the speaker sample for ``emotion`` is missing.
    """
    # Reject empty scripts before doing any work.
    if not text or not text.strip():
        raise ValueError("Script text must not be empty.")

    speaker_audio_path = emotion_to_file.get(emotion)
    if speaker_audio_path is None:
        # Without this check os.path.isfile(None) fails with an opaque TypeError.
        raise ValueError(f"Unknown emotion: {emotion!r}")
    if not os.path.isfile(speaker_audio_path):
        raise FileNotFoundError(f"Speaker audio file not found: {speaker_audio_path}")

    # Generate unique filenames to avoid overwrites
    uid = uuid.uuid4().hex
    wav_path = f"output_{uid}.wav"
    mp3_path = f"output_{uid}.mp3"

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_audio_path,
            language=language,
            file_path=wav_path,
        )
        # Convert to MP3. export() returns the output file object still open,
        # so close it explicitly instead of leaving that to the GC.
        AudioSegment.from_wav(wav_path).export(mp3_path, format="mp3").close()
    finally:
        # The WAV is only an intermediate; remove it so requests do not
        # accumulate stale files on disk (also on failure).
        if os.path.exists(wav_path):
            os.remove(wav_path)

    return mp3_path, mp3_path
# Gradio UI: script box and emotion picker, a generate button, and two outputs
# (inline player + downloadable file) that both receive the generated MP3 path.
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ AI Voiceover Generator with Emotion Control")

    with gr.Row():
        script_input = gr.Textbox(
            label="Enter Your Script",
            lines=5,
            placeholder="Type your video script here...",
        )
        # Choices come straight from emotion_to_file so the dropdown can never
        # offer an emotion that has no speaker sample mapped.
        emotion_choice = gr.Dropdown(
            list(emotion_to_file),
            label="Select Emotion",
            value="Neutral",
        )

    generate_button = gr.Button("🎤 Generate Voiceover")
    audio_output = gr.Audio(label="Listen", type="filepath")
    download_link = gr.File(label="Download MP3")

    # generate_voice returns the MP3 path twice: once per output component.
    generate_button.click(
        fn=generate_voice,
        inputs=[script_input, emotion_choice],
        outputs=[audio_output, download_link],
    )

demo.launch()