import os

# Pre-accept the Coqui model licence prompt so the model download below can run
# non-interactively. This must be set before the TTS model is instantiated.
os.environ["COQUI_TOS_AGREED"] = "1"

# Allow-list the XTTS configuration classes for torch's restricted
# ("weights only") unpickler, so a checkpoint that embeds instances of these
# classes can still be loaded.
import torch.serialization
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig

# PyTorch builds that predate ``add_safe_globals`` have no allow-list to
# populate, so the registration is skipped there instead of raising
# AttributeError at import time.
if hasattr(torch.serialization, "add_safe_globals"):
    torch.serialization.add_safe_globals(
        [XttsConfig, XttsAudioConfig, XttsArgs, BaseDatasetConfig]
    )

import contextlib
import uuid

import gradio as gr
import torch
import torchaudio
from pydub import AudioSegment
from TTS.api import TTS

# Load the XTTS v2 model once at import time so every request reuses the same
# instance. ``gpu=False`` keeps inference on the CPU.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(model_name=model_name, progress_bar=False, gpu=False)

# Maps each emotion label to the reference recording passed to the model as
# ``speaker_wav``. Paths are relative to the process working directory.
emotion_to_file = {
    "Neutral": "samples/neutral.wav",
    "Sad": "samples/sad.wav",
    "Happy": "samples/happy.wav",
    "Angry": "samples/angry.wav",
    "Excited": "samples/excited.wav",
}


def generate_voice(text, emotion):
    """Synthesize ``text`` as speech using the reference clip for ``emotion``.

    The audio is rendered to a uniquely named WAV file in the current working
    directory, converted to MP3, and the intermediate WAV is removed.

    Args:
        text: Script to speak. Must contain at least one non-whitespace
            character.
        emotion: A key of ``emotion_to_file`` selecting the reference
            recording passed to the model as ``speaker_wav``.

    Returns:
        A 2-tuple holding the MP3 path twice: once for the audio player and
        once for the download component.

    Raises:
        ValueError: If ``text`` is empty/blank or ``emotion`` is not a key of
            ``emotion_to_file``.
        FileNotFoundError: If the reference recording for ``emotion`` does not
            exist on disk.
    """
    # Reject blank input up front instead of handing it to the synthesizer.
    if not text or not text.strip():
        raise ValueError("Script text must not be empty.")

    speaker_audio_path = emotion_to_file.get(emotion)
    if speaker_audio_path is None:
        # Without this check ``os.path.isfile(None)`` fails with an opaque
        # TypeError when the dropdown value is missing or unrecognised.
        supported = ", ".join(emotion_to_file)
        raise ValueError(
            f"Unsupported emotion: {emotion!r}. Choose one of: {supported}"
        )
    if not os.path.isfile(speaker_audio_path):
        raise FileNotFoundError(f"Speaker audio file not found: {speaker_audio_path}")

    # A random suffix keeps concurrent requests from overwriting each other.
    uid = uuid.uuid4().hex
    wav_path = f"output_{uid}.wav"
    mp3_path = f"output_{uid}.mp3"

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_audio_path,
            language="en",
            file_path=wav_path,
        )
        sound = AudioSegment.from_wav(wav_path)
        # ``export`` hands back the open output file; close it so the handle
        # is not leaked on every request.
        sound.export(mp3_path, format="mp3").close()
    finally:
        # The WAV is only an intermediate artefact; drop it whether or not
        # synthesis/conversion succeeded.
        with contextlib.suppress(FileNotFoundError):
            os.remove(wav_path)

    return mp3_path, mp3_path


# Gradio front end: a script box and an emotion selector feed generate_voice,
# whose MP3 path is shown both as a player and as a downloadable file.
# NOTE(review): the source had lost its indentation; the nesting below is
# reconstructed from statement order and grouping - confirm the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ AI Voiceover Generator with Emotion Control")

    with gr.Row():
        script_input = gr.Textbox(
            label="Enter Your Script",
            lines=5,
            placeholder="Type your video script here...",
        )
        # Choices come from emotion_to_file so the dropdown can never offer an
        # emotion that has no reference recording configured.
        emotion_choice = gr.Dropdown(
            list(emotion_to_file),
            label="Select Emotion",
            value="Neutral",
        )

    generate_button = gr.Button("🎤 Generate Voiceover")
    audio_output = gr.Audio(label="Listen", type="filepath")
    download_link = gr.File(label="Download MP3")

    generate_button.click(
        fn=generate_voice,
        inputs=[script_input, emotion_choice],
        outputs=[audio_output, download_link],
    )

demo.launch()