File size: 2,259 Bytes
a323e65 a2ae3c3 2f7658e 025cead 8855e58 025cead 2f7658e a1b949b 7039992 a1b949b a2ae3c3 3957ee2 2f7658e a2ae3c3 2c4d20a a1b949b a2ae3c3 c8774e7 a1b949b a2ae3c3 2acb2e9 a2ae3c3 8b0a429 6eee829 8b0a429 2acb2e9 a2ae3c3 8b0a429 2acb2e9 a2ae3c3 3957ee2 c8774e7 3957ee2 c8774e7 a2ae3c3 07a7d2f a2ae3c3 a1b949b 3957ee2 a1b949b 3957ee2 a1b949b 3957ee2 a1b949b 3957ee2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import os
# Pre-accept the Coqui model licence so the model download does not block on
# an interactive prompt. Must be set before the XTTS model is instantiated.
os.environ["COQUI_TOS_AGREED"] = "1"

import torch.serialization
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsAudioConfig, XttsArgs
from TTS.config.shared_configs import BaseDatasetConfig

# The XTTS checkpoint pickles these config classes. torch's restricted
# ("weights_only") unpickler rejects any class that has not been explicitly
# allow-listed, so register them before the model is loaded.
torch.serialization.add_safe_globals([
    XttsConfig,
    XttsAudioConfig,
    XttsArgs,
    BaseDatasetConfig,
])
import gradio as gr
import torch
import torchaudio
from TTS.api import TTS
from pydub import AudioSegment
import uuid
# Load XTTS model
# Identifier of the multilingual XTTS v2 checkpoint in the Coqui model zoo.
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"

# One synthesizer instance is created at import time and reused by every
# request; it runs on CPU (gpu=False) with the download progress bar disabled.
tts = TTS(
    model_name=model_name,
    progress_bar=False,
    gpu=False,
)
# Map emotions to file paths
# Each emotion label maps to the reference recording used as the speaker
# sample for that emotion: "samples/<label in lower case>.wav".
emotion_to_file = {
    label: f"samples/{label.lower()}.wav"
    for label in ("Neutral", "Sad", "Happy", "Angry", "Excited")
}
# Voice generator
def generate_voice(text, emotion):
    """Synthesize ``text`` as an MP3 voiced with the chosen emotion's sample.

    The speaker reference recording for ``emotion`` is looked up in
    ``emotion_to_file`` and handed to the XTTS model, which writes a WAV
    file. That WAV is converted to MP3 and then deleted.

    Args:
        text: Script to speak. Must contain at least one non-blank character.
        emotion: A key of ``emotion_to_file`` (e.g. ``"Neutral"``).

    Returns:
        A ``(mp3_path, mp3_path)`` tuple: the same path twice, one for the
        audio player output and one for the download output.

    Raises:
        ValueError: If ``text`` is empty/blank or ``emotion`` is unknown.
        FileNotFoundError: If the speaker sample for ``emotion`` is missing.
    """
    # Reject blank scripts up front instead of failing inside the model.
    if not text or not text.strip():
        raise ValueError("Script text is empty; enter some text to synthesize.")

    speaker_audio_path = emotion_to_file.get(emotion)
    if speaker_audio_path is None:
        # Without this check os.path.isfile(None) raises an opaque TypeError.
        raise ValueError(f"Unknown emotion: {emotion!r}")
    if not os.path.isfile(speaker_audio_path):
        raise FileNotFoundError(f"Speaker audio file not found: {speaker_audio_path}")

    # Generate unique filenames so concurrent requests never overwrite
    # each other's output.
    uid = uuid.uuid4().hex
    wav_path = f"output_{uid}.wav"
    mp3_path = f"output_{uid}.mp3"

    try:
        tts.tts_to_file(
            text=text,
            speaker_wav=speaker_audio_path,
            language="en",
            file_path=wav_path,
        )

        # Convert to MP3. export() returns the open output file object, so
        # close it explicitly rather than leaking a handle per request.
        sound = AudioSegment.from_wav(wav_path)
        sound.export(mp3_path, format="mp3").close()
    finally:
        # The WAV is only an intermediate artefact; remove it whether or not
        # synthesis/conversion succeeded. Best-effort: it may not exist if
        # synthesis failed before writing anything.
        try:
            os.remove(wav_path)
        except OSError:
            pass

    return mp3_path, mp3_path
# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ AI Voiceover Generator with Emotion Control")

    # Inputs: the script text and the emotion whose speaker sample is used.
    with gr.Row():
        script_input = gr.Textbox(
            label="Enter Your Script",
            lines=5,
            placeholder="Type your video script here...",
        )
        # Choices come from the emotion -> sample mapping so the dropdown can
        # never offer an emotion that generate_voice cannot resolve.
        emotion_choice = gr.Dropdown(
            list(emotion_to_file),
            label="Select Emotion",
            value="Neutral",
        )

    generate_button = gr.Button("🎤 Generate Voiceover")

    # Outputs: an inline player and a downloadable file, both fed the MP3 path.
    audio_output = gr.Audio(label="Listen", type="filepath")
    download_link = gr.File(label="Download MP3")

    generate_button.click(
        fn=generate_voice,
        inputs=[script_input, emotion_choice],
        outputs=[audio_output, download_link],
    )

demo.launch()
|