File size: 2,259 Bytes
a323e65
a2ae3c3
2f7658e
 
 
 
025cead
8855e58
025cead
2f7658e
 
a1b949b
7039992
 
a1b949b
a2ae3c3
3957ee2
2f7658e
a2ae3c3
2c4d20a
 
a1b949b
a2ae3c3
 
 
 
 
 
 
c8774e7
a1b949b
a2ae3c3
 
 
2acb2e9
 
 
a2ae3c3
 
 
 
 
8b0a429
6eee829
8b0a429
2acb2e9
a2ae3c3
8b0a429
2acb2e9
a2ae3c3
 
3957ee2
c8774e7
3957ee2
c8774e7
a2ae3c3
07a7d2f
a2ae3c3
a1b949b
3957ee2
 
 
a1b949b
3957ee2
 
 
a1b949b
3957ee2
a1b949b
3957ee2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os

# Coqui TTS shows an interactive terms-of-service prompt on first model
# download; pre-agreeing via env var keeps the app non-interactive.
# Must be set before the TTS model is downloaded/loaded.
os.environ["COQUI_TOS_AGREED"] = "1"

import torch
import torch.serialization

from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import XttsArgs, XttsAudioConfig

# PyTorch 2.6+ defaults torch.load(weights_only=True); the XTTS checkpoint
# pickles these config classes, so they must be allow-listed before the
# model is loaded. (Replaces the previous __import__(...) attribute-chain
# hack with ordinary imports of the same four classes.)
torch.serialization.add_safe_globals([
    XttsConfig,
    XttsAudioConfig,
    XttsArgs,
    BaseDatasetConfig,
])

import gradio as gr
import torchaudio
from TTS.api import TTS
from pydub import AudioSegment
import uuid

# Load the multilingual XTTS v2 voice-cloning model on CPU
# (set gpu=True if CUDA is available).
model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
tts = TTS(model_name=model_name, progress_bar=False, gpu=False)

# Map emotions to file paths
emotion_to_file = {
    "Neutral": "samples/neutral.wav",
    "Sad": "samples/sad.wav",
    "Happy": "samples/happy.wav",
    "Angry": "samples/angry.wav",
    "Excited": "samples/excited.wav"
}

# Voice generator
def generate_voice(text, emotion):
    speaker_audio_path = emotion_to_file.get(emotion)
    if not os.path.isfile(speaker_audio_path):
        raise FileNotFoundError(f"Speaker audio file not found: {speaker_audio_path}")

    # Generate unique filenames to avoid overwrites
    uid = uuid.uuid4().hex
    wav_path = f"output_{uid}.wav"
    mp3_path = f"output_{uid}.mp3"

    tts.tts_to_file(
        text=text,
        speaker_wav=speaker_audio_path,
        language="en",
        file_path=wav_path
    )

    # Convert to MP3
    sound = AudioSegment.from_wav(wav_path)
    sound.export(mp3_path, format="mp3")

    return mp3_path, mp3_path

# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("## 🎙️ AI Voiceover Generator with Emotion Control")

    with gr.Row():
        script_input = gr.Textbox(label="Enter Your Script", lines=5, placeholder="Type your video script here...")
        emotion_choice = gr.Dropdown(["Neutral", "Sad", "Happy", "Angry", "Excited"], label="Select Emotion", value="Neutral")

    generate_button = gr.Button("🎤 Generate Voiceover")
    audio_output = gr.Audio(label="Listen", type="filepath")
    download_link = gr.File(label="Download MP3")

    generate_button.click(fn=generate_voice, inputs=[script_input, emotion_choice], outputs=[audio_output, download_link])

demo.launch()