File size: 5,321 Bytes
321e586 ba4c6a9 e16556e 321e586 e16556e ba4c6a9 e16556e 321e586 ba4c6a9 321e586 ba4c6a9 e16556e ba4c6a9 8eab11b ba4c6a9 e16556e 321e586 c3d1a4f 0343a55 c3d1a4f 0343a55 c3d1a4f 0343a55 c3d1a4f 0343a55 c3d1a4f 0343a55 c3d1a4f 8eab11b 321e586 ba4c6a9 8eab11b ba4c6a9 8eab11b ba4c6a9 321e586 ba4c6a9 321e586 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import os
import subprocess
import sys
# Function to setup the environment
def setup_environment():
    """Prepare a local Tortoise-TTS checkout and install everything the app needs.

    Steps, in order: clone the repository when no ``tortoise-tts`` path
    exists, change the working directory into it, install its
    ``requirements.txt``, install the package through ``setup.py``, and
    finally install Gradio.

    Side effects:
        The process working directory is left inside ``tortoise-tts``.

    Raises:
        subprocess.CalledProcessError: If any git/pip/setup command exits
            with a non-zero status (every command runs with ``check=True``).
    """
    checkout = "tortoise-tts"

    # Only clone on the first run; an existing path is reused as-is.
    if not os.path.exists(checkout):
        subprocess.run(
            ["git", "clone", "https://github.com/neonbjb/tortoise-tts.git"],
            check=True,
        )

    # The install commands below use paths relative to the checkout.
    os.chdir(checkout)

    python = sys.executable
    install_steps = (
        # Requirements pinned by the repository.
        [python, "-m", "pip", "install", "-r", "requirements.txt"],
        # The Tortoise package itself.
        [python, "setup.py", "install"],
        # The web UI toolkit.
        [python, "-m", "pip", "install", "gradio"],
    )
    for command in install_steps:
        subprocess.run(command, check=True)
def main():
    """Set up Tortoise-TTS, then build and launch the Gradio voice-cloning UI.

    Third-party imports happen inside this function because the packages may
    not be installed until ``setup_environment()`` has run.

    Raises:
        ImportError: If the ``tortoise`` package cannot be imported after setup.
    """
    # Call the setup function to ensure everything is installed
    setup_environment()

    # Import Gradio and other required libraries after setting up the environment
    import gradio as gr
    import torchaudio

    # Ensure the tortoise package is correctly imported
    try:
        from tortoise.api import TextToSpeech
        # The voice loaders are module-level helpers, not TextToSpeech methods.
        from tortoise.utils.audio import load_voice, load_voices
    except ImportError as e:
        raise ImportError("Tortoise TTS not found. Make sure it is correctly installed.") from e

    # Initialize the TextToSpeech instance
    tts = TextToSpeech()

    VOICE_OPTIONS = [
        "random",  # special option for random voice
        "custom_voice",  # special option for custom voice
        "disabled",  # special option for disabled voice
    ]

    # Sample rate the voice sample is reported at in the UI output below.
    voice_sample_rate = 22050

    # List the voices directory once (relative to the current working
    # directory) and sort it so the dropdown order is stable across runs.
    voice_choices = sorted(os.listdir(os.path.join("tortoise", "voices"))) + VOICE_OPTIONS

    def load_custom_clip(path):
        """Load the recorded clip as a mono waveform at ``voice_sample_rate``."""
        waveform, source_rate = torchaudio.load(path)
        # NOTE(review): presumably Tortoise expects mono reference clips —
        # confirm; multi-channel recordings are averaged down to one channel.
        if waveform.shape[0] > 1:
            waveform = waveform.mean(dim=0, keepdim=True)
        if source_rate != voice_sample_rate:
            waveform = torchaudio.functional.resample(waveform, source_rate, voice_sample_rate)
        return waveform

    # The parameter order MUST match the order of ``inputs`` in the
    # gr.Interface below: Gradio passes component values positionally.
    def inference(text, emotion, prompt, preset, voice, mic_audio, voice_b, voice_c, seed):
        """Generate three speech candidates for ``text``.

        Returns a 4-tuple for the four audio outputs: the first reference
        clip (or ``None`` when the chosen voice has no raw clips) followed by
        the three generated candidates as ``(sample_rate, ndarray)`` pairs.

        Raises:
            gr.Error: If "custom_voice" is selected but nothing was recorded.
        """
        use_custom = voice == "custom_voice"

        # Named voices to load from disk; the custom recording is added later.
        voices = [] if use_custom else [voice]
        voices.extend(v for v in (voice_b, voice_c) if v != "disabled")

        # Prefix the text with an emotion or a free-form prompt.
        if emotion != "None/Custom":
            text = f"[I am really {emotion.lower()},] {text}"
        elif prompt and prompt.strip() != "":
            text = f"[{prompt},] {text}"

        if use_custom:
            if mic_audio is None:
                raise gr.Error("Please provide audio from mic when choosing custom voice")
            custom_clip = load_custom_clip(mic_audio)
            if voices:
                # Extra voices chosen alongside the recording are loaded too
                # (a single extra voice used to be silently ignored).
                voice_samples, conditioning_latents = load_voices(voices)
                # The loader may return None instead of a list of clips.
                voice_samples = list(voice_samples or []) + [custom_clip]
            else:
                voice_samples, conditioning_latents = [custom_clip], None
        elif len(voices) == 1:
            voice_samples, conditioning_latents = load_voice(voice)
        else:
            voice_samples, conditioning_latents = load_voices(voices)

        # voice_samples can be None (e.g. the "random" voice), so test
        # truthiness of the list instead of calling len() on it.
        sample_voice = voice_samples[0] if voice_samples else None

        gen, _ = tts.tts_with_preset(
            text,
            voice_samples=voice_samples,
            conditioning_latents=conditioning_latents,
            preset=preset,
            use_deterministic_seed=seed,
            return_deterministic_state=True,
            k=3,
        )

        # No reference clip: return None for the sample output rather than
        # crashing on None.squeeze().
        sample_output = None
        if sample_voice is not None:
            sample_output = (voice_sample_rate, sample_voice.squeeze().cpu().numpy())

        return (
            sample_output,
            (24000, gen[0].squeeze().cpu().numpy()),
            (24000, gen[1].squeeze().cpu().numpy()),
            (24000, gen[2].squeeze().cpu().numpy()),
        )

    # Create the Gradio interface
    interface = gr.Interface(
        fn=inference,
        inputs=[
            gr.Textbox(lines=4, label="Text:"),
            gr.Radio(
                ["None/Custom", "Happy", "Sad", "Angry", "Disgusted", "Arrogant"],
                value="None/Custom",
                label="Select emotion:",
            ),
            gr.Textbox(lines=1, label="Enter prompt if [Custom] emotion:"),
            gr.Radio(
                ["ultra_fast", "fast", "standard", "high_quality"],
                value="fast",
                label="Preset mode:",
            ),
            gr.Dropdown(
                choices=voice_choices,
                value="angie",  # Default voice
                label="Select voice:",
            ),
            gr.Audio(label="Record voice (when selected custom_voice):", type="filepath"),
            gr.Dropdown(
                choices=voice_choices,
                value="disabled",
                label="(Optional) Select second voice:",
            ),
            gr.Dropdown(
                choices=voice_choices,
                value="disabled",
                label="(Optional) Select third voice:",
            ),
            gr.Number(value=0, precision=0, label="Seed (for reproducibility):"),
        ],
        outputs=[
            gr.Audio(label="Sample of selected voice (first):"),
            gr.Audio(label="Output [Candidate 1]:"),
            gr.Audio(label="Output [Candidate 2]:"),
            gr.Audio(label="Output [Candidate 3]:"),
        ],
        title="RJ VOICE CLONING",
        description="<h1 style='text-align: center; color: orange; font-weight: bold;'>RJ VOICE CLONING</h1>",
        css=".gradio-container { background-color: black; color: orange; }",
    )

    # Launch the interface
    interface.launch(share=True)
# Run the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|