Spaces:

midhyaraj
/

vc

Runtime error

App Files Files Community

vc / app.py

midhyaraj

Update app.py

8eab11b verified about 1 year ago

raw

history blame contribute delete

5.32 kB

	import os
	import subprocess
	import sys

	# Function to setup the environment
	def setup_environment():
	    """Clone and install Tortoise-TTS and Gradio, leaving the checkout as the CWD.

	    Clones the repository into ``tortoise-tts`` under the current working
	    directory (skipped when that path already exists), changes into it, then
	    installs its requirements, the package itself and Gradio with pip.

	    Side effects:
	        The process working directory becomes the checkout, the checkout's
	        absolute path is appended to ``sys.path`` and the import caches are
	        invalidated, so the freshly installed packages can be imported by
	        this same interpreter without a restart.

	    Raises:
	        subprocess.CalledProcessError: If git or pip exits with a non-zero
	            status (every command runs with ``check=True``).
	    """
	    # Local import: only this function needs it.
	    import importlib

	    repo_dir = "tortoise-tts"
	    pip_install = [sys.executable, "-m", "pip", "install"]

	    # Clone the Tortoise-TTS repository if it doesn't exist
	    if not os.path.exists(repo_dir):
	        subprocess.run(["git", "clone", "https://github.com/neonbjb/tortoise-tts.git"], check=True)

	    # The install commands below use paths relative to the checkout, and the
	    # caller later reads "tortoise/voices" relative to it as well.
	    os.chdir(repo_dir)

	    # Install requirements from requirements.txt
	    subprocess.run(pip_install + ["-r", "requirements.txt"], check=True)

	    # Install the package itself through pip; invoking `setup.py install`
	    # directly is deprecated and produces an egg-style install that the
	    # already-running interpreter may not be able to import.
	    subprocess.run(pip_install + ["."], check=True)

	    # Install Gradio
	    subprocess.run(pip_install + ["gradio"], check=True)

	    # Make the new packages visible to this process: the checkout holds the
	    # `tortoise` package directory, and the finders may have cached stale
	    # directory listings from before the installs.
	    checkout = os.getcwd()
	    if checkout not in sys.path:
	        sys.path.append(checkout)
	    importlib.invalidate_caches()

	def main():
	    """Set up the environment, then build and launch the Gradio voice-cloning UI.

	    Runs ``setup_environment()`` first, imports Gradio and Tortoise-TTS only
	    afterwards (they may not be installed before that call), creates one
	    ``TextToSpeech`` instance and serves an interface that produces three
	    candidate clips per request.

	    Raises:
	        ImportError: If the Tortoise-TTS package cannot be imported after setup.
	    """
	    # Call the setup function to ensure everything is installed
	    setup_environment()

	    # Import Gradio and other required libraries after setting up the environment
	    import gradio as gr

	    # Ensure the tortoise package is correctly imported. The voice/audio
	    # loaders are module-level helpers in tortoise.utils.audio, not methods
	    # of the TextToSpeech instance.
	    try:
	        from tortoise.api import TextToSpeech
	        from tortoise.utils.audio import load_audio, load_voice, load_voices
	    except ImportError as e:
	        raise ImportError("Tortoise TTS not found. Make sure it is correctly installed.") from e

	    # Initialize the TextToSpeech instance
	    tts = TextToSpeech()

	    VOICE_SAMPLE_RATE = 22050   # rate reported for the reference-voice preview
	    OUTPUT_SAMPLE_RATE = 24000  # rate reported for the generated candidates

	    VOICE_OPTIONS = [
	        "random",  # special option for random voice
	        "custom_voice",  # special option for custom voice
	        "disabled",  # special option for disabled voice
	    ]

	    # List the bundled voices once (relative to the checkout, which is the
	    # working directory after setup) and reuse the result for all dropdowns.
	    voice_choices = sorted(os.listdir(os.path.join("tortoise", "voices"))) + VOICE_OPTIONS

	    def inference(text, emotion, prompt, preset, voice, mic_audio, voice_b, voice_c, seed):
	        """Generate three speech candidates for ``text``.

	        The parameter order mirrors the order of the ``inputs`` list passed
	        to ``gr.Interface`` below, because Gradio passes values positionally.

	        Args:
	            text: Text to speak.
	            emotion: Selected emotion, or "None/Custom" to use ``prompt``.
	            prompt: Free-form emotion prompt, used only with "None/Custom".
	            preset: Tortoise quality preset name.
	            voice: Primary voice name, or "custom_voice" to use ``mic_audio``.
	            mic_audio: File path of the recorded reference clip, or None.
	            voice_b: Optional second voice ("disabled" to skip).
	            voice_c: Optional third voice ("disabled" to skip).
	            seed: Deterministic seed forwarded to Tortoise.

	        Returns:
	            A 4-tuple for the four audio outputs: the reference-voice preview
	            (or None when no raw sample is available) followed by the three
	            generated candidates, each as ``(sample_rate, numpy_array)``.

	        Raises:
	            gr.Error: If "custom_voice" is selected without a recording.
	        """
	        voices = [] if voice == "custom_voice" else [voice]
	        voices.extend(extra for extra in (voice_b, voice_c) if extra != "disabled")

	        if emotion != "None/Custom":
	            text = f"[I am really {emotion.lower()},] {text}"
	        elif prompt and prompt.strip():
	            text = f"[{prompt},] {text}"

	        custom_clip = None
	        if voice == "custom_voice":
	            if mic_audio is None:
	                raise gr.Error("Please provide audio from mic when choosing custom voice")
	            # Load at the rate the preview is reported with, instead of the
	            # recording's native rate.
	            custom_clip = load_audio(mic_audio, VOICE_SAMPLE_RATE)

	        if len(voices) <= 1:
	            if voice == "custom_voice":
	                # NOTE(review): a single extra voice selected alongside the
	                # custom voice is ignored here, as in the original logic.
	                voice_samples, conditioning_latents = [custom_clip], None
	            else:
	                voice_samples, conditioning_latents = load_voice(voice)
	        else:
	            voice_samples, conditioning_latents = load_voices(voices)
	            if voice == "custom_voice":
	                # The loader may hand back no raw clips; start from an empty
	                # list in that case rather than failing on None.
	                voice_samples = list(voice_samples or []) + [custom_clip]

	        # No raw sample exists for e.g. the "random" voice.
	        sample_voice = voice_samples[0] if voice_samples else None

	        gen, _ = tts.tts_with_preset(
	            text,
	            voice_samples=voice_samples,
	            conditioning_latents=conditioning_latents,
	            preset=preset,
	            use_deterministic_seed=seed,
	            return_deterministic_state=True,
	            k=3,
	        )

	        sample_audio = None
	        if sample_voice is not None:
	            sample_audio = (VOICE_SAMPLE_RATE, sample_voice.squeeze().cpu().numpy())

	        return (
	            sample_audio,
	            (OUTPUT_SAMPLE_RATE, gen[0].squeeze().cpu().numpy()),
	            (OUTPUT_SAMPLE_RATE, gen[1].squeeze().cpu().numpy()),
	            (OUTPUT_SAMPLE_RATE, gen[2].squeeze().cpu().numpy()),
	        )

	    # Create the Gradio interface. The order of `inputs` must stay in sync
	    # with the positional parameters of `inference`.
	    interface = gr.Interface(
	        fn=inference,
	        inputs=[
	            gr.Textbox(lines=4, label="Text:"),
	            gr.Radio(["None/Custom", "Happy", "Sad", "Angry", "Disgusted", "Arrogant"],
	                     value="None/Custom", label="Select emotion:"),
	            gr.Textbox(lines=1, label="Enter prompt if [Custom] emotion:"),
	            gr.Radio(["ultra_fast", "fast", "standard", "high_quality"],
	                     value="fast", label="Preset mode:"),
	            gr.Dropdown(
	                choices=voice_choices,
	                value="angie",  # Default voice
	                label="Select voice:",
	            ),
	            gr.Audio(label="Record voice (when selected custom_voice):", type="filepath"),
	            gr.Dropdown(
	                choices=voice_choices,
	                value="disabled",
	                label="(Optional) Select second voice:",
	            ),
	            gr.Dropdown(
	                choices=voice_choices,
	                value="disabled",
	                label="(Optional) Select third voice:",
	            ),
	            gr.Number(value=0, precision=0, label="Seed (for reproducibility):"),
	        ],
	        outputs=[
	            gr.Audio(label="Sample of selected voice (first):"),
	            gr.Audio(label="Output [Candidate 1]:"),
	            gr.Audio(label="Output [Candidate 2]:"),
	            gr.Audio(label="Output [Candidate 3]:"),
	        ],
	        title="RJ VOICE CLONING",
	        description="<h1 style='text-align: center; color: orange; font-weight: bold;'>RJ VOICE CLONING</h1>",
	        css=".gradio-container { background-color: black; color: orange; }",
	    )

	    # Launch the interface
	    interface.launch(share=True)

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()