Spaces:

sugakrit6
/

AITextGen

Build error

App Files Files Community

AITextGen / app.py

sugakrit6

Update app.py

f9bb750 verified 2 months ago

raw

history blame contribute delete

5.78 kB

	import asyncio
	import os
	import tempfile
	import zipfile

	import edge_tts
	import gradio as gr
	import requests
	import soundfile as sf
	from infer_rvc_python import BaseLoader

	# Root folder for imported voice models; created up front so later code can
	# assume it exists.
	MODEL_DIR = "voice_models"
	os.makedirs(MODEL_DIR, exist_ok=True)

	# Build the single, shared RVC converter at import time, pinned to the CPU
	# (the original author targets the Hugging Face free tier).
	# NOTE(review): with no explicit HuBERT/RMVPE paths the library presumably
	# fetches its own support models on first run — confirm against
	# infer_rvc_python.
	print("Initializing RVC Engine...")
	rvc_converter = BaseLoader(
	    only_cpu=True,
	    hubert_path=None,
	    rmvpe_path=None,
	)

	# --- Helper Functions ---

	def download_and_extract_model(zip_url, model_name):
	    """Download a zipped RVC model and unpack it under ``MODEL_DIR/<model_name>``.

	    Args:
	        zip_url: Direct link to a ``.zip`` archive containing the model files.
	        model_name: Folder name to store the model under. Must be a plain
	            name with no path separators.

	    Returns:
	        A human-readable status string (success, warning, or error). Failures
	        are reported through the message rather than raised, because the
	        result is shown directly in the UI.
	    """
	    if not zip_url or not model_name:
	        return "Error: Please provide both a URL and a Model Name."

	    # The name becomes a directory under MODEL_DIR; refuse anything that could
	    # point outside it (e.g. "../x" or an absolute path).
	    if model_name in (".", "..") or os.path.basename(model_name) != model_name:
	        return "Error: Model Name must be a simple name without path separators."

	    model_folder = os.path.join(MODEL_DIR, model_name)
	    zip_path = os.path.join(model_folder, "model.zip")

	    try:
	        os.makedirs(model_folder, exist_ok=True)
	        try:
	            # Stream to disk so large archives are not held in memory. The
	            # (connect, read) timeout keeps a stalled server from hanging the
	            # worker; the context manager releases the connection.
	            with requests.get(zip_url, stream=True, timeout=(10, 60)) as response:
	                response.raise_for_status()
	                with open(zip_path, "wb") as archive:
	                    for chunk in response.iter_content(chunk_size=8192):
	                        archive.write(chunk)

	            # NOTE: the archive comes from a user-supplied URL; member paths
	            # are left to zipfile's own sanitization in extractall().
	            with zipfile.ZipFile(zip_path, "r") as zip_ref:
	                zip_ref.extractall(model_folder)
	        finally:
	            # Remove the archive whether or not the download/extraction
	            # succeeded, so failed attempts leave no partial zip behind.
	            if os.path.exists(zip_path):
	                os.remove(zip_path)

	        # The converter needs a .pth weights file; look for one at any depth.
	        pth_found = any(
	            name.endswith(".pth")
	            for _root, _dirs, files in os.walk(model_folder)
	            for name in files
	        )
	        if pth_found:
	            return f"Success! Model '{model_name}' downloaded and imported."
	        return "Warning: Downloaded successfully, but no .pth file was found in the zip."

	    except Exception as e:
	        # UI boundary: surface any failure as a status message.
	        return f"Error downloading model: {str(e)}"

	async def generate_base_tts(text, output_path, voice="en-US-ChristopherNeural"):
	    """Synthesize ``text`` with Edge-TTS and save the audio to ``output_path``.

	    Args:
	        text: The text to speak.
	        output_path: Destination file path for the synthesized audio.
	        voice: Edge-TTS voice name. Defaults to the neutral male voice the
	            app has always used, so existing two-argument calls are unchanged.
	    """
	    communicate = edge_tts.Communicate(text, voice)
	    await communicate.save(output_path)

	def _find_model_files(model_folder):
	    """Return ``(pth_path, index_path)`` found under ``model_folder``.

	    Either element is ``None`` when no matching file exists. If several files
	    match, the last one visited by ``os.walk`` wins.
	    """
	    pth_file = None
	    index_file = None
	    for root, _dirs, files in os.walk(model_folder):
	        for file in files:
	            if file.endswith(".pth"):
	                pth_file = os.path.join(root, file)
	            elif file.endswith(".index"):
	                index_file = os.path.join(root, file)
	    return pth_file, index_file


	def text_to_custom_speech(text, model_name, pitch_adjustment):
	    """Speak ``text`` with Edge-TTS, then convert it with the named RVC model.

	    Args:
	        text: The text to synthesize.
	        model_name: Name of a model folder previously created under
	            ``MODEL_DIR``.
	        pitch_adjustment: Pitch shift passed to the converter as ``pitch_lvl``.

	    Returns:
	        A ``(audio_path, status_message)`` tuple. ``audio_path`` is ``None``
	        whenever the message reports an error; failures are reported through
	        the message rather than raised.
	    """
	    if not text:
	        return None, "Error: Please enter some text."

	    # Test the name before building a path from it: os.path.join() raises
	    # TypeError on None, and the short-circuit keeps that from happening.
	    if not model_name or not os.path.exists(os.path.join(MODEL_DIR, model_name)):
	        return None, "Error: Please import a valid model first."
	    model_folder = os.path.join(MODEL_DIR, model_name)

	    base_audio_path = None
	    try:
	        # 1. Find the .pth and .index files for the requested model
	        pth_file, index_file = _find_model_files(model_folder)
	        if not pth_file:
	            return None, "Error: No .pth file found for this model."

	        # 2. Generate Base TTS into a per-request temp file, so concurrent
	        # requests cannot overwrite each other's audio. edge-tts emits
	        # MP3-encoded data, hence the suffix.
	        base_fd, base_audio_path = tempfile.mkstemp(prefix="tts_base_", suffix=".mp3")
	        os.close(base_fd)
	        asyncio.run(generate_base_tts(text, base_audio_path))

	        # 3. Apply RVC Voice Conversion
	        rvc_converter.apply_conf(
	            tag=model_name,
	            file_model=pth_file,
	            pitch_algo="rmvpe",
	            pitch_lvl=pitch_adjustment,
	            file_index=index_file if index_file else "",
	            index_influence=0.66,
	            respiration_median_filtering=3,
	            envelope_ratio=0.25,
	            consonant_breath_protection=0.33,
	        )

	        result_array, sample_rate = rvc_converter.generate_from_cache(
	            audio_data=base_audio_path,
	            tag=model_name,
	        )

	        # Save the final converted audio to its own unique file; it is
	        # returned to the caller and therefore not deleted here.
	        out_fd, output_audio_path = tempfile.mkstemp(prefix="tts_rvc_", suffix=".wav")
	        os.close(out_fd)
	        sf.write(file=output_audio_path, samplerate=sample_rate, data=result_array)

	        return output_audio_path, "Speech generated successfully with custom voice!"

	    except Exception as e:
	        # UI boundary: surface any failure as a status message.
	        return None, f"Error generating speech: {str(e)}"
	    finally:
	        # Best-effort cleanup of the intermediate TTS file.
	        if base_audio_path and os.path.exists(base_audio_path):
	            try:
	                os.remove(base_audio_path)
	            except OSError:
	                pass

	# --- Gradio User Interface ---

	# Build the Gradio interface: a model-import panel and a text-to-speech panel.
	# NOTE(review): indentation was lost in this copy of the file, so the exact
	# nesting of the `with` blocks below (in particular whether the audio/status
	# outputs sit inside the second column) must be confirmed against the real
	# app.py before re-indenting.
	with gr.Blocks(theme=gr.themes.Soft()) as app:
	gr.Markdown("# 🎙️ RVC Text-to-Speech Space")
	gr.Markdown("Import a custom voice model via a direct zip link, then generate text-to-speech using that voice.")

	with gr.Row():
	with gr.Column():
	# Panel 1: the URL and name are the two inputs of download_and_extract_model.
	gr.Markdown("### 1. Import Voice Model")
	model_url_input = gr.Textbox(label="Model Zip URL (e.g., HuggingFace resolve link)", placeholder="https://huggingface.co/...")
	model_name_input = gr.Textbox(label="Model Name", placeholder="e.g., needlev2")
	import_btn = gr.Button("Done (Import Model)", variant="primary")
	# Read-only box that displays the status string returned by the import.
	import_status = gr.Textbox(label="Import Status", interactive=False)

	with gr.Column():
	# Panel 2: text and pitch inputs for text_to_custom_speech.
	gr.Markdown("### 2. Text to Speech")
	text_input = gr.Textbox(label="Enter Text", lines=4, placeholder="Type what you want the voice to say here...")
	pitch_slider = gr.Slider(minimum=-24, maximum=24, step=1, value=0, label="Pitch Adjustment (Set to +12 for Female voices, 0 or -12 for Male)")
	generate_btn = gr.Button("Done (Generate Speech)", variant="primary")

	# Outputs of text_to_custom_speech: the generated file path and a status message.
	audio_output = gr.Audio(label="Generated Audio", type="filepath")
	generation_status = gr.Textbox(label="Status", interactive=False)

	# Connect each button to its handler. Both handlers report problems through
	# their returned status text.
	import_btn.click(
	fn=download_and_extract_model,
	inputs=[model_url_input, model_name_input],
	outputs=import_status
	)

	# The "Model Name" box from the import panel doubles as the model selector
	# for generation; there is no separate model picker.
	generate_btn.click(
	fn=text_to_custom_speech,
	inputs=[text_input, model_name_input, pitch_slider],
	outputs=[audio_output, generation_status]
	)

	# Launches the app whenever this file is executed or imported (there is no
	# `if __name__ == "__main__"` guard).
	app.launch()