Upload folder using huggingface_hub

674e662 verified 7 days ago

7.45 kB

	import os
	import json
	from pathlib import Path
	import gradio as gr

	# Setup paths
	MODEL_PATH = Path("G_777.pth")
	CONFIG_PATH = Path("config.json")
	BANNER_PATH = Path("assets/banner.png")


	def load_speakers(config_path, fallback):
	    """Return the speaker names listed under ``"spk"`` in a JSON config.

	    Args:
	        config_path: Path of the JSON config file to read.
	        fallback: Speaker names to use when the config cannot supply any.

	    Returns:
	        A new list with the keys of the config's ``"spk"`` mapping, or a
	        copy of ``fallback`` when the file is missing, unreadable, not
	        valid JSON, or has no non-empty ``"spk"`` mapping.
	    """
	    try:
	        with open(config_path, "r", encoding="utf-8") as f:
	            config_data = json.load(f)
	    except FileNotFoundError:
	        # A missing config is an expected state, so stay quiet about it.
	        return list(fallback)
	    except (OSError, ValueError) as e:
	        # ValueError covers both malformed JSON and undecodable bytes.
	        print(f"Error loading speakers from config: {e}")
	        return list(fallback)

	    if not isinstance(config_data, dict) or "spk" not in config_data:
	        return list(fallback)

	    spk = config_data["spk"]
	    if not isinstance(spk, dict) or not spk:
	        # An empty list here would break any later `speakers[0]` lookup.
	        print("Error loading speakers from config: 'spk' is empty or not a mapping")
	        return list(fallback)
	    return list(spk)


	# Dynamic speaker loader
	speakers = load_speakers(CONFIG_PATH, ["quevedo"])

	# Inference function
	def convert_voice(input_audio, speaker, transpose, auto_predict_f0, f0_method, noise_scale):
	    """Run voice conversion on one audio file and report the outcome.

	    Args:
	        input_audio: Path of the audio file to convert, or ``None`` when
	            no audio was supplied.
	        speaker: Speaker name forwarded to the inference call.
	        transpose: Pitch shift value; coerced to ``int``.
	        auto_predict_f0: Flag forwarded to inference; coerced to ``bool``.
	        f0_method: Name of the F0 predictor forwarded to inference.
	        noise_scale: Noise scale forwarded to inference; coerced to ``float``.

	    Returns:
	        A ``(output_path, status)`` tuple. ``output_path`` is the converted
	        file's path as a string on success and ``None`` on any failure;
	        ``status`` is a human-readable message. The function does not raise
	        for inference errors; they are reported through ``status``.
	    """
	    if input_audio is None:
	        return None, "Please upload an audio file or use the microphone."

	    input_path = Path(input_audio)
	    # The result is written next to the input, named after it.
	    output_path = input_path.parent / f"{input_path.stem}_quevedo.wav"

	    # Lazy import to avoid startup errors if so-vits-svc-fork is not yet installed
	    try:
	        from so_vits_svc_fork.inference.main import infer
	    except ImportError:
	        return None, (
	            "Error: 'so-vits-svc-fork' is not installed in this environment.\n"
	            "Please run: pip install so-vits-svc-fork"
	        )

	    if not MODEL_PATH.exists():
	        return None, f"Error: Model file {MODEL_PATH} not found."
	    if not CONFIG_PATH.exists():
	        return None, f"Error: Config file {CONFIG_PATH} not found."

	    try:
	        # The existence check below is the only success signal, so a file
	        # left over from an earlier conversion of the same input would be
	        # reported as a fresh result. Remove it first.
	        output_path.unlink(missing_ok=True)

	        # Perform inference using the fork's main infer function
	        infer(
	            input_path=input_path,
	            output_path=output_path,
	            model_path=MODEL_PATH,
	            config_path=CONFIG_PATH,
	            recursive=False,
	            speaker=speaker,
	            transpose=int(transpose),
	            auto_predict_f0=bool(auto_predict_f0),
	            noise_scale=float(noise_scale),
	            f0_method=f0_method,
	        )

	        if output_path.exists():
	            return str(output_path), "Conversion completed successfully!"
	        return None, "Error: Output file was not generated."
	    except Exception as e:
	        # UI boundary: surface any failure as a status message instead of
	        # letting it propagate to the caller.
	        return None, f"Error during inference: {e}"

	# Custom CSS for premium styling matching the blue-purple theme.
	# The stylesheet is handed to gr.Blocks(css=...) when the UI is built.
	# Its class selectors are attached in two ways: through elem_classes on
	# components ("header-area", "main-box", "convert-btn") and through class
	# attributes in the header HTML ("header-title", "header-desc").
	# NOTE(review): the 'Outfit' and 'Inter' fonts are not loaded by this
	# stylesheet; presumably the browser falls back to sans-serif unless
	# something else provides them. Confirm.
	custom_css = """
	body {
	background-color: #0b0c10;
	}
	.gradio-container {
	background-color: #0b0c10 !important;
	font-family: 'Outfit', 'Inter', sans-serif !important;
	max-width: 900px !important;
	margin: 0 auto !important;
	border-radius: 12px;
	}
	.header-area {
	text-align: center;
	padding: 20px 0;
	}
	.header-title {
	color: #4f46e5;
	background: linear-gradient(90deg, #818cf8 0%, #c084fc 100%);
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	font-weight: 800 !important;
	font-size: 2.5rem !important;
	margin-bottom: 0.5rem;
	}
	.header-desc {
	color: #9ca3af;
	font-size: 1.1rem;
	margin-bottom: 20px;
	}
	.main-box {
	background: rgba(17, 24, 39, 0.7);
	border: 1px solid rgba(255, 255, 255, 0.1);
	backdrop-filter: blur(10px);
	border-radius: 16px;
	padding: 20px;
	margin-bottom: 20px;
	}
	.convert-btn {
	background: linear-gradient(135deg, #6366f1 0%, #a855f7 100%) !important;
	border: none !important;
	color: white !important;
	font-weight: bold !important;
	transition: all 0.3s ease !important;
	}
	.convert-btn:hover {
	transform: translateY(-2px);
	box-shadow: 0 4px 20px rgba(139, 92, 246, 0.4);
	}
	"""

	# Build Gradio UI
	# Layout, top to bottom: banner image, title/description header, a two-column
	# main row (inputs and parameters on the left, results on the right), footer.
	with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
	    # Banner/Header
	    with gr.Row():
	        # Fallback to CDN URL if local banner is missing or is just a small Git LFS pointer file
	        if BANNER_PATH.exists() and BANNER_PATH.stat().st_size > 5000:
	            banner_source = str(BANNER_PATH)
	        else:
	            banner_source = "https://huggingface.co/lagosproject/quevedo/resolve/main/assets/banner.png"
	        gr.Image(banner_source, show_label=False, container=False, interactive=False)

	    with gr.Row(elem_classes=["header-area"]):
	        gr.HTML(
	            "<h1 class='header-title'>🗣️ Quevedo Voice Model (so-vits-svc-fork)</h1>"
	            "<p class='header-desc'>Convert any voice or singing file into the voice of the Spanish singer Quevedo.</p>"
	        )

	    # Main conversion section
	    with gr.Row(elem_classes=["main-box"]):
	        with gr.Column(scale=1):
	            gr.Markdown("### 📥 1. Audio Input")
	            # type="filepath" hands convert_voice a path string, not samples.
	            input_audio = gr.Audio(
	                label="Audio to Convert (Clean Vocals / Acapella)",
	                type="filepath",
	                sources=["upload", "microphone"]
	            )

	            gr.Markdown("### ⚙️ 2. Conversion Parameters")
	            speaker = gr.Dropdown(
	                choices=speakers,
	                # Guard the lookup so an empty speaker list does not abort
	                # UI construction with an IndexError.
	                value=speakers[0] if speakers else None,
	                label="Speaker Name"
	            )

	            transpose = gr.Slider(
	                minimum=-12,
	                maximum=12,
	                value=0,
	                step=1,
	                label="Pitch Shift (Semitones)",
	                # The example values are negative, so the female-to-male
	                # direction is a decrease, not an increase.
	                info="Decrease for female-to-male voices (e.g. -5 to -12), or increase for male-to-female."
	            )

	            with gr.Accordion("Advanced Options", open=False):
	                auto_predict_f0 = gr.Checkbox(
	                    value=False,
	                    label="Auto Predict F0",
	                    info="Recommended for speech/narration. UNCHECK for singing to preserve notes."
	                )

	                f0_method = gr.Dropdown(
	                    choices=["crepe", "crepe-tiny", "parselmouth", "dio", "harvest"],
	                    value="crepe",
	                    label="F0 Predictor Algorithm",
	                    info="crepe offers the best quality but is slower; dio is the fastest."
	                )

	                noise_scale = gr.Slider(
	                    minimum=0.1,
	                    maximum=1.0,
	                    value=0.4,
	                    step=0.05,
	                    label="Noise Scale",
	                    info="Controls pitch variance and expressiveness (0.4 is standard)."
	                )

	        with gr.Column(scale=1):
	            gr.Markdown("### 📤 3. Output Audio")
	            output_audio = gr.Audio(
	                label="Converted Audio",
	                type="filepath"
	            )

	            status_output = gr.Textbox(
	                label="Status",
	                value="Ready",
	                interactive=False
	            )

	            # NOTE(review): the button's nesting level is reconstructed;
	            # confirm it belongs in the output column.
	            submit_btn = gr.Button(
	                "Convert Voice 🚀",
	                variant="primary",
	                elem_classes=["convert-btn"]
	            )

	    # The input order must match convert_voice's parameter order; the two
	    # outputs receive its (audio path, status message) return tuple.
	    submit_btn.click(
	        fn=convert_voice,
	        inputs=[input_audio, speaker, transpose, auto_predict_f0, f0_method, noise_scale],
	        outputs=[output_audio, status_output]
	    )

	    # Footer
	    gr.HTML(
	        "<div style='text-align: center; color: #4b5563; font-size: 0.85rem; padding: 20px 0;'>"
	        "This model is for artistic demonstration and research purposes only. "
	        "Uses so-vits-svc-fork for inference.<br>"
	        "Developed with 💜 for the open voice community.</div>"
	    )

	# Script entry point: when run directly, serve the demo bound to every
	# network interface on port 7860.
	if __name__ == "__main__":
	    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
	    demo.launch(**launch_options)