# RVC-CH / app.py — HuggingFace Space entry point
"""
RVC Voice Conversion – HuggingFace Space
Simple, fast, GPU/CPU auto-detected.
Now with video upload and 5-output generation (acapella, instrumental, RVC on original, RVC on acapella).
Original job queue and all tabs fully preserved.
"""
from __future__ import annotations
import os
import re
import gradio as gr
from lib.config import (
BUILTIN_MODELS,
CSS,
DEVICE_LABEL,
MAX_INPUT_DURATION,
logger,
)
from lib.jobs import (
get_jobs_table,
get_queue_info,
poll_job,
submit_job,
submit_full_pipeline,
)
from lib.models import list_models, startup_downloads
from lib.ui import (
refresh_models,
toggle_autotune,
upload_model,
create_video_section,
create_five_outputs,
)
# ── Startup ───────────────────────────────────────────────────────────────────
# Pre-download built-in assets. Failures are non-fatal: the UI still comes up,
# with a warning shown in the status banner instead of the ready message.
startup_status = ""
default_model = ""
try:
    default_model = startup_downloads()
    startup_status = f"✅ Ready  ·  {DEVICE_LABEL}"
except Exception as exc:
    startup_status = f"⚠️ Some assets unavailable: {exc}  ·  {DEVICE_LABEL}"
    logger.warning("Startup download issue: %s", exc)

initial_models = list_models()
# Prefer the freshly downloaded default model; otherwise fall back to the first
# installed model, or None when no models are available at all.
if default_model in initial_models:
    initial_value = default_model
elif initial_models:
    initial_value = initial_models[0]
else:
    initial_value = None
# ── Gradio UI ─────────────────────────────────────────────────────────────────
# NOTE: custom CSS must be passed to the gr.Blocks constructor — Blocks.launch()
# does not accept a `css` keyword, so passing it there never applied the styles.
with gr.Blocks(
    title="RVC Voice Conversion - Full Suite",
    delete_cache=(3600, 3600),
    css=CSS,
) as demo:
    gr.HTML(f"""
    <div id="header">
        <h1>🎙️ RVC Voice Conversion - Full Suite</h1>
        <p>Conversão de voz com suporte a vídeos, extração de acapella/instrumental e 5 saídas!</p>
    </div>
    <p id="status">{startup_status}</p>
    """)
    with gr.Tabs():
        # ── TAB 1: Convert ────────────────────────────────────────────────────
        with gr.Tab("🎤 Convert"):
            gr.Markdown("## Opção 1: Conversão RVC clássica (um arquivo de saída)")
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### 🔊 Input Audio")
                    with gr.Tabs():
                        with gr.Tab("🎙️ Microphone"):
                            inp_mic = gr.Audio(
                                sources=["microphone"],
                                type="filepath",
                                label="Record",
                            )
                        with gr.Tab("📁 Upload File"):
                            inp_file = gr.Audio(
                                sources=["upload"],
                                type="filepath",
                                label="Upload audio (wav / mp3 / flac / ogg …)",
                            )
                    gr.Markdown("### 🤖 Model")
                    model_dd = gr.Dropdown(
                        choices=initial_models,
                        value=initial_value,
                        label="Active Voice Model",
                        interactive=True,
                    )
                    gr.Markdown("### 🎚️ Basic Settings")
                    pitch_sl = gr.Slider(
                        minimum=-24, maximum=24, value=0, step=1,
                        label="Pitch Shift (semitones)",
                        info="0 = unchanged · positive = higher · negative = lower",
                    )
                    f0_radio = gr.Radio(
                        choices=["rmvpe", "fcpe", "crepe", "crepe-tiny"],
                        value="rmvpe",
                        label="Pitch Extraction Method",
                        info="rmvpe = fastest & accurate · crepe = highest quality (slower)",
                    )
                with gr.Column(scale=1):
                    gr.Markdown("### ⚙️ Advanced Settings")
                    with gr.Accordion("Expand advanced options", open=False):
                        index_rate_sl = gr.Slider(
                            0.0, 1.0, value=0.75, step=0.05,
                            label="Index Rate",
                            info="How strongly the FAISS index influences timbre (0 = off)",
                        )
                        protect_sl = gr.Slider(
                            0.0, 0.5, value=0.5, step=0.01,
                            label="Protect Consonants",
                            info="Protects unvoiced consonants — 0.5 = max protection",
                        )
                        filter_radius_sl = gr.Slider(
                            0, 7, value=3, step=1,
                            label="Respiration Filter Radius",
                            info="Median filter on pitch — higher = smoother, reduces breath noise",
                        )
                        vol_env_sl = gr.Slider(
                            0.0, 1.0, value=0.25, step=0.05,
                            label="Volume Envelope Mix",
                            info="0.25 = natural blend · 1 = preserve input loudness · 0 = model output",
                        )
                        with gr.Row():
                            clean_cb = gr.Checkbox(value=False, label="Noise Reduction")
                            clean_sl = gr.Slider(
                                0.0, 1.0, value=0.5, step=0.05,
                                label="Reduction Strength",
                            )
                        with gr.Row():
                            split_cb = gr.Checkbox(value=False, label="Split Long Audio")
                            autotune_cb = gr.Checkbox(value=False, label="Autotune")
                        autotune_sl = gr.Slider(
                            0.0, 1.0, value=1.0, step=0.05,
                            label="Autotune Strength",
                            visible=False,
                        )
                        # Reveal the strength slider only while autotune is enabled.
                        autotune_cb.change(
                            fn=toggle_autotune,
                            inputs=autotune_cb,
                            outputs=autotune_sl,
                        )
                        gr.Markdown("**🎛️ Reverb**")
                        reverb_cb = gr.Checkbox(value=False, label="Enable Reverb")
                        with gr.Group(visible=False) as reverb_group:
                            reverb_room_sl = gr.Slider(
                                0.0, 1.0, value=0.15, step=0.05,
                                label="Room Size",
                                info="Larger = bigger sounding space",
                            )
                            reverb_damp_sl = gr.Slider(
                                0.0, 1.0, value=0.7, step=0.05,
                                label="Damping",
                                info="Higher = more absorption, less echo tail",
                            )
                            reverb_wet_sl = gr.Slider(
                                0.0, 1.0, value=0.15, step=0.05,
                                label="Wet Level",
                                info="How much reverb is mixed in (0.15 = subtle)",
                            )
                        # Show/hide the reverb settings group with the checkbox.
                        reverb_cb.change(
                            fn=lambda v: gr.update(visible=v),
                            inputs=reverb_cb,
                            outputs=reverb_group,
                        )
                    fmt_radio = gr.Radio(
                        choices=["WAV", "MP3", "FLAC", "OPUS"],
                        value="WAV",
                        label="Output Format",
                        info="WAV = lossless, large file; MP3/OPUS = smaller",
                    )
            with gr.Row():
                classic_btn = gr.Button("🚀 Convert Voice (Classic)", variant="primary")
            classic_status = gr.Markdown(value="")
            classic_audio = gr.Audio(label="Converted Audio", type="filepath", interactive=False)
            gr.Markdown("---")
            gr.Markdown("## Opção 2: Pipeline completo – 5 saídas (vídeo + separação de faixas + RVC)")
            with gr.Row():
                with gr.Column(scale=1):
                    video_input = create_video_section()
                    gr.Markdown("*(Ou use áudio/microfone acima – o pipeline aceita qualquer fonte)*")
                with gr.Column(scale=1):
                    full_btn = gr.Button("🚀 Full Pipeline (5 Outputs)", variant="secondary")
                    full_status = gr.Markdown(value="")
            # The five pipeline output components: extracted vocals, original
            # audio, instrumental, RVC on original, and RVC on vocals only.
            (entrada_acapella, entrada_audio, entrada_instrumental,
             saida_audio, saida_acapella) = create_five_outputs()
            # Legacy job-status lookup section (classic mode jobs).
            gr.Markdown("---")
            gr.Markdown("### 🔍 Verificar status de um job (clássico)")
            with gr.Row():
                job_id_box = gr.Textbox(
                    label="Job ID",
                    placeholder="e.g. a3f2b1c9",
                    scale=3,
                )
                poll_btn = gr.Button("🔄 Check", scale=1)
            poll_status = gr.Markdown(value="")
            poll_audio = gr.Audio(label="Result", type="filepath", interactive=False)
        # ── TAB 2: Models (original) ──────────────────────────────────────────
        with gr.Tab("📦 Models"):
            gr.Markdown("""
            ### Upload a Custom RVC Model
            Provide a **`.zip`** containing:
            - **`model.pth`** — weights (required)
            - **`model.index`** — FAISS index (optional, improves voice matching)
            **Built-in models** (pre-downloaded on startup):
            Vestia Zeta v1 · Vestia Zeta v2 · Ayunda Risu · Gawr Gura
            """)
            with gr.Row():
                with gr.Column(scale=1):
                    up_zip = gr.File(label="Model ZIP", file_types=[".zip"])
                    up_name = gr.Textbox(
                        label="Model Name",
                        placeholder="Leave blank to use zip filename",
                    )
                    up_btn = gr.Button("📤 Load Model", variant="primary")
                    up_status = gr.Textbox(label="Status", interactive=False, lines=2)
                with gr.Column(scale=1):
                    gr.Markdown("### Loaded Models")
                    models_table = gr.Dataframe(
                        col_count=(1, "fixed"),
                        value=[[m] for m in initial_models],
                        interactive=False,
                        label="",
                    )
                    refresh_btn = gr.Button("🔄 Refresh")
            # Uploading a model also refreshes the dropdown and the table.
            up_btn.click(
                fn=upload_model,
                inputs=[up_zip, up_name],
                outputs=[up_status, model_dd, models_table],
            )
            refresh_btn.click(
                fn=refresh_models,
                outputs=[models_table, model_dd],
            )
        # ── TAB 3: Jobs (original, untouched) ─────────────────────────────────
        with gr.Tab("📋 Jobs"):
            gr.Markdown("All submitted jobs, newest first. Click **Refresh** to update.")
            # Callable values + every=10 make these widgets self-refresh.
            queue_status = gr.Markdown(value=get_queue_info, every=10)
            jobs_table = gr.Dataframe(
                headers=["Job ID", "Model", "Status", "Time", "Download"],
                col_count=(5, "fixed"),
                value=get_jobs_table,
                interactive=False,
                wrap=True,
                datatype=["str", "str", "str", "str", "markdown"],
                every=10,
            )
            refresh_jobs_btn = gr.Button("🔄 Refresh")

            def _refresh_jobs():
                """Return a fresh queue summary and jobs table for manual refresh."""
                return get_queue_info(), get_jobs_table()

            refresh_jobs_btn.click(fn=_refresh_jobs, outputs=[queue_status, jobs_table])
        # ── TAB 4: Help (updated) ─────────────────────────────────────────────
        with gr.Tab("ℹ️ Help"):
            gr.Markdown(f"""
            ## How it works
            RVC (Retrieval-Based Voice Conversion) transforms a voice recording to sound
            like a target speaker using only that speaker's model file.
            ---
            ## Two conversion modes
            ### 1. Classic Mode (single output)
            - Upload an audio file or record your voice
            - Choose a model and settings
            - Click **Convert Voice (Classic)**
            - Result is added to the job queue – you can monitor it in the **Jobs** tab
            ### 2. Full Pipeline Mode (5 outputs)
            - Works with **video (MP4)** or any audio source
            - Automatically extracts **acapella** (vocals) and **instrumental** using Demucs
            - Runs RVC conversion on **both** the original audio and the acapella
            - Returns 5 files:
            1. `entrada_acapella.mp3` – extracted vocals
            2. `entrada.mp3` – original audio
            3. `entrada_instrumental.mp3` – background music
            4. `saida.mp3` – RVC applied to original audio
            5. `saida_acapella.mp3` – RVC applied only to vocals
            - All 5 files appear directly on the interface – no need to poll jobs
            ---
            **Device:** `{DEVICE_LABEL}`
            **Max input duration:** {MAX_INPUT_DURATION // 60} minutes
            ---
            ## Credits
            Engine: [Ultimate RVC](https://github.com/JackismyShephard/ultimate-rvc)
            Separation: [Demucs](https://github.com/facebookresearch/demucs)
            """)
    # ── Event handlers ────────────────────────────────────────────────────────

    def _submit_classic(*args):
        """Submit a classic conversion job and refresh the queue widgets.

        The 8-hex-character job id is scraped from the status message so the
        legacy "check a job" textbox is pre-filled for the user.
        """
        status, audio = submit_job(*args)
        match = re.search(r"[a-f0-9]{8}", status or "")
        job_id = match.group(0) if match else ""
        return status, audio, job_id, get_queue_info(), get_jobs_table()

    classic_btn.click(
        fn=_submit_classic,
        inputs=[
            inp_mic, inp_file, model_dd,
            pitch_sl, f0_radio,
            index_rate_sl, protect_sl, vol_env_sl,
            clean_cb, clean_sl,
            split_cb, autotune_cb, autotune_sl,
            filter_radius_sl,
            fmt_radio,
            reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl,
        ],
        outputs=[classic_status, classic_audio, job_id_box, queue_status, jobs_table],
    )
    full_btn.click(
        fn=submit_full_pipeline,
        inputs=[
            video_input, inp_mic, inp_file, model_dd,
            pitch_sl, f0_radio,
            index_rate_sl, protect_sl, vol_env_sl,
            clean_cb, clean_sl,
            split_cb, autotune_cb, autotune_sl,
            filter_radius_sl,
            fmt_radio,
            reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl,
        ],
        outputs=[
            full_status,
            entrada_acapella, entrada_audio, entrada_instrumental,
            saida_audio, saida_acapella,
        ],
    )

    def _poll_and_refresh(job_id):
        """Poll one job by id, then refresh the queue summary and jobs table."""
        status, file = poll_job(job_id)
        return status, file, get_queue_info(), get_jobs_table()

    poll_btn.click(
        fn=_poll_and_refresh,
        inputs=[job_id_box],
        outputs=[poll_status, poll_audio, queue_status, jobs_table],
    )
# ── Launch ────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Up to 5 jobs processed concurrently from the Gradio queue.
    demo.queue(default_concurrency_limit=5)
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces (required on HF Spaces)
        server_port=int(os.getenv("PORT", "7860")),
        max_threads=10,
        ssr_mode=False,
        # NOTE: `css` removed — Blocks.launch() has no `css` parameter (it
        # raises TypeError on Gradio 4/5); custom CSS belongs to the
        # gr.Blocks(css=...) constructor instead.
    )