LosCaquitos committed on
Commit
9944e2c
Β·
1 Parent(s): 7ddf2bf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +395 -223
app.py CHANGED
@@ -1,241 +1,413 @@
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
- import subprocess
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  import os
4
- from pathlib import Path
5
- import logging
6
- import uuid
7
- import time
8
- import threading
9
- import zipfile
10
- from typing import Optional, Dict, Any, List
11
-
12
- logging.basicConfig(level=logging.INFO)
13
- logger = logging.getLogger(__name__)
14
-
15
- initial_models = ["modelo_default.pth"]
16
- initial_value = initial_models[0] if initial_models else None
17
- jobs = {}
18
- queue_info = "Nenhum job na fila"
19
-
20
- def get_queue_info():
21
- global queue_info
22
- return queue_info
23
-
24
- def jobs_table_fn():
25
- global jobs
26
- data = []
27
- for job_id, info in jobs.items():
28
- status = info.get("status", "Desconhecido")
29
- time_str = info.get("time", "N/A")
30
- files_html = ""
31
- files = info.get("files", [])
32
- if files:
33
- files_html = "<br>".join([f'<a href="{f}" download>πŸ“ {Path(f).name}</a>' for f in files])
34
- data.append([job_id, status, time_str, files_html])
35
- return data
36
-
37
- def extract_audio_from_video(video_path: str) -> str:
38
- audio_path = video_path.rsplit('.', 1)[0] + '.wav'
39
- cmd = ["ffmpeg", "-y", "-i", video_path, "-vn", "-acodec", "pcm_s16le", "-ar", "44100", "-ac", "1", audio_path]
40
- subprocess.run(cmd, check=True, capture_output=True)
41
- return audio_path
42
-
43
- def upload_model(zip_file, model_name):
44
- if not zip_file:
45
- return "Nenhum arquivo selecionado", initial_models, [[m] for m in initial_models]
46
-
47
- try:
48
- with zipfile.ZipFile(zip_file.name, 'r') as zf:
49
- zf.extractall("models/")
50
-
51
- global initial_models
52
- initial_models = [f for f in os.listdir("models/") if f.endswith('.pth')]
53
- model_display = model_name or Path(zip_file.name).stem
54
- return f"βœ… Modelo '{model_display}' carregado!", initial_models, [[m] for m in initial_models]
55
- except Exception as e:
56
- return f"❌ Erro: {e}", initial_models, [[m] for m in initial_models]
57
-
58
- def refresh_models():
59
- global initial_models
60
- try:
61
- initial_models = [f for f in os.listdir("models/") if f.endswith('.pth')]
62
- except:
63
- initial_models = []
64
- return [[m] for m in initial_models], initial_models[0] if initial_models else None
65
-
66
- def process_audio(input_audio: str, model_pth: Path, work_dir: Path):
67
- stem = Path(input_audio).stem
68
-
69
- try:
70
- subprocess.run(["demucs", "--two-stems=vocals", input_audio, "-o", str(work_dir)], check=True)
71
- except subprocess.CalledProcessError as e:
72
- raise RuntimeError(f"Demucs falhou: {e}")
73
-
74
- candidates = [work_dir / "separated", work_dir / "demucs", work_dir / "demucs_out"]
75
- vocal_path = None
76
- inst_path = None
77
-
78
- for cand in candidates:
79
- if not cand.exists():
80
- continue
81
- for p in cand.rglob(f"*{stem}*"):
82
- name = p.name.lower()
83
- if any(x in name for x in ["vocals", "vocal", "acapella"]):
84
- vocal_path = p
85
- if any(x in name for x in ["no_vocals", "instrumental", "accompaniment"]):
86
- inst_path = p
87
-
88
- if not vocal_path:
89
- for p in work_dir.rglob("*"):
90
- if p.is_file() and "vocals" in p.name.lower():
91
- vocal_path = p
92
- break
93
-
94
- if not vocal_path:
95
- raise RuntimeError("NΓ£o foi possΓ­vel localizar o arquivo vocal")
96
-
97
- tmp_vocal = work_dir / f"{stem}_vocal_extracted.wav"
98
- cmd = ["ffmpeg", "-y", "-i", str(vocal_path), "-ar", "44100", "-ac", "1", str(tmp_vocal)]
99
- subprocess.run(cmd, check=True)
100
-
101
- rvc_out = work_dir / f"{stem}_rvc_converted.wav"
102
-
103
- try:
104
- from rvc import RVCInference
105
- infer = RVCInference(device="cuda:0")
106
- infer.set_model(str(model_pth))
107
- infer.infer(str(tmp_vocal), str(rvc_out))
108
- except Exception as e:
109
- raise RuntimeError(f"Falha RVC: {e}")
110
-
111
- return {
112
- "vocal_extracted": str(tmp_vocal),
113
- "instrumental": str(inst_path) if inst_path else "",
114
- "rvc_output": str(rvc_out),
115
- }
116
-
117
- def submit_job(mic, file_input, model_name, pitch, f0_method, index_rate, protect, vol_env, clean, clean_strength, split_audio, autotune, autotune_strength, filter_radius, fmt, reverb, reverb_room, reverb_damp, reverb_wet):
118
- global jobs, queue_info
119
-
120
- job_id = str(uuid.uuid4())[:8]
121
- start_time = time.strftime("%H:%M:%S")
122
-
123
- jobs[job_id] = {"status": "πŸ”„ Processando...", "time": start_time, "files": []}
124
- queue_info = f"{len(jobs)} jobs na fila"
125
-
126
- def worker():
127
- try:
128
- work_dir = Path("jobs") / job_id
129
- work_dir.mkdir(parents=True, exist_ok=True)
130
-
131
- audio_input = None
132
- if mic and os.path.isfile(mic):
133
- audio_input = mic
134
- elif file_input and os.path.isfile(file_input):
135
- audio_input = file_input
136
-
137
- if not audio_input:
138
- jobs[job_id]["status"] = "❌ Nenhum Ñudio vÑlido"
139
- return
140
-
141
- model_pth = Path("models") / model_name
142
- if not model_pth.exists():
143
- jobs[job_id]["status"] = f"❌ Modelo {model_name} não encontrado"
144
- return
145
-
146
- jobs[job_id]["status"] = "🎡 Separando vocais..."
147
- result = process_audio(audio_input, model_pth, work_dir)
148
-
149
- jobs[job_id]["status"] = "βœ… Finalizado"
150
- jobs[job_id]["files"] = [f for f in [result["rvc_output"], result["vocal_extracted"]] if f]
151
-
152
- except Exception as e:
153
- jobs[job_id]["status"] = f"❌ Erro: {str(e)[:80]}"
154
- logger.error(f"Job {job_id} falhou: {e}")
155
-
156
- threading.Thread(target=worker, daemon=True).start()
157
- return f"βœ… Job {job_id} submetido! Veja em 'Jobs'.", job_id
158
-
159
- def _submit_and_extract_id(mic, file_input, video, model_name, pitch, f0_method, index_rate, protect, vol_env, clean, clean_strength, split_audio, autotune, autotune_strength, filter_radius, fmt, reverb, reverb_room, reverb_damp, reverb_wet):
160
- audio_input = None
161
- if mic and os.path.isfile(mic):
162
- audio_input = mic
163
- elif file_input and os.path.isfile(file_input):
164
- audio_input = file_input
165
- elif video and hasattr(video, "name") and os.path.isfile(video.name):
166
- try:
167
- audio_input = extract_audio_from_video(video.name)
168
- except Exception as err:
169
- return f"⚠️ Erro vídeo: {err}", ""
170
- else:
171
- return "⚠️ Envie Ñudio ou vídeo vÑlido.", ""
172
 
173
- result = submit_job(
174
- mic, file_input, model_name, pitch, f0_method, index_rate, protect,
175
- vol_env, clean, clean_strength, split_audio, autotune, autotune_strength,
176
- filter_radius, fmt, reverb, reverb_room, reverb_damp, reverb_wet
177
- )
178
- return result
 
 
 
179
 
180
- with gr.Blocks(title="🎀 Ultimate RVC WebUI") as demo:
181
- gr.Markdown("# 🎀 Ultimate RVC - Voice Conversion")
 
 
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  with gr.Tabs():
 
 
184
  with gr.Tab("🎀 Convert"):
185
  with gr.Row():
 
186
  with gr.Column(scale=1):
 
187
  with gr.Tabs():
188
- with gr.Tab("πŸŽ™οΈ Microfone"):
189
- inp_mic = gr.Audio(sources=["microphone"], type="filepath")
190
- with gr.Tab("πŸ“ Áudio"):
191
- inp_file = gr.Audio(sources=["upload"], type="filepath")
192
- with gr.Tab("🎬 Vídeo"):
193
- inp_video = gr.File(file_types=[".mp4", ".mov", ".mkv"])
194
- model_dd = gr.Dropdown(choices=initial_models, value=initial_value, label="Modelo")
195
- pitch_sl = gr.Slider(-24, 24, value=0, step=1, label="Pitch")
196
- convert_btn = gr.Button("πŸš€ Converter", variant="primary")
197
- out_status = gr.Markdown()
198
- job_id_box = gr.Textbox(label="Job ID", interactive=False)
199
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  with gr.Column(scale=1):
201
- with gr.Accordion("AvanΓ§ado", open=False):
202
- index_rate_sl = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Index Rate")
203
- protect_sl = gr.Slider(0.0, 0.5, value=0.5, step=0.01, label="Protect")
204
- filter_radius_sl = gr.Slider(0, 7, value=3, step=1, label="Filter Radius")
205
- vol_env_sl = gr.Slider(0.0, 1.0, value=0.25, step=0.05, label="Vol Envelope")
206
- fmt_radio = gr.Radio(["WAV", "MP3"], value="WAV", label="Formato")
207
-
208
- with gr.Tab("πŸ“‹ Jobs"):
209
- refresh_jobs_btn = gr.Button("πŸ”„ Refresh")
210
- queue_status = gr.Markdown(value=get_queue_info())
211
- jobs_table = gr.Dataframe(headers=["ID", "Status", "Tempo", "Download"], interactive=False)
212
-
213
- def refresh():
214
- return get_queue_info(), jobs_table_fn()
215
- refresh_jobs_btn.click(refresh, outputs=[queue_status, jobs_table])
216
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  with gr.Tab("πŸ“¦ Models"):
218
- gr.Markdown("Upload .zip com model.pth")
 
 
 
 
 
 
 
 
219
  with gr.Row():
220
- up_zip = gr.File(label="ZIP", file_types=[".zip"])
221
- up_name = gr.Textbox(label="Nome")
222
- up_btn = gr.Button("πŸ“€ Upload")
223
- up_status = gr.Textbox()
224
- refresh_btn = gr.Button("πŸ”„ Refresh")
225
-
226
- models_table = gr.Dataframe(label="Modelos")
227
- up_btn.click(upload_model, inputs=[up_zip, up_name], outputs=[up_status, model_dd, models_table])
228
- refresh_btn.click(refresh_models, outputs=[models_table, model_dd])
229
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
  convert_btn.click(
231
- _submit_and_extract_id,
232
  inputs=[
233
- inp_mic, inp_file, inp_video, model_dd, pitch_sl, gr.State("rmvpe"),
234
- index_rate_sl, protect_sl, vol_env_sl, gr.State(False), gr.State(0.5),
235
- gr.State(False), gr.State(False), gr.State(0.5), gr.State(0.5),
236
- gr.State(0.5), gr.State(0.5), gr.State(0.5)
 
 
 
 
237
  ],
238
- outputs=[out_status, job_id_box]
 
 
 
 
 
 
 
 
 
 
239
  )
240
 
241
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
1
+ # Requires (run in a terminal, not in Python): pip install "audio-separator[cpu]" rvc-python pydub
2
+
3
+ """RVC Voice Conversion – HuggingFace Space
4
+
5
+ Simple, fast, GPU/CPU auto-detected.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import os
10
+
11
  import gradio as gr
12
+
13
+ from lib.config import (
14
+ BUILTIN_MODELS,
15
+ CSS,
16
+ DEVICE_LABEL,
17
+ MAX_INPUT_DURATION,
18
+ logger,
19
+ )
20
+ from lib.jobs import (
21
+ get_jobs_table,
22
+ get_queue_info,
23
+ poll_job,
24
+ submit_job,
25
+ )
26
+ from lib.models import list_models, startup_downloads
27
+ from lib.ui import refresh_models, toggle_autotune, upload_model
28
+
29
  import os
30
+ from audio_separator.separator import Separator
31
+ from rvc_python.infer import RVCInference
32
+ from pydub import AudioSegment
33
+
34
def processar_audio_rvc(caminho_entrada, caminho_modelo_pth):
    """Separate vocals from a song, convert them with RVC, and remix.

    Pipeline: UVR/MDX source separation -> RVC voice conversion on the
    vocal stem -> overlay of the converted vocals on the original
    instrumental.

    Args:
        caminho_entrada: Path to the input audio file (e.g. .mp3/.wav).
        caminho_modelo_pth: Path to the trained RVC ``.pth`` model.

    Returns:
        Path of the final mixed file (``saida_rvc_completa.wav``).

    Raises:
        FileNotFoundError: If either input path does not exist.
        RuntimeError: If separation does not yield at least two stems.
    """
    # Fail fast with a clear message instead of crashing deep inside the
    # separator / RVC libraries.
    if not os.path.isfile(caminho_entrada):
        raise FileNotFoundError(f"Audio de entrada nao encontrado: {caminho_entrada}")
    if not os.path.isfile(caminho_modelo_pth):
        raise FileNotFoundError(f"Modelo .pth nao encontrado: {caminho_modelo_pth}")

    # 1. Fixed working file names (same outputs as before).
    entrada = caminho_entrada
    instrumental = "instrumental.wav"
    entrada_vocais = "vocais_extraidos.wav"
    saida_rvc_vocal = "saida_rvc_vocal.wav"
    saida_rvc_final = "saida_rvc_completa.wav"

    print("--- Iniciando SeparaΓ§Γ£o (UVR5) ---")
    # 2. Split the input into instrumental + vocal stems.
    separator = Separator()
    # NOTE(review): verify this checkpoint name against the audio-separator
    # model catalogue — 'UVRAnywhere' does not look like a standard MDX-Net
    # vocal model name; confirm before relying on it.
    separator.load_model('UVRAnywhere')
    output_files = separator.separate(entrada)

    if len(output_files) < 2:
        raise RuntimeError(
            f"Separacao retornou {len(output_files)} arquivos; esperados 2"
        )

    # BUG FIX: stem order is model-dependent, so match the vocal stem by
    # filename instead of blindly assuming [0]=instrumental, [1]=vocals.
    vocal_src = next(
        (f for f in output_files if "vocal" in os.path.basename(f).lower()),
        output_files[1],  # fallback: the original positional assumption
    )
    inst_src = next(f for f in output_files if f != vocal_src)
    # os.replace overwrites an existing target atomically where possible
    # (os.rename would raise on Windows if the target already exists).
    os.replace(inst_src, instrumental)
    os.replace(vocal_src, entrada_vocais)

    print("--- Iniciando ConversΓ£o RVC (SaΓ­da RVC Vocal) ---")
    # 3. Convert the extracted vocals with the user's RVC model.
    rvc = RVCInference(device="cpu")  # switch to "cuda:0" on a GPU box
    rvc.load_model(caminho_modelo_pth)
    rvc.infer_file(entrada_vocais, saida_rvc_vocal)

    print("--- Mixagem Final (SaΓ­da RVC) ---")
    # 4. Overlay the converted vocal track on the original instrumental.
    vocal_track = AudioSegment.from_file(saida_rvc_vocal)
    inst_track = AudioSegment.from_file(instrumental)
    combinado = inst_track.overlay(vocal_track)
    combinado.export(saida_rvc_final, format="wav")

    print("Processo concluΓ­do! Arquivos gerados:")
    print(f"- Entrada: {entrada}")
    print(f"- Vocal Original: {entrada_vocais}")
    print(f"- Instrumental: {instrumental}")
    print(f"- Vocal RVC: {saida_rvc_vocal}")
    print(f"- MΓΊsica Final: {saida_rvc_final}")
    return saida_rvc_final


# BUG FIX: this example previously executed at module import time with
# hard-coded demo paths that do not exist on the Space, crashing startup.
# Kept only as a commented usage example:
# processar_audio_rvc("minha_musica.mp3", "meu_modelo_rvc.pth")
77
+
78
+ # ── Startup ───────────────────────────────────────────────────────────────────
79
+ startup_status = ""
80
+ default_model = ""
81
+ try:
82
+ default_model = startup_downloads()
83
+ startup_status = f"βœ… Ready &nbsp;Β·&nbsp; {DEVICE_LABEL}"
84
+ except Exception as e:
85
+ startup_status = f"⚠️ Some assets unavailable: {e} &nbsp;·&nbsp; {DEVICE_LABEL}"
86
+ logger.warning("Startup download issue: %s", e)
87
+
88
+ initial_models = list_models()
89
+ initial_value = default_model if default_model in initial_models else (
90
+ initial_models[0] if initial_models else None
91
+ )
92
+
93
+ # ── Gradio UI ─────────────────────────────────────────────────────────────────
94
+ with gr.Blocks(title="RVC Voice Conversion", delete_cache=(3600, 3600)) as demo:
95
+
96
+ gr.HTML(f"""
97
+ <div id="header">
98
+ <h1>πŸŽ™οΈ RVC Voice Conversion</h1>
99
+ <p>Retrieval-Based Voice Conversion Β· record or upload Β· custom models Β· GPU/CPU auto</p>
100
+ </div>
101
+ <p id="status">{startup_status}</p>
102
+ """)
103
+
104
  with gr.Tabs():
105
+
106
+ # ── TAB 1: Convert ────────────────────────────────────────────────────
107
  with gr.Tab("🎀 Convert"):
108
  with gr.Row():
109
+
110
  with gr.Column(scale=1):
111
+ gr.Markdown("### πŸ”Š Input Audio")
112
  with gr.Tabs():
113
+ with gr.Tab("πŸŽ™οΈ Microphone"):
114
+ inp_mic = gr.Audio(
115
+ sources=["microphone"],
116
+ type="filepath",
117
+ label="Record",
118
+ )
119
+ with gr.Tab("πŸ“ Upload File"):
120
+ inp_file = gr.Audio(
121
+ sources=["upload"],
122
+ type="filepath",
123
+ label="Upload audio (wav / mp3 / mp4 / flac / ogg …)",
124
+ )
125
+
126
+ gr.Markdown("### πŸ€– Model")
127
+ model_dd = gr.Dropdown(
128
+ choices=initial_models,
129
+ value=initial_value,
130
+ label="Active Voice Model",
131
+ interactive=True,
132
+ )
133
+
134
+ gr.Markdown("### 🎚️ Basic Settings")
135
+ pitch_sl = gr.Slider(
136
+ minimum=-24, maximum=24, value=0, step=1,
137
+ label="Pitch Shift (semitones)",
138
+ info="0 = unchanged Β· positive = higher Β· negative = lower",
139
+ )
140
+ f0_radio = gr.Radio(
141
+ choices=["rmvpe", "fcpe", "crepe", "crepe-tiny"],
142
+ value="rmvpe",
143
+ label="Pitch Extraction Method",
144
+ info="rmvpe = fastest & accurate Β· crepe = highest quality (slower)",
145
+ )
146
+
147
  with gr.Column(scale=1):
148
+ gr.Markdown("### βš™οΈ Advanced Settings")
149
+ with gr.Accordion("Expand advanced options", open=False):
150
+ index_rate_sl = gr.Slider(
151
+ 0.0, 1.0, value=0.75, step=0.05,
152
+ label="Index Rate",
153
+ info="How strongly the FAISS index influences timbre (0 = off)",
154
+ )
155
+ protect_sl = gr.Slider(
156
+ 0.0, 0.5, value=0.5, step=0.01,
157
+ label="Protect Consonants",
158
+ info="Protects unvoiced consonants β€” 0.5 = max protection",
159
+ )
160
+ filter_radius_sl = gr.Slider(
161
+ 0, 7, value=3, step=1,
162
+ label="Respiration Filter Radius",
163
+ info="Median filter on pitch β€” higher = smoother, reduces breath noise",
164
+ )
165
+ vol_env_sl = gr.Slider(
166
+ 0.0, 1.0, value=0.25, step=0.05,
167
+ label="Volume Envelope Mix",
168
+ info="0.25 = natural blend Β· 1 = preserve input loudness Β· 0 = model output",
169
+ )
170
+ with gr.Row():
171
+ clean_cb = gr.Checkbox(value=False, label="Noise Reduction")
172
+ clean_sl = gr.Slider(
173
+ 0.0, 1.0, value=0.5, step=0.05,
174
+ label="Reduction Strength",
175
+ )
176
+ with gr.Row():
177
+ split_cb = gr.Checkbox(value=False, label="Split Long Audio")
178
+ autotune_cb = gr.Checkbox(value=False, label="Autotune")
179
+ autotune_sl = gr.Slider(
180
+ 0.0, 1.0, value=1.0, step=0.05,
181
+ label="Autotune Strength",
182
+ visible=False,
183
+ )
184
+ autotune_cb.change(
185
+ fn=toggle_autotune,
186
+ inputs=autotune_cb,
187
+ outputs=autotune_sl,
188
+ )
189
+
190
+ gr.Markdown("**πŸŽ›οΈ Reverb**")
191
+ reverb_cb = gr.Checkbox(value=False, label="Enable Reverb")
192
+ with gr.Group(visible=False) as reverb_group:
193
+ reverb_room_sl = gr.Slider(
194
+ 0.0, 1.0, value=0.15, step=0.05,
195
+ label="Room Size",
196
+ info="Larger = bigger sounding space",
197
+ )
198
+ reverb_damp_sl = gr.Slider(
199
+ 0.0, 1.0, value=0.7, step=0.05,
200
+ label="Damping",
201
+ info="Higher = more absorption, less echo tail",
202
+ )
203
+ reverb_wet_sl = gr.Slider(
204
+ 0.0, 1.0, value=0.15, step=0.05,
205
+ label="Wet Level",
206
+ info="How much reverb is mixed in (0.15 = subtle)",
207
+ )
208
+ reverb_cb.change(
209
+ fn=lambda v: gr.update(visible=v),
210
+ inputs=reverb_cb,
211
+ outputs=reverb_group,
212
+ )
213
+
214
+ fmt_radio = gr.Radio(
215
+ choices=["WAV", "MP3", "FLAC", "OPUS"],
216
+ value="WAV",
217
+ label="Output Format",
218
+ info="OPUS = small file (~64 kbps, Telegram/Discord quality)",
219
+ )
220
+ convert_btn = gr.Button(
221
+ "πŸš€ Convert Voice",
222
+ variant="primary",
223
+ )
224
+
225
+ gr.Markdown("### 🎧 Output")
226
+ out_status = gr.Markdown(value="")
227
+ out_audio = gr.Audio(label="Result (if still on page)", type="filepath", interactive=False)
228
+
229
+ gr.Markdown("#### πŸ” Check Job Status")
230
+ with gr.Row():
231
+ job_id_box = gr.Textbox(
232
+ label="Job ID",
233
+ placeholder="e.g. a3f2b1c9",
234
+ scale=3,
235
+ )
236
+ poll_btn = gr.Button("πŸ”„ Check", scale=1)
237
+ poll_status = gr.Markdown(value="")
238
+ poll_audio = gr.Audio(label="Result", type="filepath", interactive=False)
239
+
240
+ # ── TAB 2: Models ─────────────────────────────────────────────────────
241
  with gr.Tab("πŸ“¦ Models"):
242
+ gr.Markdown("""
243
+ ### Upload a Custom RVC Model
244
+ Provide a **`.zip`** containing:
245
+ - **`model.pth`** β€” weights (required)
246
+ - **`model.index`** β€” FAISS index (optional, improves voice matching)
247
+
248
+ **Built-in models** (pre-downloaded on startup):
249
+ Vestia Zeta v1 Β· Vestia Zeta v2 Β· Ayunda Risu Β· Gawr Gura
250
+ """)
251
  with gr.Row():
252
+ with gr.Column(scale=1):
253
+ up_zip = gr.File(label="Model ZIP", file_types=[".zip"])
254
+ up_name = gr.Textbox(
255
+ label="Model Name",
256
+ placeholder="Leave blank to use zip filename",
257
+ )
258
+ up_btn = gr.Button("πŸ“€ Load Model", variant="primary")
259
+ up_status = gr.Textbox(label="Status", interactive=False, lines=2)
260
+ with gr.Column(scale=1):
261
+ gr.Markdown("### Loaded Models")
262
+ models_table = gr.Dataframe(
263
+ col_count=(1, "fixed"),
264
+ value=[[m] for m in initial_models],
265
+ interactive=False,
266
+ label="",
267
+ )
268
+ refresh_btn = gr.Button("πŸ”„ Refresh")
269
+
270
+ up_btn.click(
271
+ fn=upload_model,
272
+ inputs=[up_zip, up_name],
273
+ outputs=[up_status, model_dd, models_table],
274
+ )
275
+ refresh_btn.click(
276
+ fn=refresh_models,
277
+ outputs=[models_table, model_dd],
278
+ )
279
+
280
+ # ── TAB 3: Jobs ───────────────────────────────────────────────────────
281
+ with gr.Tab("πŸ“‹ Jobs"):
282
+ gr.Markdown("All submitted jobs, newest first. Click **Refresh** to update.")
283
+ queue_status = gr.Markdown(value=get_queue_info, every=10)
284
+ jobs_table = gr.Dataframe(
285
+ headers=["Job ID", "Model", "Status", "Time", "Download"],
286
+ col_count=(5, "fixed"),
287
+ value=get_jobs_table,
288
+ interactive=False,
289
+ wrap=True,
290
+ datatype=["str", "str", "str", "str", "markdown"],
291
+ every=10,
292
+ )
293
+ refresh_jobs_btn = gr.Button("πŸ”„ Refresh")
294
+
295
+ def _refresh_jobs():
296
+ return get_queue_info(), get_jobs_table()
297
+
298
+ refresh_jobs_btn.click(fn=_refresh_jobs, outputs=[queue_status, jobs_table])
299
+
300
+ # ── TAB 4: Help ───────────────────────────────────────────────────────
301
+ with gr.Tab("ℹ️ Help"):
302
+ gr.Markdown(f"""
303
+ ## How it works
304
+ RVC (Retrieval-Based Voice Conversion) transforms a voice recording to sound
305
+ like a target speaker using only that speaker's model file.
306
+
307
+ ---
308
+
309
+ ## Quick Guide
310
+ 1. Open the **Convert** tab
311
+ 2. **Record** via microphone or **upload** an audio file (wav, mp3, flac, ogg …)
312
+ 3. Choose a **model** from the dropdown β€” 4 models are pre-loaded on startup
313
+ 4. Set **Pitch Shift** if needed (e.g. male β†’ female: try +12 semitones)
314
+ 5. Click **πŸš€ Convert Voice** and wait for the result
315
+
316
+ ---
317
+
318
+ ## Built-in Models
319
+ | Model | Description |
320
+ |---|---|
321
+ | **Vestia Zeta v1** | Hololive ID VTuber, v1 model |
322
+ | **Vestia Zeta v2** | Hololive ID VTuber, v2 model (recommended) |
323
+ | **Ayunda Risu** | Hololive ID VTuber |
324
+ | **Gawr Gura** | Hololive EN VTuber |
325
+
326
+ ---
327
+
328
+ ## Pitch Extraction Methods
329
+ | Method | Speed | Quality | Best for |
330
+ |---|---|---|---|
331
+ | **rmvpe** | ⚑⚑⚑ | β˜…β˜…β˜…β˜… | General use (default) |
332
+ | **fcpe** | ⚑⚑ | β˜…β˜…β˜…β˜… | Singing |
333
+ | **crepe** | ⚑ | β˜…β˜…β˜…β˜…β˜… | Highest quality, slow |
334
+ | **crepe-tiny** | ⚑⚑ | β˜…β˜…β˜… | Low resource |
335
+
336
+ ---
337
+
338
+ ## Advanced Settings
339
+ | Setting | Description |
340
+ |---|---|
341
+ | **Index Rate** | Influence of FAISS index on output timbre (0.75 recommended) |
342
+ | **Protect Consonants** | Prevents artefacts on consonants (0.5 = max) |
343
+ | **Respiration Filter Radius** | Smooths pitch curve β€” higher reduces breath noise (0–7, default 3) |
344
+ | **Volume Envelope Mix** | 0.25 = natural blend Β· 1 = preserve input loudness |
345
+ | **Noise Reduction** | Removes background noise before conversion |
346
+ | **Split Long Audio** | Chunks audio for recordings > 60 s |
347
+ | **Autotune** | Snaps pitch to nearest musical note |
348
+
349
+ ---
350
+
351
+ ## Output Formats
352
+ | Format | Size | Quality |
353
+ |---|---|---|
354
+ | **WAV** | Large | Lossless |
355
+ | **FLAC** | Medium | Lossless compressed |
356
+ | **MP3** | Small | Lossy |
357
+ | **OPUS** | Tiny (~64 kbps) | Telegram/Discord quality |
358
+
359
+ ---
360
+
361
+ **Device:** `{DEVICE_LABEL}`
362
+ **Max input duration:** {MAX_INPUT_DURATION // 60} minutes
363
+
364
+ ---
365
+
366
+ ## Credits
367
+ Engine: [Ultimate RVC](https://github.com/JackismyShephard/ultimate-rvc)
368
+ """)
369
+
370
+ # Wire convert button after all tabs
371
+ def _submit_and_extract_id(*args):
372
+ import re
373
+ status, audio = submit_job(*args)
374
+ match = re.search(r"[a-f0-9]{8}", status or "")
375
+ job_id = match.group(0) if match else ""
376
+ return status, audio, job_id, get_queue_info(), get_jobs_table()
377
+
378
  convert_btn.click(
379
+ fn=_submit_and_extract_id,
380
  inputs=[
381
+ inp_mic, inp_file, model_dd,
382
+ pitch_sl, f0_radio,
383
+ index_rate_sl, protect_sl, vol_env_sl,
384
+ clean_cb, clean_sl,
385
+ split_cb, autotune_cb, autotune_sl,
386
+ filter_radius_sl,
387
+ fmt_radio,
388
+ reverb_cb, reverb_room_sl, reverb_damp_sl, reverb_wet_sl,
389
  ],
390
+ outputs=[out_status, out_audio, job_id_box, queue_status, jobs_table],
391
+ )
392
+
393
+ def _poll_and_refresh(job_id):
394
+ status, file = poll_job(job_id)
395
+ return status, file, get_queue_info(), get_jobs_table()
396
+
397
+ poll_btn.click(
398
+ fn=_poll_and_refresh,
399
+ inputs=[job_id_box],
400
+ outputs=[poll_status, poll_audio, queue_status, jobs_table],
401
  )
402
 
403
+
404
+ # ── Launch ────────────────────────────────────────────────────────────────────
405
+ if __name__ == "__main__":
406
+ demo.queue(default_concurrency_limit=5)
407
+ demo.launch(
408
+ server_name="0.0.0.0",
409
+ server_port=int(os.getenv("PORT", 7860)),
410
+ max_threads=10,
411
+ ssr_mode=False,
412
+ css=CSS,
413
+ )