Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ import subprocess
|
|
| 5 |
import shutil
|
| 6 |
from pathlib import Path
|
| 7 |
import zipfile
|
|
|
|
| 8 |
|
| 9 |
class RealRVCTrainer:
|
| 10 |
def __init__(self):
|
|
@@ -31,37 +32,29 @@ class RealRVCTrainer:
|
|
| 31 |
core_packages = [
|
| 32 |
"torch", "torchaudio", "torchvision", "numpy", "scipy",
|
| 33 |
"librosa", "soundfile", "faiss-cpu", "praat-parselmouth",
|
| 34 |
-
"pyworld", "scikit-learn", "numba", "resampy", "pydub"
|
| 35 |
-
"fairseq", "gradio", "tensorboard" # Added common deps from repo requirements
|
| 36 |
]
|
| 37 |
|
| 38 |
for pkg in core_packages:
|
| 39 |
try:
|
| 40 |
-
subprocess.run([sys.executable, "-m", "pip", "install", "-q", pkg], timeout=
|
| 41 |
except:
|
| 42 |
pass
|
| 43 |
|
| 44 |
progress(0.6, desc="Downloading pretrained models...")
|
| 45 |
|
| 46 |
-
|
| 47 |
-
# https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI/tree/main/assets
|
| 48 |
-
assets_dir = self.rvc_dir / "assets"
|
| 49 |
-
pretrained_dir = assets_dir / "pretrained"
|
| 50 |
-
pretrained_v2_dir = assets_dir / "pretrained_v2" # Recommended for better models
|
| 51 |
-
hubert_path = self.rvc_dir / "hubert_base.pt" # Often in root
|
| 52 |
-
|
| 53 |
-
assets_dir.mkdir(exist_ok=True)
|
| 54 |
pretrained_dir.mkdir(exist_ok=True)
|
| 55 |
-
pretrained_v2_dir.mkdir(exist_ok=True)
|
| 56 |
|
| 57 |
models_to_download = [
|
| 58 |
-
("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/
|
| 59 |
-
("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/
|
| 60 |
-
("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/
|
| 61 |
]
|
| 62 |
|
| 63 |
-
for idx, (url,
|
| 64 |
-
progress(0.6 + (idx / len(models_to_download)) * 0.3, desc=f"Downloading {
|
|
|
|
| 65 |
if not output_path.exists():
|
| 66 |
try:
|
| 67 |
subprocess.run(["wget", "-q", "-O", str(output_path), url], timeout=300)
|
|
@@ -78,52 +71,102 @@ class RealRVCTrainer:
|
|
| 78 |
self.setup_complete = True
|
| 79 |
progress(1.0, desc="Setup complete!")
|
| 80 |
|
| 81 |
-
return "β
RVC Installation Complete!\n\
|
| 82 |
|
| 83 |
except Exception as e:
|
| 84 |
-
return f"β Installation failed: {str(e)}"
|
| 85 |
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
def train_model(self, model_name, epochs, batch_size, sample_rate, progress=gr.Progress()):
|
| 89 |
"""Run actual RVC training"""
|
| 90 |
try:
|
| 91 |
progress(0.05, desc="Initializing training...")
|
| 92 |
-
# Citation: log_dir directly under logs/ from train.py exp_dir setup
|
| 93 |
log_dir = self.rvc_dir / "logs" / model_name
|
| 94 |
log_dir.mkdir(parents=True, exist_ok=True)
|
| 95 |
|
| 96 |
progress(0.1, desc="Starting RVC training...")
|
| 97 |
train_script = self.rvc_dir / "infer" / "modules" / "train" / "train.py"
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
pg_path = self.rvc_dir / "assets" / "pretrained_v2" / "f0G40k.pth"
|
| 101 |
-
pd_path = self.rvc_dir / "assets" / "pretrained_v2" / "f0D40k.pth"
|
| 102 |
|
| 103 |
cmd = [
|
| 104 |
sys.executable, str(train_script),
|
| 105 |
"-e", model_name, "-sr", str(sample_rate),
|
| 106 |
"-f0", "1", "-bs", str(batch_size),
|
| 107 |
"-g", "0", "-te", str(epochs), "-se", "10",
|
| 108 |
-
"-pg", str(
|
| 109 |
-
"-pd", str(
|
| 110 |
-
"-l", "0", "-c", "0"
|
| 111 |
]
|
| 112 |
|
| 113 |
-
|
|
|
|
| 114 |
|
| 115 |
for line in process.stdout:
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
progress(0.2 + 0.7 * (process.poll() is not None), desc=line[:100])
|
| 119 |
|
| 120 |
process.wait()
|
| 121 |
-
|
| 122 |
progress(0.9, desc="Searching for model files...")
|
| 123 |
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
|
| 128 |
if model_files or index_files:
|
| 129 |
output_dir = self.workspace / model_name
|
|
@@ -142,17 +185,85 @@ class RealRVCTrainer:
|
|
| 142 |
files_info.append(f"- {latest_index.name}")
|
| 143 |
|
| 144 |
progress(1.0, desc="Training complete!")
|
| 145 |
-
|
| 146 |
-
return f"β
Training Complete!\n\nπ Model: {model_name}\nπ Epochs: {epochs}\n\nπΎ Model Files:\n{chr(10).join(files_info)}\n\nπ Location: {output_dir}\n\nπ Ready to download!\n\nβ οΈ On CPU this is very slow β use GPU!"
|
| 147 |
else:
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
except Exception as e:
|
| 151 |
return f"β Training failed: {str(e)}"
|
| 152 |
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
|
| 157 |
if __name__ == "__main__":
|
| 158 |
demo.launch()
|
|
|
|
| 5 |
import shutil
|
| 6 |
from pathlib import Path
|
| 7 |
import zipfile
|
| 8 |
+
import json
|
| 9 |
|
| 10 |
class RealRVCTrainer:
|
| 11 |
def __init__(self):
|
|
|
|
| 32 |
core_packages = [
|
| 33 |
"torch", "torchaudio", "torchvision", "numpy", "scipy",
|
| 34 |
"librosa", "soundfile", "faiss-cpu", "praat-parselmouth",
|
| 35 |
+
"pyworld", "scikit-learn", "numba", "resampy", "pydub"
|
|
|
|
| 36 |
]
|
| 37 |
|
| 38 |
for pkg in core_packages:
|
| 39 |
try:
|
| 40 |
+
subprocess.run([sys.executable, "-m", "pip", "install", "-q", pkg], timeout=60)
|
| 41 |
except:
|
| 42 |
pass
|
| 43 |
|
| 44 |
progress(0.6, desc="Downloading pretrained models...")
|
| 45 |
|
| 46 |
+
pretrained_dir = self.rvc_dir / "pretrained"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
pretrained_dir.mkdir(exist_ok=True)
|
|
|
|
| 48 |
|
| 49 |
models_to_download = [
|
| 50 |
+
("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth", "f0G40k.pth"),
|
| 51 |
+
("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth", "f0D40k.pth"),
|
| 52 |
+
("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt", "hubert_base.pt"),
|
| 53 |
]
|
| 54 |
|
| 55 |
+
for idx, (url, filename) in enumerate(models_to_download):
|
| 56 |
+
progress(0.6 + (idx / len(models_to_download)) * 0.3, desc=f"Downloading {filename}...")
|
| 57 |
+
output_path = pretrained_dir / filename
|
| 58 |
if not output_path.exists():
|
| 59 |
try:
|
| 60 |
subprocess.run(["wget", "-q", "-O", str(output_path), url], timeout=300)
|
|
|
|
| 71 |
self.setup_complete = True
|
| 72 |
progress(1.0, desc="Setup complete!")
|
| 73 |
|
| 74 |
+
return "β
RVC Installation Complete!\n\nπ¦ Installed:\n- Official RVC codebase\n- Pre-trained models\n- All dependencies\n\nπ Ready to train!"
|
| 75 |
|
| 76 |
except Exception as e:
|
| 77 |
+
return f"β Installation failed: {str(e)}\n\nπ§ Try manual installation or use Google Colab."
|
| 78 |
|
| 79 |
+
def prepare_dataset(self, audio_files, model_name, progress=gr.Progress()):
    """Copy uploaded audio files into the dataset folder of a model.

    Files are copied to ``<rvc_dir>/dataset/<model_name>/`` and prefixed
    with a zero-padded running index so that uploads sharing a basename do
    not overwrite each other.

    Args:
        audio_files: Uploaded files. Each item is either a path
            (``str``/``Path``) or an object exposing its path as ``.name``.
        model_name: Folder name for the dataset; ``"my_model"`` is used
            when it is empty. Must be a single path component.
        progress: Gradio progress tracker.

    Returns:
        A status message describing success or the failure reason.
    """
    if not audio_files:
        return "❌ Please upload audio files"

    if not model_name:
        model_name = "my_model"

    # model_name is typed by the user and joined into a filesystem path, so
    # refuse anything that is not a plain folder name ("a/b", "..", ...).
    if model_name == ".." or Path(model_name).name != model_name:
        return "❌ Error: invalid model name"

    try:
        progress(0.1, desc="Creating dataset structure...")
        dataset_path = self.rvc_dir / "dataset" / model_name
        dataset_path.mkdir(parents=True, exist_ok=True)

        progress(0.3, desc="Copying audio files...")
        total = len(audio_files)
        for idx, audio_file in enumerate(audio_files):
            # Path-like uploads are used directly; wrapper objects carry
            # their location in ``.name``.
            if isinstance(audio_file, (str, Path)):
                source = Path(audio_file)
            else:
                source = Path(audio_file.name)
            dest = dataset_path / f"{idx:04d}_{source.name}"
            shutil.copy2(source, dest)
            progress(0.3 + (idx / total) * 0.6, desc=f"Copied {idx+1}/{total} files")

        progress(1.0, desc="Dataset ready!")
        return f"✅ Dataset Prepared!\n\n📁 Location: {dataset_path}\n📊 Files: {total}\n🎤 Model: {model_name}\n\n✅ Ready for preprocessing!"

    except Exception as e:
        # UI boundary: report the problem as text instead of raising.
        return f"❌ Error: {str(e)}"
|
| 103 |
+
|
| 104 |
+
def preprocess_data(self, model_name, sample_rate, progress=gr.Progress()):
    """Run the RVC preprocessing script over a prepared dataset.

    Looks for ``infer/modules/train/preprocess.py`` under ``self.rvc_dir``
    and falls back to ``trainset_preprocess_pipeline_print.py`` when that
    file is absent, then runs the script with the current interpreter.

    Args:
        model_name: Name of the dataset folder under ``<rvc_dir>/dataset``.
        sample_rate: Target sample rate, passed to the script as text.
        progress: Gradio progress tracker.

    Returns:
        A status message. Success is reported only when the script exits
        with status 0; otherwise the message carries the exit code and the
        tail of the script's output.
    """
    try:
        progress(0.1, desc="Starting preprocessing...")
        dataset_path = self.rvc_dir / "dataset" / model_name
        if not dataset_path.exists():
            return "❌ Dataset not found. Please prepare dataset first."

        preprocess_script = self.rvc_dir / "infer" / "modules" / "train" / "preprocess.py"
        if not preprocess_script.exists():
            preprocess_script = self.rvc_dir / "trainset_preprocess_pipeline_print.py"

        progress(0.3, desc="Preprocessing audio...")
        # NOTE(review): the upstream scripts may expect further positional
        # arguments (e.g. an experiment directory) — confirm against the
        # installed RVC version.
        cmd = [sys.executable, str(preprocess_script), str(dataset_path), str(sample_rate), "2"]
        result = subprocess.run(cmd, capture_output=True, text=True)

        # A non-zero exit status means the script failed; do not claim success.
        if result.returncode != 0:
            detail = (result.stderr or result.stdout or "").strip()
            # Keep only the tail so a long traceback stays readable in the UI.
            return f"❌ Preprocessing failed: exit code {result.returncode}\n{detail[-2000:]}"

        progress(1.0, desc="Preprocessing complete!")
        return f"✅ Preprocessing Complete!\n\n🎵 Sample Rate: {sample_rate}Hz\n📊 Features extracted\n🚀 Ready for training!"

    except Exception as e:
        # UI boundary: report the problem as text instead of raising.
        return f"❌ Preprocessing failed: {str(e)}"
|
| 125 |
|
| 126 |
def train_model(self, model_name, epochs, batch_size, sample_rate, progress=gr.Progress()):
|
| 127 |
"""Run actual RVC training"""
|
| 128 |
try:
|
| 129 |
progress(0.05, desc="Initializing training...")
|
|
|
|
| 130 |
log_dir = self.rvc_dir / "logs" / model_name
|
| 131 |
log_dir.mkdir(parents=True, exist_ok=True)
|
| 132 |
|
| 133 |
progress(0.1, desc="Starting RVC training...")
|
| 134 |
train_script = self.rvc_dir / "infer" / "modules" / "train" / "train.py"
|
| 135 |
+
if not train_script.exists():
|
| 136 |
+
train_script = self.rvc_dir / "train_nsf_sim_cache_sid_load_pretrain.py"
|
|
|
|
|
|
|
| 137 |
|
| 138 |
cmd = [
|
| 139 |
sys.executable, str(train_script),
|
| 140 |
"-e", model_name, "-sr", str(sample_rate),
|
| 141 |
"-f0", "1", "-bs", str(batch_size),
|
| 142 |
"-g", "0", "-te", str(epochs), "-se", "10",
|
| 143 |
+
"-pg", str(self.rvc_dir / "pretrained" / "f0G40k.pth"),
|
| 144 |
+
"-pd", str(self.rvc_dir / "pretrained" / "f0D40k.pth"),
|
| 145 |
+
"-l", "0", "-c", "0"
|
| 146 |
]
|
| 147 |
|
| 148 |
+
progress(0.2, desc=f"Training {model_name}...")
|
| 149 |
+
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
| 150 |
|
| 151 |
for line in process.stdout:
|
| 152 |
+
if "epoch" in line.lower():
|
| 153 |
+
progress(0.2 + 0.6, desc=f"Training: {line.strip()[:50]}")
|
|
|
|
| 154 |
|
| 155 |
process.wait()
|
|
|
|
| 156 |
progress(0.9, desc="Searching for model files...")
|
| 157 |
|
| 158 |
+
possible_paths = [
|
| 159 |
+
log_dir / "weights", log_dir,
|
| 160 |
+
self.rvc_dir / "weights" / model_name,
|
| 161 |
+
self.rvc_dir / "logs" / model_name
|
| 162 |
+
]
|
| 163 |
+
|
| 164 |
+
model_files = []
|
| 165 |
+
index_files = []
|
| 166 |
+
for path in possible_paths:
|
| 167 |
+
if path.exists():
|
| 168 |
+
model_files.extend(list(path.glob("**/*.pth")))
|
| 169 |
+
index_files.extend(list(path.glob("**/*.index")))
|
| 170 |
|
| 171 |
if model_files or index_files:
|
| 172 |
output_dir = self.workspace / model_name
|
|
|
|
| 185 |
files_info.append(f"- {latest_index.name}")
|
| 186 |
|
| 187 |
progress(1.0, desc="Training complete!")
|
| 188 |
+
return f"β
Training Complete!\n\nπ Model: {model_name}\nπ Epochs: {epochs}\n\nπΎ Model Files:\n{chr(10).join(files_info)}\n\nπ Location: {output_dir}\n\nπ Ready to download!"
|
|
|
|
| 189 |
else:
|
| 190 |
+
debug_info = []
|
| 191 |
+
if log_dir.exists():
|
| 192 |
+
debug_info.append(f"Log dir: {log_dir}")
|
| 193 |
+
for item in log_dir.rglob("*"):
|
| 194 |
+
debug_info.append(f" - {item.relative_to(log_dir)}")
|
| 195 |
+
|
| 196 |
+
return f"β οΈ Training completed but model files not found.\n\nπ Searched in:\n{chr(10).join([f'- {p}' for p in possible_paths])}\n\nπ Debug:\n{chr(10).join(debug_info)}"
|
| 197 |
|
| 198 |
except Exception as e:
|
| 199 |
return f"β Training failed: {str(e)}"
|
| 200 |
|
| 201 |
+
def package_model(self, model_name):
    """Bundle a trained model's files into ``<workspace>/<model_name>_RVC.zip``.

    The model folder is ``<workspace>/<model_name>`` or, when that does not
    exist, ``<rvc_dir>/logs/<model_name>/weights``. Every ``.pth``,
    ``.index`` and ``.json`` file below it is added to the archive.

    Args:
        model_name: Name of the model folder; must be a single, non-empty
            path component.

    Returns:
        ``(zip_path, message)`` on success, ``(None, message)`` when the
        model or its files cannot be found or packaging fails.
    """
    try:
        # The name is user input joined into paths: refuse empty names and
        # anything that is not a plain folder name ("a/b", "..", ...).
        if not model_name or model_name == ".." or Path(model_name).name != model_name:
            return None, "❌ Model not found"

        output_dir = self.workspace / model_name
        if not output_dir.exists():
            output_dir = self.rvc_dir / "logs" / model_name / "weights"

        if not output_dir.exists():
            return None, "❌ Model not found"

        wanted_suffixes = {".pth", ".index", ".json"}
        # Shallowest files first, so top-level files keep their bare names
        # when a nested file shares the same name.
        members = sorted(
            (p for p in output_dir.rglob("*") if p.is_file() and p.suffix in wanted_suffixes),
            key=lambda p: (len(p.parts), p),
        )
        if not members:
            # Avoid handing out an empty archive as if packaging succeeded.
            return None, f"❌ No model files found in {output_dir}"

        self.workspace.mkdir(parents=True, exist_ok=True)
        zip_path = self.workspace / f"{model_name}_RVC.zip"
        used_names = set()
        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            for file in members:
                arcname = file.name
                if arcname in used_names:
                    # Same filename in another sub-folder: keep its relative
                    # path so entries do not shadow each other.
                    arcname = file.relative_to(output_dir).as_posix()
                used_names.add(arcname)
                zipf.write(file, arcname)

        return str(zip_path), f"✅ Model packaged: {zip_path.name}"
    except Exception as e:
        # UI boundary: report the problem as text instead of raising.
        return None, f"❌ Error: {str(e)}"
|
| 220 |
|
| 221 |
+
# Single trainer instance shared by every UI callback below.
trainer = RealRVCTrainer()

# One tab per workflow step: install -> dataset -> preprocess -> train -> download.
with gr.Blocks(title="Real RVC Training") as demo:
    gr.Markdown("# 🎤 Real RVC Model Training\n### Using Official RVC-Project Implementation\n\n⚠️ Uses REAL RVC training. Models work on weights.gg!")

    with gr.Tab("⚙️ Step 0: Install RVC"):
        gr.Markdown("Install official RVC codebase and pretrained models (~200MB)")
        install_btn = gr.Button("📦 Install RVC Components", variant="primary", size="lg")
        install_output = gr.Textbox(label="Installation Status", lines=10)
        install_btn.click(fn=trainer.install_rvc, outputs=install_output)

    with gr.Tab("📁 Step 1: Prepare Dataset"):
        gr.Markdown("Upload voice audio files (10-30 min recommended, WAV/MP3/FLAC)")
        model_name_prep = gr.Textbox(label="Model Name", value="my_voice_model")
        audio_files = gr.File(label="Upload Audio Files", file_count="multiple", file_types=["audio"])
        prep_btn = gr.Button("📁 Prepare Dataset", variant="primary")
        prep_output = gr.Textbox(label="Status", lines=8)
        prep_btn.click(fn=trainer.prepare_dataset, inputs=[audio_files, model_name_prep], outputs=prep_output)

    with gr.Tab("🔧 Step 2: Preprocess"):
        gr.Markdown("Preprocess audio and extract features")
        model_name_process = gr.Textbox(label="Model Name", value="my_voice_model")
        sample_rate_process = gr.Radio(choices=["40000", "48000"], value="40000", label="Sample Rate")
        process_btn = gr.Button("🔧 Preprocess Data", variant="primary")
        process_output = gr.Textbox(label="Status", lines=8)
        process_btn.click(fn=trainer.preprocess_data, inputs=[model_name_process, sample_rate_process], outputs=process_output)

    with gr.Tab("🚀 Step 3: Train Model"):
        gr.Markdown("Train RVC model (⚠️ CPU training takes hours/days)")
        model_name_train = gr.Textbox(label="Model Name", value="my_voice_model")
        epochs_train = gr.Slider(minimum=10, maximum=500, value=100, step=10, label="Epochs")
        batch_size_train = gr.Slider(minimum=1, maximum=16, value=4, step=1, label="Batch Size")
        sample_rate_train = gr.Radio(choices=["40000", "48000"], value="40000", label="Sample Rate")
        train_btn = gr.Button("🚀 Start Real Training", variant="primary")
        train_output = gr.Textbox(label="Training Status", lines=15)
        train_btn.click(fn=trainer.train_model, inputs=[model_name_train, epochs_train, batch_size_train, sample_rate_train], outputs=train_output)

    with gr.Tab("📥 Step 4: Download"):
        gr.Markdown("Download your trained RVC model")
        model_name_download = gr.Textbox(label="Model Name", value="my_voice_model")
        download_btn = gr.Button("📦 Package Model", variant="primary")
        download_file = gr.File(label="Download")
        download_status = gr.Textbox(label="Status")
        download_btn.click(fn=trainer.package_model, inputs=model_name_download, outputs=[download_file, download_status])

    # Footer shown below the tabs.
    gr.Markdown("---\n### 📚 Resources\n- [RVC Project](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)\n- [Weights.gg](https://weights.gg/)\n\n### ⚠️ Important\n- Uses REAL RVC training\n- Models work on weights.gg\n- CPU training is VERY slow\n- Recommended: Google Colab with GPU")

if __name__ == "__main__":
    # The handlers report progress through gr.Progress(), which relies on
    # the request queue; enable it explicitly so progress also works on
    # Gradio releases where the queue is not on by default.
    demo.queue().launch()
|