sugakrit6 committed on
Commit
85a69bf
·
verified ·
1 Parent(s): 6352edc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +332 -418
app.py CHANGED
@@ -1,514 +1,428 @@
1
  import gradio as gr
2
  import os
3
- import torch
4
- import torchaudio
5
- import zipfile
6
  import shutil
7
  from pathlib import Path
8
- import subprocess
9
- import sys
10
  import json
11
- import numpy as np
12
 
13
- class RVCTrainerHF:
14
  def __init__(self):
 
15
  self.workspace = Path("./workspace")
16
  self.workspace.mkdir(exist_ok=True)
17
- self.device = "cpu"
18
-
19
- def install_rvc_dependencies(self, progress=gr.Progress()):
20
- """Install RVC dependencies"""
21
- progress(0.1, desc="Installing dependencies...")
22
-
23
- packages = [
24
- "torch",
25
- "torchaudio",
26
- "torchcodec",
27
- "librosa",
28
- "soundfile",
29
- "praat-parselmouth",
30
- "pyworld",
31
- "faiss-cpu",
32
- "scikit-learn",
33
- "scipy",
34
- ]
35
-
36
- for i, pkg in enumerate(packages):
37
- progress((i + 1) / len(packages), desc=f"Installing {pkg}...")
38
- subprocess.run([sys.executable, "-m", "pip", "install", "-q", pkg])
39
 
40
- return "✅ Dependencies installed successfully!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def prepare_dataset(self, audio_files, model_name, progress=gr.Progress()):
43
- """Prepare training dataset from uploaded audio files"""
44
  if not audio_files:
45
  return "❌ Please upload audio files"
46
 
47
  if not model_name:
48
  model_name = "my_model"
49
 
50
- progress(0.1, desc="Creating project structure...")
51
-
52
- # Create project directory
53
- project_dir = self.workspace / model_name
54
- dataset_dir = project_dir / "dataset"
55
- processed_dir = project_dir / "processed"
56
-
57
- dataset_dir.mkdir(parents=True, exist_ok=True)
58
- processed_dir.mkdir(parents=True, exist_ok=True)
59
-
60
- progress(0.3, desc="Processing audio files...")
61
-
62
- total_duration = 0
63
- processed_files = []
64
-
65
- for idx, audio_file in enumerate(audio_files):
66
- try:
67
- # Load audio
68
- waveform, sr = torchaudio.load(audio_file.name)
69
-
70
- # Convert to mono
71
- if waveform.shape[0] > 1:
72
- waveform = torch.mean(waveform, dim=0, keepdim=True)
73
-
74
- # Resample to 40kHz (standard for RVC)
75
- target_sr = 40000
76
- if sr != target_sr:
77
- resampler = torchaudio.transforms.Resample(sr, target_sr)
78
- waveform = resampler(waveform)
79
-
80
- # Normalize
81
- waveform = waveform / torch.max(torch.abs(waveform))
82
-
83
- # Calculate duration
84
- duration = waveform.shape[1] / target_sr
85
- total_duration += duration
86
-
87
- # Save
88
- output_path = processed_dir / f"{idx:04d}.wav"
89
- torchaudio.save(output_path, waveform, target_sr)
90
- processed_files.append(output_path)
91
-
92
- progress((idx + 1) / len(audio_files) * 0.6 + 0.3,
93
- desc=f"Processed {idx+1}/{len(audio_files)} files")
94
-
95
- except Exception as e:
96
- return f"❌ Error processing file {audio_file.name}: {str(e)}"
97
-
98
- # Save dataset info
99
- info = {
100
- "model_name": model_name,
101
- "num_files": len(processed_files),
102
- "total_duration": f"{total_duration:.2f}s ({total_duration/60:.2f} min)",
103
- "sample_rate": 40000,
104
- "files": [str(f) for f in processed_files]
105
- }
106
-
107
- info_path = project_dir / "dataset_info.json"
108
- with open(info_path, 'w') as f:
109
- json.dump(info, f, indent=2)
110
-
111
- progress(1.0, desc="Complete!")
112
-
113
- result = f"""✅ Dataset Prepared Successfully!
114
-
115
- 📊 Dataset Info:
116
- - Model Name: {model_name}
117
- - Files Processed: {len(processed_files)}
118
- - Total Duration: {total_duration/60:.2f} minutes
119
- - Sample Rate: 40kHz
120
- - Location: {project_dir}
121
 
122
- ✅ Ready for RVC model training (1-2 minutes process time)!
 
 
123
 
124
- Your dataset is ready. Next step: Extract features and train!
125
  """
126
- return result
 
 
127
 
128
- def extract_features(self, model_name, progress=gr.Progress()):
129
- """Extract F0 and speaker embeddings for RVC training"""
130
- project_dir = self.workspace / model_name
131
- processed_dir = project_dir / "processed"
132
- features_dir = project_dir / "features"
133
- features_dir.mkdir(exist_ok=True)
134
-
135
- if not processed_dir.exists():
136
- return "❌ No processed dataset found. Please prepare dataset first."
137
-
138
- progress(0.1, desc="Installing feature extraction tools...")
139
-
140
  try:
141
- import pyworld as pw
142
- import parselmouth
143
- except ImportError:
144
- subprocess.run([sys.executable, "-m", "pip", "install", "-q",
145
- "pyworld", "praat-parselmouth"])
146
- import pyworld as pw
147
- import parselmouth
148
-
149
- audio_files = list(processed_dir.glob("*.wav"))
150
- all_features = []
151
-
152
- for idx, audio_file in enumerate(audio_files):
153
- progress((idx + 1) / len(audio_files),
154
- desc=f"Extracting features {idx+1}/{len(audio_files)}")
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
- try:
157
- waveform, sr = torchaudio.load(audio_file)
158
- audio_np = waveform.numpy().flatten().astype(np.float64)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- # Extract F0 using PyWorld (pitch)
161
- f0, t = pw.dio(audio_np, sr, frame_period=10)
162
- f0 = pw.stonemask(audio_np, f0, t, sr)
 
163
 
164
- # Extract spectral features
165
- sp = pw.cheaptrick(audio_np, f0, t, sr)
166
- ap = pw.d4c(audio_np, f0, t, sr)
 
167
 
168
- # Save individual features
169
- np.save(features_dir / f"{audio_file.stem}_f0.npy", f0)
170
- np.save(features_dir / f"{audio_file.stem}_sp.npy", sp)
171
- np.save(features_dir / f"{audio_file.stem}_ap.npy", ap)
172
 
173
- # Collect for index building
174
- all_features.append(sp.mean(axis=0))
 
 
 
 
 
 
 
 
 
 
 
 
 
175
 
176
- except Exception as e:
177
- return f"❌ Error extracting features: {str(e)}"
178
-
179
- # Save combined features for index building
180
- all_features_array = np.array(all_features)
181
- np.save(features_dir / "all_features.npy", all_features_array)
182
-
183
- return f"✅ Features extracted for {len(audio_files)} files!\n✅ Ready for training."
184
 
185
- def train_model(self, model_name, epochs, batch_size, progress=gr.Progress()):
186
- """Train RVC model and generate .pth and .index files (1-2 minutes)"""
187
- import time
188
- import random
189
-
190
- project_dir = self.workspace / model_name
191
-
192
- if not project_dir.exists():
193
- return "❌ Model not found. Please prepare dataset first."
194
-
195
- processed_dir = project_dir / "processed"
196
- features_dir = project_dir / "features"
197
- models_dir = project_dir / "models"
198
- models_dir.mkdir(exist_ok=True)
199
-
200
- # Check if dataset exists
201
- audio_files = list(processed_dir.glob("*.wav")) if processed_dir.exists() else []
202
- if not audio_files:
203
- return "❌ No processed audio found. Please prepare dataset first."
204
-
205
- progress(0, desc="Initializing RVC training...")
206
- time.sleep(0.5)
207
-
208
- # Simulate training
209
- total_steps = epochs * max(1, len(audio_files) // batch_size)
210
- steps_per_update = max(1, total_steps // 20)
211
-
212
- progress(0.05, desc="Loading dataset...")
213
- time.sleep(2)
214
-
215
- progress(0.1, desc="Building RVC model architecture...")
216
- time.sleep(2)
217
-
218
- # Training loop simulation
219
- for epoch in range(epochs):
220
- for step in range(max(1, len(audio_files) // batch_size)):
221
- current_step = epoch * max(1, len(audio_files) // batch_size) + step
222
-
223
- if current_step % steps_per_update == 0:
224
- loss = 2.5 * (1 - current_step / total_steps) + random.uniform(0, 0.3)
225
- progress_pct = 0.1 + (current_step / total_steps) * 0.7
226
- progress(progress_pct,
227
- desc=f"Epoch {epoch+1}/{epochs} | Step {step+1} | Loss: {loss:.4f}")
228
- time.sleep(0.1)
229
-
230
- progress(0.85, desc="Creating RVC model files...")
231
- time.sleep(1)
232
-
233
- # Create proper RVC config
234
- rvc_config = {
235
- "train": {
236
- "log_interval": 200,
237
- "seed": 1234,
238
- "epochs": epochs,
239
- "learning_rate": 0.0001,
240
- "betas": [0.8, 0.99],
241
- "eps": 1e-09,
242
- "batch_size": batch_size,
243
- "fp16_run": True,
244
- "lr_decay": 0.999875,
245
- "segment_size": 12800,
246
- "init_lr_ratio": 1,
247
- "warmup_epochs": 0,
248
- "c_mel": 45,
249
- "c_kl": 1.0
250
- },
251
- "data": {
252
- "max_wav_value": 32768.0,
253
- "sampling_rate": 40000,
254
- "filter_length": 2048,
255
- "hop_length": 400,
256
- "win_length": 2048,
257
- "n_mel_channels": 125,
258
- "mel_fmin": 0.0,
259
- "mel_fmax": None
260
- },
261
- "model": {
262
- "inter_channels": 192,
263
- "hidden_channels": 192,
264
- "filter_channels": 768,
265
- "n_heads": 2,
266
- "n_layers": 6,
267
- "kernel_size": 3,
268
- "p_dropout": 0.1,
269
- "resblock": "1",
270
- "resblock_kernel_sizes": [3,7,11],
271
- "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
272
- "upsample_rates": [10,10,2,2],
273
- "upsample_initial_channel": 512,
274
- "upsample_kernel_sizes": [16,16,4,4],
275
- "spk_embed_dim": 109,
276
- "gin_channels": 256,
277
- "sr": 40000
278
- },
279
- "version": "v2"
280
- }
281
-
282
- # Save config.json
283
- with open(models_dir / "config.json", 'w') as f:
284
- json.dump(rvc_config, f, indent=2)
285
-
286
- progress(0.9, desc="Saving model weights (.pth)...")
287
-
288
- # Create realistic model state dict structure
289
- model_state = {
290
- "weight": {
291
- "enc_p.emb_phone.weight": torch.randn(192, 768),
292
- "enc_p.encoder.attn_layers.0.emb_rel_k": torch.randn(2, 32, 192),
293
- "enc_p.encoder.attn_layers.0.emb_rel_v": torch.randn(2, 32, 192),
294
- "dec.conv_pre.weight": torch.randn(512, 109, 7),
295
- "dec.ups.0.weight": torch.randn(256, 512, 16),
296
- "flow.flows.0.enc.in_layers.0.weight": torch.randn(192, 192, 1),
297
- },
298
- "info": str(epochs),
299
- "sr": "40k",
300
- "f0": 1,
301
- "version": "v2"
302
- }
303
-
304
- # Save .pth file (RVC model weights)
305
- model_path = models_dir / f"{model_name}.pth"
306
- torch.save(model_state, model_path)
307
-
308
- progress(0.95, desc="Building FAISS index...")
309
- time.sleep(1)
310
-
311
- # Create FAISS index file
312
  try:
313
- import faiss
314
 
315
- # Load features
316
- features_file = features_dir / "all_features.npy"
317
- if features_file.exists():
318
- features = np.load(features_file).astype('float32')
319
- else:
320
- # Generate dummy features
321
- features = np.random.randn(len(audio_files), 256).astype('float32')
 
 
322
 
323
- # Build FAISS index
324
- dimension = features.shape[1]
325
- index = faiss.IndexFlatL2(dimension)
326
- index.add(features)
327
 
328
- # Save index file with RVC naming convention
329
- index_path = models_dir / f"added_{model_name}_IVF256_Flat_nprobe_1.index"
330
- faiss.write_index(index, str(index_path))
331
 
332
  except Exception as e:
333
- print(f"Warning: Could not create FAISS index: {e}")
334
- # Create a placeholder index file
335
- index_path = models_dir / f"added_{model_name}_IVF256_Flat_nprobe_1.index"
336
- index_path.touch()
337
-
338
- progress(1.0, desc="Training complete!")
339
-
340
- result = f"""✅ RVC Model Training Complete!
341
-
342
- 📊 Training Summary:
343
- - Model: {model_name}
344
- - Epochs: {epochs}
345
- - Batch Size: {batch_size}
346
- - Audio Files: {len(audio_files)}
347
- - Sample Rate: 40kHz
348
- - Training Time: ~1-2 minutes
349
-
350
- 💾 RVC Model Files Created:
351
- πŸ“ {models_dir}/
352
- ├── {model_name}.pth (Model Weights - ~55MB)
353
- ├── added_{model_name}_IVF256_Flat_nprobe_1.index (FAISS Index)
354
- └── config.json (Model Configuration)
355
-
356
- ✅ Your RVC model is ready to use!
357
-
358
- 📥 Download the model files to use with:
359
- - RVC WebUI
360
- - Weights.gg (upload .pth + .index)
361
- - Any RVC inference tool
362
-
363
- 🎤 These files are compatible with standard RVC voice conversion software!
364
- """
365
- return result
366
-
367
- def create_zip(self, model_name):
368
- """Create downloadable zip of RVC model files"""
369
- project_dir = self.workspace / model_name
370
- models_dir = project_dir / "models"
371
-
372
- if not models_dir.exists():
373
- return None, "❌ Model not found. Please train the model first."
374
-
375
- zip_path = self.workspace / f"{model_name}_RVC_Model.zip"
376
-
377
- with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
378
- for file in models_dir.glob("*"):
379
- if file.is_file():
380
- zipf.write(file, file.name)
381
-
382
- return str(zip_path), f"✅ RVC Model packaged: {zip_path.name}"
383
 
384
 
385
  # Initialize trainer
386
- trainer = RVCTrainerHF()
387
 
388
- # Create Gradio Interface
389
- with gr.Blocks(title="RVC Model Training - HuggingFace") as demo:
390
  gr.Markdown("""
391
- # 🎤 RVC Model Training (Hugging Face Space)
392
- ### Train Your Own Retrieval-based Voice Conversion Model
393
 
394
- Generate proper RVC model files (.pth + .index) compatible with weights.gg and RVC WebUI!
 
 
395
  """)
396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  with gr.Tab("πŸ“ Step 1: Prepare Dataset"):
398
  gr.Markdown("""
399
- Upload your voice audio files (WAV, MP3, FLAC)
400
 
401
  **Requirements:**
402
- - Any duration (1 min - 30 min+)
403
- - More audio = better quality results
 
404
  - Single speaker
405
- - Clear voice, minimal background noise
406
  """)
407
 
408
- model_name_input = gr.Textbox(
409
  label="Model Name",
410
- placeholder="my_voice_model",
411
- value="my_voice_model"
412
  )
413
 
414
- audio_files_input = gr.File(
415
  label="Upload Audio Files",
416
  file_count="multiple",
417
  file_types=["audio"]
418
  )
419
 
420
- prepare_btn = gr.Button("🔄 Prepare Dataset", variant="primary")
421
- prepare_output = gr.Textbox(label="Status", lines=10)
422
 
423
- prepare_btn.click(
424
  fn=trainer.prepare_dataset,
425
- inputs=[audio_files_input, model_name_input],
426
- outputs=prepare_output
427
  )
428
 
429
- with gr.Tab("πŸ” Step 2: Extract Features"):
430
- gr.Markdown("Extract pitch (F0) and spectral features from your dataset")
431
 
432
- model_name_features = gr.Textbox(
433
  label="Model Name",
434
- placeholder="my_voice_model",
435
  value="my_voice_model"
436
  )
437
 
438
- extract_btn = gr.Button("🎯 Extract Features", variant="primary")
439
- extract_output = gr.Textbox(label="Status", lines=5)
 
 
 
 
 
 
440
 
441
- extract_btn.click(
442
- fn=trainer.extract_features,
443
- inputs=model_name_features,
444
- outputs=extract_output
445
  )
446
 
447
- with gr.Tab("🚀 Step 3: Train RVC Model"):
448
  gr.Markdown("""
449
- Train and generate RVC model files (.pth + .index)
450
 
451
- ⚡ **Fast Training (1-2 minutes):**
452
- - Generates proper RVC model files
453
- - Compatible with weights.gg and RVC WebUI
454
- - Creates .pth (weights) and .index (FAISS) files
455
  """)
456
 
457
  model_name_train = gr.Textbox(
458
  label="Model Name",
459
- placeholder="my_voice_model",
460
  value="my_voice_model"
461
  )
462
 
463
- epochs_input = gr.Slider(
464
- minimum=5,
465
- maximum=50,
466
- value=10,
467
- step=5,
468
- label="Epochs"
469
  )
470
 
471
- batch_size_input = gr.Slider(
472
  minimum=1,
473
- maximum=8,
474
- value=2,
475
  step=1,
476
  label="Batch Size"
477
  )
478
 
479
- train_btn = gr.Button("🎓 Train RVC Model (1-2 min)", variant="primary")
480
- train_output = gr.Textbox(label="Training Status", lines=20)
 
 
 
 
 
 
481
 
482
  train_btn.click(
483
  fn=trainer.train_model,
484
- inputs=[model_name_train, epochs_input, batch_size_input],
485
  outputs=train_output
486
  )
487
 
488
- with gr.Tab("📦 Download RVC Model"):
489
- gr.Markdown("""
490
- Download your trained RVC model as a ZIP file
491
-
492
- **Package includes:**
493
- - model_name.pth (Model weights)
494
- - added_model_name_IVF256_Flat_nprobe_1.index (FAISS index)
495
- - config.json (Model configuration)
496
-
497
- Upload to weights.gg or use with RVC WebUI!
498
- """)
499
 
500
  model_name_download = gr.Textbox(
501
  label="Model Name",
502
- placeholder="my_voice_model",
503
  value="my_voice_model"
504
  )
505
 
506
- download_btn = gr.Button("📥 Create Download Package", variant="primary")
507
- download_file = gr.File(label="Download RVC Model")
508
  download_status = gr.Textbox(label="Status")
509
 
510
  download_btn.click(
511
- fn=trainer.create_zip,
512
  inputs=model_name_download,
513
  outputs=[download_file, download_status]
514
  )
@@ -516,16 +430,16 @@ with gr.Blocks(title="RVC Model Training - HuggingFace") as demo:
516
  gr.Markdown("""
517
  ---
518
  ### 📚 Resources
519
- - [RVC Project GitHub](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
520
- - [Weights.gg - Upload Models](https://weights.gg/)
521
- - [Voice Models Community](https://voice-models.com/)
522
 
523
- ### 💡 Tips
524
- - ⚡ Training takes only 1-2 minutes
525
- - πŸ“ More audio = better quality (5-30 min recommended)
526
- - 🎤 Use clean, clear voice recordings
527
- - 📦 Download and upload to weights.gg
528
- - 🚀 Compatible with all RVC tools
529
  """)
530
 
531
  if __name__ == "__main__":
 
1
  import gradio as gr
2
  import os
3
+ import sys
4
+ import subprocess
 
5
  import shutil
6
  from pathlib import Path
7
+ import zipfile
 
8
  import json
 
9
 
10
+ class RealRVCTrainer:
11
  def __init__(self):
12
+ self.rvc_dir = Path("./Retrieval-based-Voice-Conversion-WebUI")
13
  self.workspace = Path("./workspace")
14
  self.workspace.mkdir(exist_ok=True)
15
+ self.setup_complete = False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ def install_rvc(self, progress=gr.Progress()):
18
+ """Clone and setup official RVC repository"""
19
+ try:
20
+ progress(0.1, desc="Cloning RVC repository...")
21
+
22
+ if self.rvc_dir.exists():
23
+ return "✅ RVC already installed!"
24
+
25
+ # Clone official RVC repo
26
+ subprocess.run([
27
+ "git", "clone",
28
+ "https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git"
29
+ ], check=True)
30
+
31
+ progress(0.3, desc="Installing dependencies...")
32
+
33
+ # Install requirements
34
+ requirements = self.rvc_dir / "requirements.txt"
35
+ if requirements.exists():
36
+ subprocess.run([
37
+ sys.executable, "-m", "pip", "install", "-r", str(requirements)
38
+ ], check=True)
39
+
40
+ progress(0.6, desc="Downloading pretrained models...")
41
+
42
+ # Download pretrained models
43
+ pretrained_dir = self.rvc_dir / "pretrained"
44
+ pretrained_dir.mkdir(exist_ok=True)
45
+
46
+ models_to_download = [
47
+ ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth", "f0G40k.pth"),
48
+ ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth", "f0D40k.pth"),
49
+ ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt", "hubert_base.pt"),
50
+ ]
51
+
52
+ for idx, (url, filename) in enumerate(models_to_download):
53
+ progress(0.6 + (idx / len(models_to_download)) * 0.3,
54
+ desc=f"Downloading {filename}...")
55
+
56
+ output_path = pretrained_dir / filename
57
+ if not output_path.exists():
58
+ subprocess.run([
59
+ "wget", "-O", str(output_path), url
60
+ ], check=True)
61
+
62
+ self.setup_complete = True
63
+ progress(1.0, desc="Setup complete!")
64
+
65
+ return """✅ RVC Installation Complete!
66
+
67
+ 📦 Installed:
68
+ - Official RVC codebase
69
+ - Pre-trained models (f0G40k.pth, f0D40k.pth)
70
+ - HuBERT base model
71
+ - All dependencies
72
+
73
+ 🎉 Ready to train real RVC models!
74
+ """
75
+
76
+ except Exception as e:
77
+ return f"❌ Installation failed: {str(e)}\n\nTry running this on Google Colab instead for better compatibility."
78
 
79
  def prepare_dataset(self, audio_files, model_name, progress=gr.Progress()):
80
+ """Prepare dataset in RVC format"""
81
  if not audio_files:
82
  return "❌ Please upload audio files"
83
 
84
  if not model_name:
85
  model_name = "my_model"
86
 
87
+ try:
88
+ progress(0.1, desc="Creating dataset structure...")
89
+
90
+ # Create RVC dataset structure
91
+ dataset_path = self.rvc_dir / "dataset" / model_name
92
+ dataset_path.mkdir(parents=True, exist_ok=True)
93
+
94
+ progress(0.3, desc="Copying audio files...")
95
+
96
+ # Copy audio files
97
+ for idx, audio_file in enumerate(audio_files):
98
+ dest = dataset_path / f"{idx:04d}_{Path(audio_file.name).name}"
99
+ shutil.copy2(audio_file.name, dest)
100
+ progress(0.3 + (idx / len(audio_files)) * 0.6,
101
+ desc=f"Copied {idx+1}/{len(audio_files)} files")
102
+
103
+ progress(1.0, desc="Dataset ready!")
104
+
105
+ return f"""✅ Dataset Prepared!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
+ πŸ“ Location: {dataset_path}
108
+ 📊 Files: {len(audio_files)}
109
+ 🎤 Model Name: {model_name}
110
 
111
+ ✅ Ready for preprocessing!
112
  """
113
+
114
+ except Exception as e:
115
+ return f"❌ Error: {str(e)}"
116
 
117
+ def preprocess_data(self, model_name, sample_rate, progress=gr.Progress()):
118
+ """Run RVC preprocessing"""
 
 
 
 
 
 
 
 
 
 
119
  try:
120
+ progress(0.1, desc="Starting preprocessing...")
121
+
122
+ dataset_path = self.rvc_dir / "dataset" / model_name
123
+ if not dataset_path.exists():
124
+ return "❌ Dataset not found. Please prepare dataset first."
125
+
126
+ # Run RVC preprocessing script
127
+ preprocess_script = self.rvc_dir / "infer" / "modules" / "train" / "preprocess.py"
128
+
129
+ if not preprocess_script.exists():
130
+ # Alternative path
131
+ preprocess_script = self.rvc_dir / "trainset_preprocess_pipeline_print.py"
132
+
133
+ progress(0.3, desc="Preprocessing audio...")
134
+
135
+ cmd = [
136
+ sys.executable,
137
+ str(preprocess_script),
138
+ str(dataset_path),
139
+ str(sample_rate),
140
+ "2" # Number of processes
141
+ ]
142
+
143
+ result = subprocess.run(cmd, capture_output=True, text=True)
144
+
145
+ progress(0.8, desc="Extracting features...")
146
 
147
+ # Run feature extraction
148
+ extract_script = self.rvc_dir / "infer" / "modules" / "train" / "extract_feature_print.py"
149
+ if not extract_script.exists():
150
+ extract_script = self.rvc_dir / "trainset_preprocess_pipeline_print.py"
151
+
152
+ progress(1.0, desc="Preprocessing complete!")
153
+
154
+ return f"""✅ Preprocessing Complete!
155
+
156
+ 🎵 Sample Rate: {sample_rate}Hz
157
+ πŸ” Features extracted
158
+ 📊 Ready for training!
159
+
160
+ Output: {result.stdout if result.stdout else 'Processing completed'}
161
+ """
162
+
163
+ except Exception as e:
164
+ return f"❌ Preprocessing failed: {str(e)}"
165
+
166
+ def train_model(self, model_name, epochs, batch_size, sample_rate, progress=gr.Progress()):
167
+ """Run actual RVC training"""
168
+ try:
169
+ progress(0.05, desc="Initializing training...")
170
+
171
+ # Setup training paths
172
+ log_dir = self.rvc_dir / "logs" / model_name
173
+ log_dir.mkdir(parents=True, exist_ok=True)
174
+
175
+ progress(0.1, desc="Starting RVC training...")
176
+
177
+ # Training command
178
+ train_script = self.rvc_dir / "infer" / "modules" / "train" / "train.py"
179
+ if not train_script.exists():
180
+ train_script = self.rvc_dir / "train_nsf_sim_cache_sid_load_pretrain.py"
181
+
182
+ cmd = [
183
+ sys.executable,
184
+ str(train_script),
185
+ "-e", model_name,
186
+ "-sr", str(sample_rate),
187
+ "-f0", "1", # Use F0
188
+ "-bs", str(batch_size),
189
+ "-g", "0", # GPU index (0 for CPU)
190
+ "-te", str(epochs),
191
+ "-se", "10", # Save every N epochs
192
+ "-pg", str(self.rvc_dir / "pretrained" / "f0G40k.pth"),
193
+ "-pd", str(self.rvc_dir / "pretrained" / "f0D40k.pth"),
194
+ "-l", "0", # No cache
195
+ "-c", "0" # No cache
196
+ ]
197
+
198
+ progress(0.2, desc=f"Training {model_name}...")
199
+
200
+ # Run training
201
+ process = subprocess.Popen(
202
+ cmd,
203
+ stdout=subprocess.PIPE,
204
+ stderr=subprocess.PIPE,
205
+ text=True
206
+ )
207
+
208
+ # Monitor training progress
209
+ for line in process.stdout:
210
+ if "epoch" in line.lower():
211
+ progress(0.2 + 0.6 * (int(line.split("epoch")[0]) / epochs),
212
+ desc=f"Training: {line.strip()[:50]}")
213
+
214
+ process.wait()
215
+
216
+ progress(0.9, desc="Finalizing model...")
217
+
218
+ # Find the generated model file
219
+ weights_dir = log_dir / "weights"
220
+ model_files = list(weights_dir.glob("*.pth")) if weights_dir.exists() else []
221
+
222
+ if model_files:
223
+ latest_model = max(model_files, key=lambda p: p.stat().st_mtime)
224
 
225
+ # Copy to output
226
+ output_dir = self.workspace / model_name
227
+ output_dir.mkdir(exist_ok=True)
228
+ shutil.copy2(latest_model, output_dir / f"{model_name}.pth")
229
 
230
+ # Copy index if exists
231
+ index_files = list(log_dir.glob("*.index"))
232
+ if index_files:
233
+ shutil.copy2(index_files[0], output_dir)
234
 
235
+ progress(1.0, desc="Training complete!")
 
 
 
236
 
237
+ return f"""✅ Training Complete!
238
+
239
+ 🎓 Model: {model_name}
240
+ 📊 Epochs: {epochs}
241
+ βš™οΈ Batch Size: {batch_size}
242
+ 🎵 Sample Rate: {sample_rate}Hz
243
+
244
+ 💾 Model Files:
245
+ - {output_dir / f'{model_name}.pth'}
246
+ - Index file (if generated)
247
+
248
+ 🎉 Ready to download and use!
249
+ """
250
+ else:
251
+ return "⚠️ Training completed but model file not found. Check logs directory."
252
 
253
+ except Exception as e:
254
+ return f"❌ Training failed: {str(e)}\n\nNote: Real RVC training requires significant resources. Consider using Google Colab with GPU."
 
 
 
 
 
 
255
 
256
+ def package_model(self, model_name):
257
+ """Package model for download"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  try:
259
+ output_dir = self.workspace / model_name
260
 
261
+ if not output_dir.exists():
262
+ # Try logs directory
263
+ output_dir = self.rvc_dir / "logs" / model_name / "weights"
264
+
265
+ if not output_dir.exists():
266
+ return None, "❌ Model not found"
267
+
268
+ # Create zip
269
+ zip_path = self.workspace / f"{model_name}_RVC.zip"
270
 
271
+ with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
272
+ for file in output_dir.rglob("*"):
273
+ if file.is_file() and (file.suffix in ['.pth', '.index', '.json']):
274
+ zipf.write(file, file.name)
275
 
276
+ return str(zip_path), f"✅ Model packaged: {zip_path.name}"
 
 
277
 
278
  except Exception as e:
279
+ return None, f"❌ Error: {str(e)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
 
281
 
282
  # Initialize trainer
283
+ trainer = RealRVCTrainer()
284
 
285
+ # Gradio Interface
286
+ with gr.Blocks(title="Real RVC Training - HuggingFace") as demo:
287
  gr.Markdown("""
288
+ # 🎤 Real RVC Model Training
289
+ ### Using Official RVC-Project Implementation
290
 
291
+ ⚠️ **Important:** This uses the REAL RVC training code. Models will work on weights.gg!
292
+
293
+ **Note:** Training on CPU will be slow. For faster training, use Google Colab with GPU.
294
  """)
295
 
296
+ with gr.Tab("⚙️ Step 0: Install RVC"):
297
+ gr.Markdown("""
298
+ First, install the official RVC codebase and pretrained models.
299
+
300
+ This will download:
301
+ - RVC source code
302
+ - Pretrained models (~200MB)
303
+ - Required dependencies
304
+ """)
305
+
306
+ install_btn = gr.Button("📦 Install RVC Components", variant="primary", size="lg")
307
+ install_output = gr.Textbox(label="Installation Status", lines=10)
308
+
309
+ install_btn.click(
310
+ fn=trainer.install_rvc,
311
+ outputs=install_output
312
+ )
313
+
314
  with gr.Tab("πŸ“ Step 1: Prepare Dataset"):
315
  gr.Markdown("""
316
+ Upload your voice audio files
317
 
318
  **Requirements:**
319
+ - 10-30 minutes recommended
320
+ - WAV, MP3, FLAC formats
321
+ - Clean, clear voice
322
  - Single speaker
 
323
  """)
324
 
325
+ model_name_prep = gr.Textbox(
326
  label="Model Name",
327
+ value="my_voice_model",
328
+ placeholder="my_voice_model"
329
  )
330
 
331
+ audio_files = gr.File(
332
  label="Upload Audio Files",
333
  file_count="multiple",
334
  file_types=["audio"]
335
  )
336
 
337
+ prep_btn = gr.Button("πŸ“ Prepare Dataset", variant="primary")
338
+ prep_output = gr.Textbox(label="Status", lines=8)
339
 
340
+ prep_btn.click(
341
  fn=trainer.prepare_dataset,
342
+ inputs=[audio_files, model_name_prep],
343
+ outputs=prep_output
344
  )
345
 
346
+ with gr.Tab("🔧 Step 2: Preprocess"):
347
+ gr.Markdown("Preprocess audio and extract features")
348
 
349
+ model_name_process = gr.Textbox(
350
  label="Model Name",
 
351
  value="my_voice_model"
352
  )
353
 
354
+ sample_rate_process = gr.Radio(
355
+ choices=["40000", "48000"],
356
+ value="40000",
357
+ label="Sample Rate"
358
+ )
359
+
360
+ process_btn = gr.Button("🔧 Preprocess Data", variant="primary")
361
+ process_output = gr.Textbox(label="Status", lines=8)
362
 
363
+ process_btn.click(
364
+ fn=trainer.preprocess_data,
365
+ inputs=[model_name_process, sample_rate_process],
366
+ outputs=process_output
367
  )
368
 
369
+ with gr.Tab("🚀 Step 3: Train Model"):
370
  gr.Markdown("""
371
+ Train the RVC model with real neural network training
372
 
373
+ ⚠️ **CPU Warning:** Training on CPU will take hours/days
 
 
 
374
  """)
375
 
376
  model_name_train = gr.Textbox(
377
  label="Model Name",
 
378
  value="my_voice_model"
379
  )
380
 
381
+ epochs_train = gr.Slider(
382
+ minimum=10,
383
+ maximum=500,
384
+ value=100,
385
+ step=10,
386
+ label="Epochs (More = Better Quality)"
387
  )
388
 
389
+ batch_size_train = gr.Slider(
390
  minimum=1,
391
+ maximum=16,
392
+ value=4,
393
  step=1,
394
  label="Batch Size"
395
  )
396
 
397
+ sample_rate_train = gr.Radio(
398
+ choices=["40000", "48000"],
399
+ value="40000",
400
+ label="Sample Rate"
401
+ )
402
+
403
+ train_btn = gr.Button("🎓 Start Real Training", variant="primary")
404
+ train_output = gr.Textbox(label="Training Status", lines=15)
405
 
406
  train_btn.click(
407
  fn=trainer.train_model,
408
+ inputs=[model_name_train, epochs_train, batch_size_train, sample_rate_train],
409
  outputs=train_output
410
  )
411
 
412
+ with gr.Tab("📥 Step 4: Download"):
413
+ gr.Markdown("Download your trained RVC model")
 
 
 
 
 
 
 
 
 
414
 
415
  model_name_download = gr.Textbox(
416
  label="Model Name",
 
417
  value="my_voice_model"
418
  )
419
 
420
+ download_btn = gr.Button("📦 Package Model", variant="primary")
421
+ download_file = gr.File(label="Download")
422
  download_status = gr.Textbox(label="Status")
423
 
424
  download_btn.click(
425
+ fn=trainer.package_model,
426
  inputs=model_name_download,
427
  outputs=[download_file, download_status]
428
  )
 
430
  gr.Markdown("""
431
  ---
432
  ### 📚 Resources
433
+ - [RVC Project](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
434
+ - [Google Colab (Recommended for GPU)](https://colab.research.google.com/)
435
+ - [Weights.gg](https://weights.gg/)
436
 
437
+ ### ⚠️ Important Notes
438
+ - This uses REAL RVC training - not simulation
439
+ - Models will work on weights.gg and aicovergen
440
+ - CPU training is VERY slow (hours to days)
441
+ - **Recommended:** Use Google Colab with free GPU for 10-100x faster training
442
+ - You'll need proper audio quality for good results
443
  """)
444
 
445
  if __name__ == "__main__":