Spaces:

sugakrit6
/

TrainRVC

Sleeping

App Files Files Community

sugakrit6 committed on Dec 14, 2025

Commit

85a69bf

verified ·

1 Parent(s): 6352edc

Update app.py

Browse files

Files changed (1) hide show

app.py +332 -418

app.py CHANGED Viewed

@@ -1,514 +1,428 @@
 import gradio as gr
 import os
-import torch
-import torchaudio
-import zipfile
 import shutil
 from pathlib import Path
-import subprocess
-import sys
 import json
-import numpy as np
-class RVCTrainerHF:
     def __init__(self):
         self.workspace = Path("./workspace")
         self.workspace.mkdir(exist_ok=True)
-        self.device = "cpu"
-    def install_rvc_dependencies(self, progress=gr.Progress()):
-        """Install RVC dependencies"""
-        progress(0.1, desc="Installing dependencies...")
-        packages = [
-            "torch",
-            "torchaudio",
-            "torchcodec",
-            "librosa",
-            "soundfile",
-            "praat-parselmouth",
-            "pyworld",
-            "faiss-cpu",
-            "scikit-learn",
-            "scipy",
-        ]
-        for i, pkg in enumerate(packages):
-            progress((i + 1) / len(packages), desc=f"Installing {pkg}...")
-            subprocess.run([sys.executable, "-m", "pip", "install", "-q", pkg])
-        return "✅ Dependencies installed successfully!"
     def prepare_dataset(self, audio_files, model_name, progress=gr.Progress()):
-        """Prepare training dataset from uploaded audio files"""
         if not audio_files:
             return "❌ Please upload audio files"
         if not model_name:
             model_name = "my_model"
-        progress(0.1, desc="Creating project structure...")
-        # Create project directory
-        project_dir = self.workspace / model_name
-        dataset_dir = project_dir / "dataset"
-        processed_dir = project_dir / "processed"
-        dataset_dir.mkdir(parents=True, exist_ok=True)
-        processed_dir.mkdir(parents=True, exist_ok=True)
-        progress(0.3, desc="Processing audio files...")
-        total_duration = 0
-        processed_files = []
-        for idx, audio_file in enumerate(audio_files):
-            try:
-                # Load audio
-                waveform, sr = torchaudio.load(audio_file.name)
-                # Convert to mono
-                if waveform.shape[0] > 1:
-                    waveform = torch.mean(waveform, dim=0, keepdim=True)
-                # Resample to 40kHz (standard for RVC)
-                target_sr = 40000
-                if sr != target_sr:
-                    resampler = torchaudio.transforms.Resample(sr, target_sr)
-                    waveform = resampler(waveform)
-                # Normalize
-                waveform = waveform / torch.max(torch.abs(waveform))
-                # Calculate duration
-                duration = waveform.shape[1] / target_sr
-                total_duration += duration
-                # Save
-                output_path = processed_dir / f"{idx:04d}.wav"
-                torchaudio.save(output_path, waveform, target_sr)
-                processed_files.append(output_path)
-                progress((idx + 1) / len(audio_files) * 0.6 + 0.3,
-                        desc=f"Processed {idx+1}/{len(audio_files)} files")
-            except Exception as e:
-                return f"❌ Error processing file {audio_file.name}: {str(e)}"
-        # Save dataset info
-        info = {
-            "model_name": model_name,
-            "num_files": len(processed_files),
-            "total_duration": f"{total_duration:.2f}s ({total_duration/60:.2f} min)",
-            "sample_rate": 40000,
-            "files": [str(f) for f in processed_files]
-        }
-        info_path = project_dir / "dataset_info.json"
-        with open(info_path, 'w') as f:
-            json.dump(info, f, indent=2)
-        progress(1.0, desc="Complete!")
-        result = f"""✅ Dataset Prepared Successfully!
-📊 Dataset Info:
-- Model Name: {model_name}
-- Files Processed: {len(processed_files)}
-- Total Duration: {total_duration/60:.2f} minutes
-- Sample Rate: 40kHz
-- Location: {project_dir}
-✅ Ready for RVC model training (1-2 minutes process time)!
-Your dataset is ready. Next step: Extract features and train!
 """
-        return result
-    def extract_features(self, model_name, progress=gr.Progress()):
-        """Extract F0 and speaker embeddings for RVC training"""
-        project_dir = self.workspace / model_name
-        processed_dir = project_dir / "processed"
-        features_dir = project_dir / "features"
-        features_dir.mkdir(exist_ok=True)
-        if not processed_dir.exists():
-            return "❌ No processed dataset found. Please prepare dataset first."
-        progress(0.1, desc="Installing feature extraction tools...")
         try:
-            import pyworld as pw
-            import parselmouth
-        except ImportError:
-            subprocess.run([sys.executable, "-m", "pip", "install", "-q",
-                          "pyworld", "praat-parselmouth"])
-            import pyworld as pw
-            import parselmouth
-        audio_files = list(processed_dir.glob("*.wav"))
-        all_features = []
-        for idx, audio_file in enumerate(audio_files):
-            progress((idx + 1) / len(audio_files),
-                    desc=f"Extracting features {idx+1}/{len(audio_files)}")
-            try:
-                waveform, sr = torchaudio.load(audio_file)
-                audio_np = waveform.numpy().flatten().astype(np.float64)
-                # Extract F0 using PyWorld (pitch)
-                f0, t = pw.dio(audio_np, sr, frame_period=10)
-                f0 = pw.stonemask(audio_np, f0, t, sr)
-                # Extract spectral features
-                sp = pw.cheaptrick(audio_np, f0, t, sr)
-                ap = pw.d4c(audio_np, f0, t, sr)
-                # Save individual features
-                np.save(features_dir / f"{audio_file.stem}_f0.npy", f0)
-                np.save(features_dir / f"{audio_file.stem}_sp.npy", sp)
-                np.save(features_dir / f"{audio_file.stem}_ap.npy", ap)
-                # Collect for index building
-                all_features.append(sp.mean(axis=0))
-            except Exception as e:
-                return f"❌ Error extracting features: {str(e)}"
-        # Save combined features for index building
-        all_features_array = np.array(all_features)
-        np.save(features_dir / "all_features.npy", all_features_array)
-        return f"✅ Features extracted for {len(audio_files)} files!\n✅ Ready for training."
-    def train_model(self, model_name, epochs, batch_size, progress=gr.Progress()):
-        """Train RVC model and generate .pth and .index files (1-2 minutes)"""
-        import time
-        import random
-        project_dir = self.workspace / model_name
-        if not project_dir.exists():
-            return "❌ Model not found. Please prepare dataset first."
-        processed_dir = project_dir / "processed"
-        features_dir = project_dir / "features"
-        models_dir = project_dir / "models"
-        models_dir.mkdir(exist_ok=True)
-        # Check if dataset exists
-        audio_files = list(processed_dir.glob("*.wav")) if processed_dir.exists() else []
-        if not audio_files:
-            return "❌ No processed audio found. Please prepare dataset first."
-        progress(0, desc="Initializing RVC training...")
-        time.sleep(0.5)
-        # Simulate training
-        total_steps = epochs * max(1, len(audio_files) // batch_size)
-        steps_per_update = max(1, total_steps // 20)
-        progress(0.05, desc="Loading dataset...")
-        time.sleep(2)
-        progress(0.1, desc="Building RVC model architecture...")
-        time.sleep(2)
-        # Training loop simulation
-        for epoch in range(epochs):
-            for step in range(max(1, len(audio_files) // batch_size)):
-                current_step = epoch * max(1, len(audio_files) // batch_size) + step
-                if current_step % steps_per_update == 0:
-                    loss = 2.5 * (1 - current_step / total_steps) + random.uniform(0, 0.3)
-                    progress_pct = 0.1 + (current_step / total_steps) * 0.7
-                    progress(progress_pct,
-                            desc=f"Epoch {epoch+1}/{epochs} | Step {step+1} | Loss: {loss:.4f}")
-                    time.sleep(0.1)
-        progress(0.85, desc="Creating RVC model files...")
-        time.sleep(1)
-        # Create proper RVC config
-        rvc_config = {
-            "train": {
-                "log_interval": 200,
-                "seed": 1234,
-                "epochs": epochs,
-                "learning_rate": 0.0001,
-                "betas": [0.8, 0.99],
-                "eps": 1e-09,
-                "batch_size": batch_size,
-                "fp16_run": True,
-                "lr_decay": 0.999875,
-                "segment_size": 12800,
-                "init_lr_ratio": 1,
-                "warmup_epochs": 0,
-                "c_mel": 45,
-                "c_kl": 1.0
-            },
-            "data": {
-                "max_wav_value": 32768.0,
-                "sampling_rate": 40000,
-                "filter_length": 2048,
-                "hop_length": 400,
-                "win_length": 2048,
-                "n_mel_channels": 125,
-                "mel_fmin": 0.0,
-                "mel_fmax": None
-            },
-            "model": {
-                "inter_channels": 192,
-                "hidden_channels": 192,
-                "filter_channels": 768,
-                "n_heads": 2,
-                "n_layers": 6,
-                "kernel_size": 3,
-                "p_dropout": 0.1,
-                "resblock": "1",
-                "resblock_kernel_sizes": [3,7,11],
-                "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
-                "upsample_rates": [10,10,2,2],
-                "upsample_initial_channel": 512,
-                "upsample_kernel_sizes": [16,16,4,4],
-                "spk_embed_dim": 109,
-                "gin_channels": 256,
-                "sr": 40000
-            },
-            "version": "v2"
-        }
-        # Save config.json
-        with open(models_dir / "config.json", 'w') as f:
-            json.dump(rvc_config, f, indent=2)
-        progress(0.9, desc="Saving model weights (.pth)...")
-        # Create realistic model state dict structure
-        model_state = {
-            "weight": {
-                "enc_p.emb_phone.weight": torch.randn(192, 768),
-                "enc_p.encoder.attn_layers.0.emb_rel_k": torch.randn(2, 32, 192),
-                "enc_p.encoder.attn_layers.0.emb_rel_v": torch.randn(2, 32, 192),
-                "dec.conv_pre.weight": torch.randn(512, 109, 7),
-                "dec.ups.0.weight": torch.randn(256, 512, 16),
-                "flow.flows.0.enc.in_layers.0.weight": torch.randn(192, 192, 1),
-            },
-            "info": str(epochs),
-            "sr": "40k",
-            "f0": 1,
-            "version": "v2"
-        }
-        # Save .pth file (RVC model weights)
-        model_path = models_dir / f"{model_name}.pth"
-        torch.save(model_state, model_path)
-        progress(0.95, desc="Building FAISS index...")
-        time.sleep(1)
-        # Create FAISS index file
         try:
-            import faiss
-            # Load features
-            features_file = features_dir / "all_features.npy"
-            if features_file.exists():
-                features = np.load(features_file).astype('float32')
-            else:
-                # Generate dummy features
-                features = np.random.randn(len(audio_files), 256).astype('float32')
-            # Build FAISS index
-            dimension = features.shape[1]
-            index = faiss.IndexFlatL2(dimension)
-            index.add(features)
-            # Save index file with RVC naming convention
-            index_path = models_dir / f"added_{model_name}_IVF256_Flat_nprobe_1.index"
-            faiss.write_index(index, str(index_path))
         except Exception as e:
-            print(f"Warning: Could not create FAISS index: {e}")
-            # Create a placeholder index file
-            index_path = models_dir / f"added_{model_name}_IVF256_Flat_nprobe_1.index"
-            index_path.touch()
-        progress(1.0, desc="Training complete!")
-        result = f"""✅ RVC Model Training Complete!
-📊 Training Summary:
-- Model: {model_name}
-- Epochs: {epochs}
-- Batch Size: {batch_size}
-- Audio Files: {len(audio_files)}
-- Sample Rate: 40kHz
-- Training Time: ~1-2 minutes
-💾 RVC Model Files Created:
-📁 {models_dir}/
-  ├── {model_name}.pth (Model Weights - ~55MB)
-  ├── added_{model_name}_IVF256_Flat_nprobe_1.index (FAISS Index)
-  └── config.json (Model Configuration)
-✅ Your RVC model is ready to use!
-📥 Download the model files to use with:
-- RVC WebUI
-- Weights.gg (upload .pth + .index)
-- Any RVC inference tool
-🎤 These files are compatible with standard RVC voice conversion software!
-"""
-        return result
-    def create_zip(self, model_name):
-        """Create downloadable zip of RVC model files"""
-        project_dir = self.workspace / model_name
-        models_dir = project_dir / "models"
-        if not models_dir.exists():
-            return None, "❌ Model not found. Please train the model first."
-        zip_path = self.workspace / f"{model_name}_RVC_Model.zip"
-        with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
-            for file in models_dir.glob("*"):
-                if file.is_file():
-                    zipf.write(file, file.name)
-        return str(zip_path), f"✅ RVC Model packaged: {zip_path.name}"
 # Initialize trainer
-trainer = RVCTrainerHF()
-# Create Gradio Interface
-with gr.Blocks(title="RVC Model Training - HuggingFace") as demo:
     gr.Markdown("""
-    # 🎤 RVC Model Training (Hugging Face Space)
-    ### Train Your Own Retrieval-based Voice Conversion Model
-    Generate proper RVC model files (.pth + .index) compatible with weights.gg and RVC WebUI!
     """)
     with gr.Tab("📁 Step 1: Prepare Dataset"):
         gr.Markdown("""
-        Upload your voice audio files (WAV, MP3, FLAC)
         **Requirements:**
-        - Any duration (1 min - 30 min+)
-        - More audio = better quality results
         - Single speaker
-        - Clear voice, minimal background noise
         """)
-        model_name_input = gr.Textbox(
             label="Model Name",
-            placeholder="my_voice_model",
-            value="my_voice_model"
         )
-        audio_files_input = gr.File(
             label="Upload Audio Files",
             file_count="multiple",
             file_types=["audio"]
         )
-        prepare_btn = gr.Button("🔄 Prepare Dataset", variant="primary")
-        prepare_output = gr.Textbox(label="Status", lines=10)
-        prepare_btn.click(
             fn=trainer.prepare_dataset,
-            inputs=[audio_files_input, model_name_input],
-            outputs=prepare_output
         )
-    with gr.Tab("🔍 Step 2: Extract Features"):
-        gr.Markdown("Extract pitch (F0) and spectral features from your dataset")
-        model_name_features = gr.Textbox(
             label="Model Name",
-            placeholder="my_voice_model",
             value="my_voice_model"
         )
-        extract_btn = gr.Button("🎯 Extract Features", variant="primary")
-        extract_output = gr.Textbox(label="Status", lines=5)
-        extract_btn.click(
-            fn=trainer.extract_features,
-            inputs=model_name_features,
-            outputs=extract_output
         )
-    with gr.Tab("🚀 Step 3: Train RVC Model"):
         gr.Markdown("""
-        Train and generate RVC model files (.pth + .index)
-        ⚡ **Fast Training (1-2 minutes):**
-        - Generates proper RVC model files
-        - Compatible with weights.gg and RVC WebUI
-        - Creates .pth (weights) and .index (FAISS) files
         """)
         model_name_train = gr.Textbox(
             label="Model Name",
-            placeholder="my_voice_model",
             value="my_voice_model"
         )
-        epochs_input = gr.Slider(
-            minimum=5,
-            maximum=50,
-            value=10,
-            step=5,
-            label="Epochs"
         )
-        batch_size_input = gr.Slider(
             minimum=1,
-            maximum=8,
-            value=2,
             step=1,
             label="Batch Size"
         )
-        train_btn = gr.Button("🎓 Train RVC Model (1-2 min)", variant="primary")
-        train_output = gr.Textbox(label="Training Status", lines=20)
         train_btn.click(
             fn=trainer.train_model,
-            inputs=[model_name_train, epochs_input, batch_size_input],
             outputs=train_output
         )
-    with gr.Tab("📦 Download RVC Model"):
-        gr.Markdown("""
-        Download your trained RVC model as a ZIP file
-        **Package includes:**
-        - model_name.pth (Model weights)
-        - added_model_name_IVF256_Flat_nprobe_1.index (FAISS index)
-        - config.json (Model configuration)
-        Upload to weights.gg or use with RVC WebUI!
-        """)
         model_name_download = gr.Textbox(
             label="Model Name",
-            placeholder="my_voice_model",
             value="my_voice_model"
         )
-        download_btn = gr.Button("📥 Create Download Package", variant="primary")
-        download_file = gr.File(label="Download RVC Model")
         download_status = gr.Textbox(label="Status")
         download_btn.click(
-            fn=trainer.create_zip,
             inputs=model_name_download,
             outputs=[download_file, download_status]
         )
@@ -516,16 +430,16 @@ with gr.Blocks(title="RVC Model Training - HuggingFace") as demo:
     gr.Markdown("""
     ---
     ### 📚 Resources
-    - [RVC Project GitHub](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
-    - [Weights.gg - Upload Models](https://weights.gg/)
-    - [Voice Models Community](https://voice-models.com/)
-    ### 💡 Tips
-    - ⚡ Training takes only 1-2 minutes
-    - 📁 More audio = better quality (5-30 min recommended)
-    - 🎤 Use clean, clear voice recordings
-    - 📦 Download and upload to weights.gg
-    - 🚀 Compatible with all RVC tools
     """)
 if __name__ == "__main__":

 import gradio as gr
 import os
+import sys
+import subprocess
 import shutil
 from pathlib import Path
+import zipfile
 import json
+class RealRVCTrainer:
     def __init__(self):
+        """Set up the working paths and create the workspace directory."""
+        # Location of the RVC repository checkout, relative to the CWD.
+        self.rvc_dir = Path("./Retrieval-based-Voice-Conversion-WebUI")
+        # Copied model files and the packaged zip archives are written here.
         self.workspace = Path("./workspace")
         self.workspace.mkdir(exist_ok=True)
+        # Flipped to True by install_rvc() once installation has finished.
+        self.setup_complete = False
+    def install_rvc(self, progress=gr.Progress()):
+        """Clone the official RVC repository and set it up for training.
+        Installs the repository's requirements and downloads the pretrained
+        models. Steps whose result is already on disk are skipped, so the
+        method can be re-run to resume an installation that failed part-way.
+        Returns a status string; failures are reported in the returned text
+        instead of being raised.
+        """
+        try:
+            # Finished earlier in this process: nothing left to do.
+            if self.setup_complete and self.rvc_dir.exists():
+                return "✅ RVC already installed!"
+            progress(0.1, desc="Cloning RVC repository...")
+            # A bare directory is not proof of a checkout, so look for .git;
+            # otherwise a stub or half-finished folder would be reported as
+            # a complete installation.
+            if not (self.rvc_dir / ".git").exists():
+                # Clone official RVC repo into the path every other step uses
+                subprocess.run([
+                    "git", "clone",
+                    "https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI.git",
+                    str(self.rvc_dir)
+                ], check=True)
+            progress(0.3, desc="Installing dependencies...")
+            # Install requirements
+            requirements = self.rvc_dir / "requirements.txt"
+            if requirements.exists():
+                subprocess.run([
+                    sys.executable, "-m", "pip", "install", "-r", str(requirements)
+                ], check=True)
+            progress(0.6, desc="Downloading pretrained models...")
+            # Download pretrained models
+            pretrained_dir = self.rvc_dir / "pretrained"
+            pretrained_dir.mkdir(exist_ok=True)
+            models_to_download = [
+                ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0G40k.pth", "f0G40k.pth"),
+                ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/pretrained/f0D40k.pth", "f0D40k.pth"),
+                ("https://huggingface.co/lj1995/VoiceConversionWebUI/resolve/main/hubert_base.pt", "hubert_base.pt"),
+            ]
+            for idx, (url, filename) in enumerate(models_to_download):
+                progress(0.6 + (idx / len(models_to_download)) * 0.3,
+                        desc=f"Downloading {filename}...")
+                output_path = pretrained_dir / filename
+                if not output_path.exists():
+                    # Download under a temporary name and rename on success so
+                    # an interrupted transfer is never mistaken for a finished
+                    # file by the exists() check on the next run.
+                    partial_path = output_path.with_name(filename + ".part")
+                    subprocess.run([
+                        "wget", "-O", str(partial_path), url
+                    ], check=True)
+                    partial_path.replace(output_path)
+            self.setup_complete = True
+            progress(1.0, desc="Setup complete!")
+            return """✅ RVC Installation Complete!
+📦 Installed:
+- Official RVC codebase
+- Pre-trained models (f0G40k.pth, f0D40k.pth)
+- HuBERT base model
+- All dependencies
+🎉 Ready to train real RVC models!
+"""
+        except Exception as e:
+            return f"❌ Installation failed: {str(e)}\n\nTry running this on Google Colab instead for better compatibility."
     def prepare_dataset(self, audio_files, model_name, progress=gr.Progress()):
+        """Copy the uploaded audio files into the RVC dataset folder.
+        ``audio_files`` are upload objects whose ``.name`` is a file path;
+        ``model_name`` names the dataset sub-folder and falls back to
+        "my_model" when empty. Returns a status string; failures are
+        reported in the returned text instead of being raised.
+        """
         if not audio_files:
             return "❌ Please upload audio files"
         if not model_name:
             model_name = "my_model"
+        # The name becomes a directory component, so refuse anything that
+        # could point outside the dataset folder.
+        if model_name in (".", "..") or Path(model_name).name != model_name:
+            return "❌ Invalid model name"
+        # mkdir(parents=True) below would otherwise create a stub checkout
+        # directory before RVC has been cloned.
+        if not self.rvc_dir.exists():
+            return "❌ RVC is not installed. Please run Step 0 (Install RVC) first."
+        try:
+            progress(0.1, desc="Creating dataset structure...")
+            # Create RVC dataset structure
+            dataset_path = self.rvc_dir / "dataset" / model_name
+            dataset_path.mkdir(parents=True, exist_ok=True)
+            progress(0.3, desc="Copying audio files...")
+            # Copy audio files; the numeric prefix keeps same-named uploads apart
+            for idx, audio_file in enumerate(audio_files):
+                dest = dataset_path / f"{idx:04d}_{Path(audio_file.name).name}"
+                shutil.copy2(audio_file.name, dest)
+                progress(0.3 + (idx / len(audio_files)) * 0.6,
+                        desc=f"Copied {idx+1}/{len(audio_files)} files")
+            progress(1.0, desc="Dataset ready!")
+            return f"""✅ Dataset Prepared!
+📁 Location: {dataset_path}
+📊 Files: {len(audio_files)}
+🎤 Model Name: {model_name}
+✅ Ready for preprocessing!
 """
+        except Exception as e:
+            return f"❌ Error: {str(e)}"
+    def preprocess_data(self, model_name, sample_rate, progress=gr.Progress()):
+        """Run the RVC preprocessing script on a prepared dataset.
+        Looks for the dataset under ``<rvc_dir>/dataset/<model_name>`` and
+        runs the preprocessing script on it with ``sample_rate`` and two
+        worker processes. Returns a status string; a non-zero exit of the
+        script is reported as a failure together with its output.
+        """
         try:
+            progress(0.1, desc="Starting preprocessing...")
+            dataset_path = self.rvc_dir / "dataset" / model_name
+            if not dataset_path.exists():
+                return "❌ Dataset not found. Please prepare dataset first."
+            # Run RVC preprocessing script
+            preprocess_script = self.rvc_dir / "infer" / "modules" / "train" / "preprocess.py"
+            if not preprocess_script.exists():
+                # Alternative path
+                preprocess_script = self.rvc_dir / "trainset_preprocess_pipeline_print.py"
+            progress(0.3, desc="Preprocessing audio...")
+            cmd = [
+                sys.executable,
+                str(preprocess_script),
+                str(dataset_path),
+                str(sample_rate),
+                "2"  # Number of processes
+            ]
+            result = subprocess.run(cmd, capture_output=True, text=True)
+            if result.returncode != 0:
+                # Surface the script's own diagnostics instead of claiming success.
+                details = (result.stderr or result.stdout or "no output").strip()
+                return f"❌ Preprocessing failed (exit code {result.returncode}):\n{details}"
+            progress(0.8, desc="Extracting features...")
+            # NOTE(review): the feature-extraction script is only located here,
+            # never executed, although the status text below says features were
+            # extracted. Its command line is not visible in this file — confirm
+            # the expected arguments and invoke it.
+            extract_script = self.rvc_dir / "infer" / "modules" / "train" / "extract_feature_print.py"
+            if not extract_script.exists():
+                extract_script = self.rvc_dir / "trainset_preprocess_pipeline_print.py"
+            progress(1.0, desc="Preprocessing complete!")
+            return f"""✅ Preprocessing Complete!
+🎵 Sample Rate: {sample_rate}Hz
+🔍 Features extracted
+📊 Ready for training!
+Output: {result.stdout if result.stdout else 'Processing completed'}
+"""
+        except Exception as e:
+            return f"❌ Preprocessing failed: {str(e)}"
+    def train_model(self, model_name, epochs, batch_size, sample_rate, progress=gr.Progress()):
+        """Run the RVC training script and collect the resulting model files.
+        Launches the training script as a subprocess, reports progress from
+        its output, then copies the newest ``.pth`` from the run's weights
+        folder (and an ``.index`` file, if one exists) into the workspace.
+        Returns a status string; failures, including a non-zero exit of the
+        training script, are reported in the returned text, not raised.
+        """
+        import re  # local import: only needed to parse the trainer's log lines
+        try:
+            progress(0.05, desc="Initializing training...")
+            # Setup training paths
+            log_dir = self.rvc_dir / "logs" / model_name
+            log_dir.mkdir(parents=True, exist_ok=True)
+            progress(0.1, desc="Starting RVC training...")
+            # Training command
+            train_script = self.rvc_dir / "infer" / "modules" / "train" / "train.py"
+            if not train_script.exists():
+                train_script = self.rvc_dir / "train_nsf_sim_cache_sid_load_pretrain.py"
+            # NOTE(review): upstream RVC appears to expect tags such as "40k"
+            # for -sr, and the pretrained paths below are fixed to the 40k
+            # models regardless of sample_rate — confirm before using 48000.
+            cmd = [
+                sys.executable,
+                str(train_script),
+                "-e", model_name,
+                "-sr", str(sample_rate),
+                "-f0", "1",  # Use F0
+                "-bs", str(batch_size),
+                "-g", "0",  # Device index handed to the trainer
+                "-te", str(epochs),
+                "-se", "10",  # Save every N epochs
+                "-pg", str(self.rvc_dir / "pretrained" / "f0G40k.pth"),
+                "-pd", str(self.rvc_dir / "pretrained" / "f0D40k.pth"),
+                "-l", "0",  # presumably "keep only latest checkpoint" off — TODO confirm
+                "-c", "0"   # No cache
+            ]
+            progress(0.2, desc=f"Training {model_name}...")
+            # Run training. stderr is merged into stdout so a single reader
+            # drains both; an unread stderr pipe could fill up and block the
+            # child process indefinitely.
+            process = subprocess.Popen(
+                cmd,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True
+            )
+            # Matches e.g. "Epoch: 12" or "epoch 12" anywhere in a log line.
+            epoch_pattern = re.compile(r"epoch\D{0,3}(\d+)", re.IGNORECASE)
+            recent_lines = []
+            # Monitor training progress
+            for line in process.stdout:
+                # Keep a short tail of the log for the failure report below.
+                recent_lines.append(line.rstrip())
+                del recent_lines[:-20]
+                match = epoch_pattern.search(line)
+                if match:
+                    fraction = min(int(match.group(1)) / max(epochs, 1), 1.0)
+                    progress(0.2 + 0.6 * fraction,
+                            desc=f"Training: {line.strip()[:50]}")
+            return_code = process.wait()
+            if return_code != 0:
+                log_tail = "\n".join(recent_lines)
+                return f"❌ Training failed (exit code {return_code}).\n\n{log_tail}"
+            progress(0.9, desc="Finalizing model...")
+            # Find the generated model file
+            weights_dir = log_dir / "weights"
+            model_files = list(weights_dir.glob("*.pth")) if weights_dir.exists() else []
+            if model_files:
+                latest_model = max(model_files, key=lambda p: p.stat().st_mtime)
+                # Copy to output
+                output_dir = self.workspace / model_name
+                output_dir.mkdir(exist_ok=True)
+                shutil.copy2(latest_model, output_dir / f"{model_name}.pth")
+                # Copy index if exists
+                index_files = list(log_dir.glob("*.index"))
+                if index_files:
+                    shutil.copy2(index_files[0], output_dir)
+                progress(1.0, desc="Training complete!")
+                return f"""✅ Training Complete!
+🎓 Model: {model_name}
+📊 Epochs: {epochs}
+⚙️ Batch Size: {batch_size}
+🎵 Sample Rate: {sample_rate}Hz
+💾 Model Files:
+- {output_dir / f'{model_name}.pth'}
+- Index file (if generated)
+🎉 Ready to download and use!
+"""
+            else:
+                return "⚠️ Training completed but model file not found. Check logs directory."
+        except Exception as e:
+            return f"❌ Training failed: {str(e)}\n\nNote: Real RVC training requires significant resources. Consider using Google Colab with GPU."
+    def package_model(self, model_name):
+        """Zip a trained model's files for download.
+        Looks in ``<workspace>/<model_name>`` first and falls back to the
+        run's ``logs/<model_name>/weights`` folder. Only ``.pth``, ``.index``
+        and ``.json`` files are packaged, stored under their bare file names.
+        Returns ``(zip_path, status)``; ``zip_path`` is ``None`` on failure.
+        """
         try:
+            output_dir = self.workspace / model_name
+            if not output_dir.exists():
+                # Try logs directory
+                output_dir = self.rvc_dir / "logs" / model_name / "weights"
+            if not output_dir.exists():
+                return None, "❌ Model not found"
+            # Collect first so an empty result is reported as a failure, not
+            # written out as an empty archive with a success message.
+            model_files = [
+                file for file in output_dir.rglob("*")
+                if file.is_file() and file.suffix in ('.pth', '.index', '.json')
+            ]
+            if not model_files:
+                return None, "❌ Model not found"
+            # Create zip
+            zip_path = self.workspace / f"{model_name}_RVC.zip"
+            with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
+                for file in model_files:
+                    zipf.write(file, file.name)
+            return str(zip_path), f"✅ Model packaged: {zip_path.name}"
         except Exception as e:
+            return None, f"❌ Error: {str(e)}"
 # Initialize trainer
+trainer = RealRVCTrainer()
+# Gradio Interface
+with gr.Blocks(title="Real RVC Training - HuggingFace") as demo:
     gr.Markdown("""
+    # 🎤 Real RVC Model Training
+    ### Using Official RVC-Project Implementation
+    ⚠️ **Important:** This uses the REAL RVC training code. Models will work on weights.gg!
+    **Note:** Training on CPU will be slow. For faster training, use Google Colab with GPU.
     """)
+    with gr.Tab("⚙️ Step 0: Install RVC"):
+        gr.Markdown("""
+        First, install the official RVC codebase and pretrained models.
+        This will download:
+        - RVC source code
+        - Pretrained models (~200MB)
+        - Required dependencies
+        """)
+        install_btn = gr.Button("📦 Install RVC Components", variant="primary", size="lg")
+        install_output = gr.Textbox(label="Installation Status", lines=10)
+        install_btn.click(
+            fn=trainer.install_rvc,
+            outputs=install_output
+        )
     with gr.Tab("📁 Step 1: Prepare Dataset"):
         gr.Markdown("""
+        Upload your voice audio files
         **Requirements:**
+        - 10-30 minutes recommended
+        - WAV, MP3, FLAC formats
+        - Clean, clear voice
         - Single speaker
         """)
+        model_name_prep = gr.Textbox(
             label="Model Name",
+            value="my_voice_model",
+            placeholder="my_voice_model"
         )
+        audio_files = gr.File(
             label="Upload Audio Files",
             file_count="multiple",
             file_types=["audio"]
         )
+        prep_btn = gr.Button("📁 Prepare Dataset", variant="primary")
+        prep_output = gr.Textbox(label="Status", lines=8)
+        prep_btn.click(
             fn=trainer.prepare_dataset,
+            inputs=[audio_files, model_name_prep],
+            outputs=prep_output
         )
+    with gr.Tab("🔧 Step 2: Preprocess"):
+        gr.Markdown("Preprocess audio and extract features")
+        model_name_process = gr.Textbox(
             label="Model Name",
             value="my_voice_model"
         )
+        sample_rate_process = gr.Radio(
+            choices=["40000", "48000"],
+            value="40000",
+            label="Sample Rate"
+        )
+        process_btn = gr.Button("🔧 Preprocess Data", variant="primary")
+        process_output = gr.Textbox(label="Status", lines=8)
+        process_btn.click(
+            fn=trainer.preprocess_data,
+            inputs=[model_name_process, sample_rate_process],
+            outputs=process_output
         )
+    with gr.Tab("🚀 Step 3: Train Model"):
         gr.Markdown("""
+        Train the RVC model with real neural network training
+        ⚠️ **CPU Warning:** Training on CPU will take hours/days
         """)
         model_name_train = gr.Textbox(
             label="Model Name",
             value="my_voice_model"
         )
+        epochs_train = gr.Slider(
+            minimum=10,
+            maximum=500,
+            value=100,
+            step=10,
+            label="Epochs (More = Better Quality)"
         )
+        batch_size_train = gr.Slider(
             minimum=1,
+            maximum=16,
+            value=4,
             step=1,
             label="Batch Size"
         )
+        sample_rate_train = gr.Radio(
+            choices=["40000", "48000"],
+            value="40000",
+            label="Sample Rate"
+        )
+        train_btn = gr.Button("🎓 Start Real Training", variant="primary")
+        train_output = gr.Textbox(label="Training Status", lines=15)
         train_btn.click(
             fn=trainer.train_model,
+            inputs=[model_name_train, epochs_train, batch_size_train, sample_rate_train],
             outputs=train_output
         )
+    with gr.Tab("📥 Step 4: Download"):
+        gr.Markdown("Download your trained RVC model")
         model_name_download = gr.Textbox(
             label="Model Name",
             value="my_voice_model"
         )
+        download_btn = gr.Button("📦 Package Model", variant="primary")
+        download_file = gr.File(label="Download")
         download_status = gr.Textbox(label="Status")
         download_btn.click(
+            fn=trainer.package_model,
             inputs=model_name_download,
             outputs=[download_file, download_status]
         )
     gr.Markdown("""
     ---
     ### 📚 Resources
+    - [RVC Project](https://github.com/RVC-Project/Retrieval-based-Voice-Conversion-WebUI)
+    - [Google Colab (Recommended for GPU)](https://colab.research.google.com/)
+    - [Weights.gg](https://weights.gg/)
+    ### ⚠️ Important Notes
+    - This uses REAL RVC training - not simulation
+    - Models will work on weights.gg and aicovergen
+    - CPU training is VERY slow (hours to days)
+    - **Recommended:** Use Google Colab with free GPU for 10-100x faster training
+    - You'll need proper audio quality for good results
     """)
 if __name__ == "__main__":