ozipoetra commited on
Commit
754f043
Β·
1 Parent(s): aac14c4

refactor: create own RVC library from ultimate_rvc

Browse files

- Create lib/rvc/ with essential voice conversion components
- lib/rvc/algorithm/: encoders, generators, synthesizers
- lib/rvc/predictors/: F0 extraction (RMVPE, FCPE, CREPE)
- lib/rvc/tools/: audio splitting utilities
- lib/rvc/converter.py: VoiceConverter class
- lib/rvc/pipeline.py: voice conversion pipeline
- lib/rvc/config.py: device and model configuration
- Move configs (48000.json, 40000.json, 32000.json) to configs/
- Remove ultimate_rvc/ directory (no longer needed)
- Update lib/jobs.py to use new lib.rvc.converter

This creates a self-contained RVC library based on ultimate-rvc,
removing the external dependency and allowing for easier maintenance.

Files changed (42) hide show
  1. {ultimate_rvc/rvc/configs β†’ configs}/32000.json +0 -0
  2. {ultimate_rvc/rvc/configs β†’ configs}/40000.json +0 -0
  3. {ultimate_rvc/rvc/configs β†’ configs}/48000.json +0 -0
  4. lib/jobs.py +1 -1
  5. lib/rvc/__init__.py +16 -0
  6. lib/rvc/algorithm/__init__.py +2 -0
  7. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/attentions.py +1 -1
  8. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/commons.py +0 -0
  9. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/encoders.py +4 -4
  10. lib/rvc/algorithm/generators/__init__.py +2 -0
  11. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/generators/hifigan.py +2 -2
  12. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/generators/hifigan_mrf.py +0 -0
  13. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/generators/hifigan_nsf.py +3 -3
  14. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/generators/refinegan.py +1 -1
  15. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/modules.py +1 -1
  16. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/normalization.py +0 -0
  17. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/residuals.py +2 -2
  18. {ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/synthesizers.py +7 -7
  19. {ultimate_rvc β†’ lib}/rvc/common.py +2 -2
  20. {ultimate_rvc/rvc/configs β†’ lib/rvc}/config.py +1 -1
  21. ultimate_rvc/rvc/infer/infer.py β†’ lib/rvc/converter.py +35 -122
  22. {ultimate_rvc/rvc/infer β†’ lib/rvc}/pipeline.py +5 -9
  23. {ultimate_rvc/rvc/lib β†’ lib/rvc}/predictors/F0Extractor.py +3 -3
  24. {ultimate_rvc/rvc/lib β†’ lib/rvc}/predictors/FCPE.py +0 -0
  25. {ultimate_rvc/rvc/lib β†’ lib/rvc}/predictors/RMVPE.py +0 -0
  26. lib/rvc/predictors/__init__.py +2 -0
  27. {ultimate_rvc/rvc/lib β†’ lib/rvc}/predictors/f0.py +2 -2
  28. lib/rvc/tools/__init__.py +2 -0
  29. {ultimate_rvc/rvc/lib β†’ lib/rvc}/tools/split_audio.py +0 -0
  30. {ultimate_rvc/rvc/lib β†’ lib/rvc}/utils.py +1 -1
  31. ultimate_rvc/__init__.py +0 -0
  32. ultimate_rvc/common.py +0 -37
  33. ultimate_rvc/rvc/__init__.py +0 -4
  34. ultimate_rvc/rvc/configs/__init__.py +0 -0
  35. ultimate_rvc/rvc/infer/__init__.py +0 -0
  36. ultimate_rvc/rvc/infer/typing_extra.py +0 -57
  37. ultimate_rvc/rvc/lib/__init__.py +0 -0
  38. ultimate_rvc/rvc/lib/algorithm/__init__.py +0 -0
  39. ultimate_rvc/rvc/lib/algorithm/generators/__init__.py +0 -0
  40. ultimate_rvc/rvc/lib/predictors/__init__.py +0 -0
  41. ultimate_rvc/rvc/lib/tools/__init__.py +0 -0
  42. ultimate_rvc/typing_extra.py +0 -154
{ultimate_rvc/rvc/configs β†’ configs}/32000.json RENAMED
File without changes
{ultimate_rvc/rvc/configs β†’ configs}/40000.json RENAMED
File without changes
{ultimate_rvc/rvc/configs β†’ configs}/48000.json RENAMED
File without changes
lib/jobs.py CHANGED
@@ -34,7 +34,7 @@ def get_vc():
34
  global _vc_instance
35
  if _vc_instance is None:
36
  logger.info("Loading VoiceConverter…")
37
- from ultimate_rvc.rvc.infer.infer import VoiceConverter
38
  _vc_instance = VoiceConverter()
39
  logger.info("VoiceConverter ready.")
40
  return _vc_instance
 
34
  global _vc_instance
35
  if _vc_instance is None:
36
  logger.info("Loading VoiceConverter…")
37
+ from lib.rvc.converter import VoiceConverter
38
  _vc_instance = VoiceConverter()
39
  logger.info("VoiceConverter ready.")
40
  return _vc_instance
lib/rvc/__init__.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """RVC Voice Conversion Library.
2
+
3
+ This is a minimal rewrite of the ultimate-rvc library for voice conversion.
4
+ """
5
+ from __future__ import annotations
6
+
7
+ from pathlib import Path
8
+
9
+ # Base directory for RVC resources
10
+ BASE_DIR = Path(__file__).parent.parent.parent
11
+
12
+ # Models directory
13
+ MODELS_DIR = BASE_DIR / "rvc_models"
14
+
15
+ # Configs directory (for model configs)
16
+ CONFIGS_DIR = BASE_DIR / "configs"
lib/rvc/algorithm/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Algorithm modules for RVC."""
2
+ from __future__ import annotations
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/attentions.py RENAMED
@@ -2,7 +2,7 @@ import math
2
 
3
  import torch
4
 
5
- from ultimate_rvc.rvc.lib.algorithm.commons import convert_pad_shape
6
 
7
 
8
  class MultiHeadAttention(torch.nn.Module):
 
2
 
3
  import torch
4
 
5
+ from lib.rvc.algorithm.commons import convert_pad_shape
6
 
7
 
8
  class MultiHeadAttention(torch.nn.Module):
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/commons.py RENAMED
File without changes
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/encoders.py RENAMED
@@ -3,10 +3,10 @@ import math
3
 
4
  import torch
5
 
6
- from ultimate_rvc.rvc.lib.algorithm.attentions import FFN, MultiHeadAttention
7
- from ultimate_rvc.rvc.lib.algorithm.commons import sequence_mask
8
- from ultimate_rvc.rvc.lib.algorithm.modules import WaveNet
9
- from ultimate_rvc.rvc.lib.algorithm.normalization import LayerNorm
10
 
11
  logger = logging.getLogger(__name__)
12
 
 
3
 
4
  import torch
5
 
6
+ from lib.rvc.algorithm.attentions import FFN, MultiHeadAttention
7
+ from lib.rvc.algorithm.commons import sequence_mask
8
+ from lib.rvc.algorithm.modules import WaveNet
9
+ from lib.rvc.algorithm.normalization import LayerNorm
10
 
11
  logger = logging.getLogger(__name__)
12
 
lib/rvc/algorithm/generators/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Generator modules for RVC vocoders."""
2
+ from __future__ import annotations
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/generators/hifigan.py RENAMED
@@ -6,8 +6,8 @@ import torch
6
  from torch.nn.utils import remove_weight_norm
7
  from torch.nn.utils.parametrizations import weight_norm
8
 
9
- from ultimate_rvc.rvc.lib.algorithm.commons import init_weights
10
- from ultimate_rvc.rvc.lib.algorithm.residuals import LRELU_SLOPE, ResBlock
11
 
12
 
13
  class HiFiGANGenerator(torch.nn.Module):
 
6
  from torch.nn.utils import remove_weight_norm
7
  from torch.nn.utils.parametrizations import weight_norm
8
 
9
+ from lib.rvc.algorithm.commons import init_weights
10
+ from lib.rvc.algorithm.residuals import LRELU_SLOPE, ResBlock
11
 
12
 
13
  class HiFiGANGenerator(torch.nn.Module):
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/generators/hifigan_mrf.py RENAMED
File without changes
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/generators/hifigan_nsf.py RENAMED
@@ -7,9 +7,9 @@ from torch.nn.utils import remove_weight_norm
7
  from torch.nn.utils.parametrizations import weight_norm
8
  from torch.utils.checkpoint import checkpoint
9
 
10
- from ultimate_rvc.rvc.lib.algorithm.commons import init_weights
11
- from ultimate_rvc.rvc.lib.algorithm.generators.hifigan import SineGenerator
12
- from ultimate_rvc.rvc.lib.algorithm.residuals import LRELU_SLOPE, ResBlock
13
 
14
 
15
  class SourceModuleHnNSF(torch.nn.Module):
 
7
  from torch.nn.utils.parametrizations import weight_norm
8
  from torch.utils.checkpoint import checkpoint
9
 
10
+ from lib.rvc.algorithm.commons import init_weights
11
+ from lib.rvc.algorithm.generators.hifigan import SineGenerator
12
+ from lib.rvc.algorithm.residuals import LRELU_SLOPE, ResBlock
13
 
14
 
15
  class SourceModuleHnNSF(torch.nn.Module):
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/generators/refinegan.py RENAMED
@@ -8,7 +8,7 @@ from torch.nn.utils import remove_weight_norm
8
  from torch.nn.utils.parametrizations import weight_norm
9
  from torch.utils.checkpoint import checkpoint
10
 
11
- from ultimate_rvc.rvc.lib.algorithm.commons import get_padding, init_weights
12
 
13
 
14
  class ResBlock(nn.Module):
 
8
  from torch.nn.utils.parametrizations import weight_norm
9
  from torch.utils.checkpoint import checkpoint
10
 
11
+ from lib.rvc.algorithm.commons import get_padding, init_weights
12
 
13
 
14
  class ResBlock(nn.Module):
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/modules.py RENAMED
@@ -1,6 +1,6 @@
1
  import torch
2
 
3
- from ultimate_rvc.rvc.lib.algorithm.commons import fused_add_tanh_sigmoid_multiply
4
 
5
 
6
  class WaveNet(torch.nn.Module):
 
1
  import torch
2
 
3
+ from lib.rvc.algorithm.commons import fused_add_tanh_sigmoid_multiply
4
 
5
 
6
  class WaveNet(torch.nn.Module):
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/normalization.py RENAMED
File without changes
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/residuals.py RENAMED
@@ -6,8 +6,8 @@ import torch
6
  from torch.nn.utils import remove_weight_norm
7
  from torch.nn.utils.parametrizations import weight_norm
8
 
9
- from ultimate_rvc.rvc.lib.algorithm.commons import get_padding, init_weights
10
- from ultimate_rvc.rvc.lib.algorithm.modules import WaveNet
11
 
12
  LRELU_SLOPE = 0.1
13
 
 
6
  from torch.nn.utils import remove_weight_norm
7
  from torch.nn.utils.parametrizations import weight_norm
8
 
9
+ from lib.rvc.algorithm.commons import get_padding, init_weights
10
+ from lib.rvc.algorithm.modules import WaveNet
11
 
12
  LRELU_SLOPE = 0.1
13
 
{ultimate_rvc/rvc/lib β†’ lib/rvc}/algorithm/synthesizers.py RENAMED
@@ -4,13 +4,13 @@ import logging
4
 
5
  import torch
6
 
7
- from ultimate_rvc.rvc.lib.algorithm.commons import rand_slice_segments, slice_segments
8
- from ultimate_rvc.rvc.lib.algorithm.encoders import PosteriorEncoder, TextEncoder
9
- from ultimate_rvc.rvc.lib.algorithm.generators.hifigan import HiFiGANGenerator
10
- from ultimate_rvc.rvc.lib.algorithm.generators.hifigan_mrf import HiFiGANMRFGenerator
11
- from ultimate_rvc.rvc.lib.algorithm.generators.hifigan_nsf import HiFiGANNSFGenerator
12
- from ultimate_rvc.rvc.lib.algorithm.generators.refinegan import RefineGANGenerator
13
- from ultimate_rvc.rvc.lib.algorithm.residuals import ResidualCouplingBlock
14
 
15
  logger = logging.getLogger(__name__)
16
 
 
4
 
5
  import torch
6
 
7
+ from lib.rvc.algorithm.commons import rand_slice_segments, slice_segments
8
+ from lib.rvc.algorithm.encoders import PosteriorEncoder, TextEncoder
9
+ from lib.rvc.algorithm.generators.hifigan import HiFiGANGenerator
10
+ from lib.rvc.algorithm.generators.hifigan_mrf import HiFiGANMRFGenerator
11
+ from lib.rvc.algorithm.generators.hifigan_nsf import HiFiGANNSFGenerator
12
+ from lib.rvc.algorithm.generators.refinegan import RefineGANGenerator
13
+ from lib.rvc.algorithm.residuals import ResidualCouplingBlock
14
 
15
  logger = logging.getLogger(__name__)
16
 
{ultimate_rvc β†’ lib}/rvc/common.py RENAMED
@@ -1,9 +1,9 @@
1
  """Common constants and functions for the RVC package."""
2
-
3
  from __future__ import annotations
4
 
5
  from pathlib import Path
6
 
7
  RVC_DIR = Path(__file__).resolve().parent
8
- RVC_CONFIGS_DIR = RVC_DIR / "configs"
 
9
  RVC_TRAINING_MODELS_DIR = RVC_DIR / "train" / "models"
 
1
  """Common constants and functions for the RVC package."""
 
2
  from __future__ import annotations
3
 
4
  from pathlib import Path
5
 
6
  RVC_DIR = Path(__file__).resolve().parent
7
+ RVC_CONFIGS_DIR = Path(__file__).resolve().parent.parent.parent / "configs"
8
+ RVC_MODELS_DIR = Path(__file__).resolve().parent.parent.parent / "rvc_models"
9
  RVC_TRAINING_MODELS_DIR = RVC_DIR / "train" / "models"
{ultimate_rvc/rvc/configs β†’ lib/rvc}/config.py RENAMED
@@ -4,7 +4,7 @@ import pathlib
4
 
5
  import torch
6
 
7
- from ultimate_rvc.rvc.common import RVC_CONFIGS_DIR
8
 
9
  version_config_paths = [
10
  os.path.join("48000.json"),
 
4
 
5
  import torch
6
 
7
+ from lib.rvc.common import RVC_CONFIGS_DIR
8
 
9
  version_config_paths = [
10
  os.path.join("48000.json"),
ultimate_rvc/rvc/infer/infer.py β†’ lib/rvc/converter.py RENAMED
@@ -1,4 +1,5 @@
1
- from typing import TYPE_CHECKING, Unpack
 
2
 
3
  import logging
4
  import os
@@ -7,14 +8,11 @@ import sys
7
  import time
8
  import traceback
9
 
10
- import soxr
11
-
12
- import numpy as np
13
-
14
- import torch
15
-
16
  import librosa
 
 
17
  import soundfile as sf
 
18
  from pedalboard import (
19
  Bitcrush,
20
  Chorus,
@@ -28,30 +26,29 @@ from pedalboard import (
28
  PitchShift,
29
  Reverb,
30
  )
 
31
 
32
- now_dir = pathlib.Path.cwd()
33
- sys.path.append(str(now_dir))
34
  import lazy_loader as lazy
35
 
36
- from ultimate_rvc.rvc.configs.config import Config
37
- from ultimate_rvc.rvc.infer.pipeline import Pipeline as VC
38
- from ultimate_rvc.rvc.infer.typing_extra import ConvertAudioKwArgs
39
- from ultimate_rvc.rvc.lib.algorithm.synthesizers import Synthesizer
40
- from ultimate_rvc.rvc.lib.tools.split_audio import merge_audio, process_audio
41
- from ultimate_rvc.rvc.lib.utils import load_audio_infer, load_embedding
42
- from ultimate_rvc.typing_extra import F0Method
43
 
44
  if TYPE_CHECKING:
45
  import noisereduce as nr
46
  else:
47
  nr = lazy.load("noisereduce")
48
 
49
- # logging.getLogger("httpx").setLevel(logging.WARNING)
50
- # logging.getLogger("httpcore").setLevel(logging.WARNING)
51
- # logging.getLogger("faiss").setLevel(logging.WARNING)
52
- # logging.getLogger("faiss.loader").setLevel(logging.WARNING)
53
  logger = logging.getLogger(__name__)
54
 
 
 
 
55
 
56
  class VoiceConverter:
57
  """
@@ -62,18 +59,16 @@ class VoiceConverter:
62
  """
63
  Initializes the VoiceConverter with default configuration, and sets up models and parameters.
64
  """
65
- self.config = Config() # Load configuration
66
- self.hubert_model = (
67
- None # Initialize the Hubert model (for embedding extraction)
68
- )
69
- self.last_embedder_model = None # Last used embedder model
70
- self.tgt_sr = None # Target sampling rate for the output audio
71
- self.net_g = None # Generator network for voice conversion
72
- self.vc = None # Voice conversion pipeline instance
73
- self.cpt = None # Checkpoint for loading model weights
74
- self.version = None # Model version
75
- self.n_spk = None # Number of speakers in the model
76
- self.use_f0 = None # Whether the model uses F0
77
  self.loaded_model = None
78
 
79
  def load_hubert(self, embedder_model: str, embedder_model_custom: str = None):
@@ -88,18 +83,16 @@ class VoiceConverter:
88
  self.hubert_model = load_embedding(embedder_model, embedder_model_custom)
89
  self.hubert_model = self.hubert_model.to(self.config.device).float()
90
  self.hubert_model.eval()
91
- # Disable gradient tracking permanently for inference-only model
92
  for param in self.hubert_model.parameters():
93
  param.requires_grad_(False)
94
- # Compile with torch.compile if available (torch 2.0+) for CPU kernel fusion
95
  if hasattr(torch, "compile"):
96
  try:
97
  self.hubert_model = torch.compile(
98
  self.hubert_model, mode="reduce-overhead", fullgraph=False
99
  )
100
  logger.info("HuBERT compiled with torch.compile (reduce-overhead)")
101
- except Exception as _ce:
102
- logger.info("torch.compile skipped: %s", _ce)
103
 
104
  @staticmethod
105
  def remove_audio_noise(data, sr, reduction_strength=0.7):
@@ -113,7 +106,6 @@ class VoiceConverter:
113
 
114
  """
115
  try:
116
-
117
  reduced_noise = nr.reduce_noise(
118
  y=data,
119
  sr=sr,
@@ -140,15 +132,7 @@ class VoiceConverter:
140
  print(f"Saving audio as {output_format}...")
141
  audio, sample_rate = librosa.load(input_path, sr=None)
142
  common_sample_rates = [
143
- 8000,
144
- 11025,
145
- 12000,
146
- 16000,
147
- 22050,
148
- 24000,
149
- 32000,
150
- 44100,
151
- 48000,
152
  ]
153
  target_sr = min(common_sample_rates, key=lambda x: abs(x - sample_rate))
154
  audio = librosa.resample(
@@ -158,7 +142,7 @@ class VoiceConverter:
158
  res_type="soxr_vhq",
159
  )
160
  sf.write(output_path, audio, target_sr, format=output_format.lower())
161
- return output_path
162
  except Exception as error:
163
  print(f"An error occurred converting the audio format: {error}")
164
 
@@ -251,7 +235,7 @@ class VoiceConverter:
251
  sid: int = 0,
252
  proposed_pitch: bool = False,
253
  proposed_pitch_threshold: float = 155.0,
254
- **kwargs: Unpack[ConvertAudioKwArgs],
255
  ):
256
  """
257
  Performs voice conversion on the input audio.
@@ -327,7 +311,7 @@ class VoiceConverter:
327
  sid=sid,
328
  audio=c,
329
  pitch=pitch,
330
- f0_method=f0_method or F0Method.RMVPE,
331
  file_index=file_index,
332
  index_rate=index_rate,
333
  pitch_guidance=self.use_f0,
@@ -389,73 +373,6 @@ class VoiceConverter:
389
  elapsed_time,
390
  )
391
 
392
- def convert_audio_batch(
393
- self,
394
- audio_input_paths: str,
395
- audio_output_path: str,
396
- **kwargs,
397
- ):
398
- """
399
- Performs voice conversion on a batch of input audio files.
400
-
401
- Args:
402
- audio_input_paths (str): List of paths to the input audio files.
403
- audio_output_path (str): Path to the output audio file.
404
- resample_sr (int, optional): Resample sampling rate. Default is 0.
405
- sid (int, optional): Speaker ID. Default is 0.
406
- **kwargs: Additional keyword arguments.
407
-
408
- """
409
- pid = os.getpid()
410
- try:
411
- with pathlib.Path(os.path.join(now_dir, "assets", "infer_pid.txt")).open(
412
- "w",
413
- ) as pid_file:
414
- pid_file.write(str(pid))
415
- start_time = time.time()
416
- print(f"Converting audio batch '{audio_input_paths}'...")
417
- audio_files = [
418
- f
419
- for f in os.listdir(audio_input_paths)
420
- if f.lower().endswith(
421
- (
422
- "wav",
423
- "mp3",
424
- "flac",
425
- "ogg",
426
- "opus",
427
- "m4a",
428
- "mp4",
429
- "aac",
430
- "alac",
431
- "wma",
432
- "aiff",
433
- "webm",
434
- "ac3",
435
- ),
436
- )
437
- ]
438
- print(f"Detected {len(audio_files)} audio files for inference.")
439
- for a in audio_files:
440
- new_input = os.path.join(audio_input_paths, a)
441
- new_output = os.path.splitext(a)[0] + "_output.wav"
442
- new_output = os.path.join(audio_output_path, new_output)
443
- if pathlib.Path(new_output).exists():
444
- continue
445
- self.convert_audio(
446
- audio_input_path=new_input,
447
- audio_output_path=new_output,
448
- **kwargs,
449
- )
450
- print(f"Conversion completed at '{audio_input_paths}'.")
451
- elapsed_time = time.time() - start_time
452
- print(f"Batch conversion completed in {elapsed_time:.2f} seconds.")
453
- except Exception as error:
454
- print(f"An error occurred during audio batch conversion: {error}")
455
- print(traceback.format_exc())
456
- finally:
457
- pathlib.Path(os.path.join(now_dir, "assets", "infer_pid.txt")).unlink()
458
-
459
  def get_vc(self, weight_root, sid):
460
  """
461
  Loads the voice conversion model and sets up the pipeline.
@@ -509,9 +426,7 @@ class VoiceConverter:
509
  try:
510
  self.cpt = torch.load(weight_root, map_location="cpu", weights_only=False)
511
  except Exception:
512
- # Fallback for models saved with newer pickle protocols (e.g. protocol 83)
513
  import pickle
514
- import io
515
  try:
516
  with open(weight_root, "rb") as f:
517
  self.cpt = pickle.load(f)
@@ -541,18 +456,16 @@ class VoiceConverter:
541
  self.net_g.load_state_dict(self.cpt["weight"], strict=False)
542
  self.net_g = self.net_g.to(self.config.device).float()
543
  self.net_g.eval()
544
- # Disable gradient tracking for all synthesizer params
545
  for param in self.net_g.parameters():
546
  param.requires_grad_(False)
547
- # Compile synthesizer for faster CPU inference
548
  if hasattr(torch, "compile"):
549
  try:
550
  self.net_g = torch.compile(
551
  self.net_g, mode="reduce-overhead", fullgraph=False
552
  )
553
  logger.info("Synthesizer compiled with torch.compile")
554
- except Exception as _ce:
555
- logger.info("torch.compile skipped for net_g: %s", _ce)
556
 
557
  def setup_vc_instance(self):
558
  """
 
1
+ """Voice Converter for RVC."""
2
+ from __future__ import annotations
3
 
4
  import logging
5
  import os
 
8
  import time
9
  import traceback
10
 
 
 
 
 
 
 
11
  import librosa
12
+ import numpy as np
13
+ import soxr
14
  import soundfile as sf
15
+ import torch
16
  from pedalboard import (
17
  Bitcrush,
18
  Chorus,
 
26
  PitchShift,
27
  Reverb,
28
  )
29
+ from typing import TYPE_CHECKING, Unpack
30
 
 
 
31
  import lazy_loader as lazy
32
 
33
+ from lib.rvc.config import Config
34
+ from lib.rvc.pipeline import Pipeline as VC
35
+ from lib.rvc.algorithm.synthesizers import Synthesizer
36
+ from lib.rvc.tools.split_audio import merge_audio, process_audio
37
+ from lib.rvc.utils import load_audio_infer, load_embedding
 
 
38
 
39
  if TYPE_CHECKING:
40
  import noisereduce as nr
41
  else:
42
  nr = lazy.load("noisereduce")
43
 
44
+ now_dir = pathlib.Path.cwd()
45
+ sys.path.append(str(now_dir))
46
+
 
47
  logger = logging.getLogger(__name__)
48
 
49
+ # Type alias for F0 method
50
+ F0Method = str
51
+
52
 
53
  class VoiceConverter:
54
  """
 
59
  """
60
  Initializes the VoiceConverter with default configuration, and sets up models and parameters.
61
  """
62
+ self.config = Config()
63
+ self.hubert_model = None
64
+ self.last_embedder_model = None
65
+ self.tgt_sr = None
66
+ self.net_g = None
67
+ self.vc = None
68
+ self.cpt = None
69
+ self.version = None
70
+ self.n_spk = None
71
+ self.use_f0 = None
 
 
72
  self.loaded_model = None
73
 
74
  def load_hubert(self, embedder_model: str, embedder_model_custom: str = None):
 
83
  self.hubert_model = load_embedding(embedder_model, embedder_model_custom)
84
  self.hubert_model = self.hubert_model.to(self.config.device).float()
85
  self.hubert_model.eval()
 
86
  for param in self.hubert_model.parameters():
87
  param.requires_grad_(False)
 
88
  if hasattr(torch, "compile"):
89
  try:
90
  self.hubert_model = torch.compile(
91
  self.hubert_model, mode="reduce-overhead", fullgraph=False
92
  )
93
  logger.info("HuBERT compiled with torch.compile (reduce-overhead)")
94
+ except Exception as e:
95
+ logger.info("torch.compile skipped: %s", e)
96
 
97
  @staticmethod
98
  def remove_audio_noise(data, sr, reduction_strength=0.7):
 
106
 
107
  """
108
  try:
 
109
  reduced_noise = nr.reduce_noise(
110
  y=data,
111
  sr=sr,
 
132
  print(f"Saving audio as {output_format}...")
133
  audio, sample_rate = librosa.load(input_path, sr=None)
134
  common_sample_rates = [
135
+ 8000, 11025, 12000, 16000, 22050, 24000, 32000, 44100, 48000,
 
 
 
 
 
 
 
 
136
  ]
137
  target_sr = min(common_sample_rates, key=lambda x: abs(x - sample_rate))
138
  audio = librosa.resample(
 
142
  res_type="soxr_vhq",
143
  )
144
  sf.write(output_path, audio, target_sr, format=output_format.lower())
145
+ return output_path
146
  except Exception as error:
147
  print(f"An error occurred converting the audio format: {error}")
148
 
 
235
  sid: int = 0,
236
  proposed_pitch: bool = False,
237
  proposed_pitch_threshold: float = 155.0,
238
+ **kwargs,
239
  ):
240
  """
241
  Performs voice conversion on the input audio.
 
311
  sid=sid,
312
  audio=c,
313
  pitch=pitch,
314
+ f0_method=f0_method or "rmvpe",
315
  file_index=file_index,
316
  index_rate=index_rate,
317
  pitch_guidance=self.use_f0,
 
373
  elapsed_time,
374
  )
375
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
376
  def get_vc(self, weight_root, sid):
377
  """
378
  Loads the voice conversion model and sets up the pipeline.
 
426
  try:
427
  self.cpt = torch.load(weight_root, map_location="cpu", weights_only=False)
428
  except Exception:
 
429
  import pickle
 
430
  try:
431
  with open(weight_root, "rb") as f:
432
  self.cpt = pickle.load(f)
 
456
  self.net_g.load_state_dict(self.cpt["weight"], strict=False)
457
  self.net_g = self.net_g.to(self.config.device).float()
458
  self.net_g.eval()
 
459
  for param in self.net_g.parameters():
460
  param.requires_grad_(False)
 
461
  if hasattr(torch, "compile"):
462
  try:
463
  self.net_g = torch.compile(
464
  self.net_g, mode="reduce-overhead", fullgraph=False
465
  )
466
  logger.info("Synthesizer compiled with torch.compile")
467
+ except Exception as e:
468
+ logger.info("torch.compile skipped for net_g: %s", e)
469
 
470
  def setup_vc_instance(self):
471
  """
{ultimate_rvc/rvc/infer β†’ lib/rvc}/pipeline.py RENAMED
@@ -1,23 +1,19 @@
 
1
  import pathlib
2
  import sys
3
 
4
- import numpy as np
5
- from scipy import signal
6
-
7
  import faiss
 
 
8
  import torch
9
  import torch.nn.functional as F
10
-
11
- import librosa
12
 
13
  now_dir = pathlib.Path.cwd()
14
  sys.path.append(str(now_dir))
15
 
16
- import logging
17
-
18
- from ultimate_rvc.rvc.lib.predictors.f0 import CREPE, FCPE, RMVPE
19
 
20
- # logging.getLogger("faiss").setLevel(logging.WARNING)
21
  logger = logging.getLogger(__name__)
22
 
23
  # Constants for high-pass filter
 
1
+ import logging
2
  import pathlib
3
  import sys
4
 
 
 
 
5
  import faiss
6
+ import librosa
7
+ import numpy as np
8
  import torch
9
  import torch.nn.functional as F
10
+ from scipy import signal
 
11
 
12
  now_dir = pathlib.Path.cwd()
13
  sys.path.append(str(now_dir))
14
 
15
+ from lib.rvc.predictors.f0 import CREPE, FCPE, RMVPE
 
 
16
 
 
17
  logger = logging.getLogger(__name__)
18
 
19
  # Constants for high-pass filter
{ultimate_rvc/rvc/lib β†’ lib/rvc}/predictors/F0Extractor.py RENAMED
@@ -12,11 +12,11 @@ import torchcrepe
12
 
13
  import librosa
14
 
15
- from ultimate_rvc.common import RVC_MODELS_DIR
16
- from ultimate_rvc.rvc.configs.config import Config
17
 
18
  # from tools.anyf0.rmvpe import RMVPE
19
- from ultimate_rvc.rvc.lib.predictors.RMVPE import RMVPE0Predictor
20
 
21
  config = Config()
22
 
 
12
 
13
  import librosa
14
 
15
+ from lib.rvc.common import RVC_MODELS_DIR
16
+ from lib.rvc.config import Config
17
 
18
  # from tools.anyf0.rmvpe import RMVPE
19
+ from lib.rvc.predictors.RMVPE import RMVPE0Predictor
20
 
21
  config = Config()
22
 
{ultimate_rvc/rvc/lib β†’ lib/rvc}/predictors/FCPE.py RENAMED
File without changes
{ultimate_rvc/rvc/lib β†’ lib/rvc}/predictors/RMVPE.py RENAMED
File without changes
lib/rvc/predictors/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """F0 pitch prediction modules."""
2
+ from __future__ import annotations
{ultimate_rvc/rvc/lib β†’ lib/rvc}/predictors/f0.py RENAMED
@@ -5,8 +5,8 @@ from torchfcpe import spawn_infer_model_from_pt
5
  import torch
6
  import torchcrepe
7
 
8
- from ultimate_rvc.common import RVC_MODELS_DIR
9
- from ultimate_rvc.rvc.lib.predictors.RMVPE import RMVPE0Predictor
10
 
11
 
12
  class RMVPE:
 
5
  import torch
6
  import torchcrepe
7
 
8
+ from lib.rvc.common import RVC_MODELS_DIR
9
+ from lib.rvc.predictors.RMVPE import RMVPE0Predictor
10
 
11
 
12
  class RMVPE:
lib/rvc/tools/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Audio processing tools."""
2
+ from __future__ import annotations
{ultimate_rvc/rvc/lib β†’ lib/rvc}/tools/split_audio.py RENAMED
File without changes
{ultimate_rvc/rvc/lib β†’ lib/rvc}/utils.py RENAMED
@@ -18,7 +18,7 @@ from transformers import HubertModel
18
  import librosa
19
  import soundfile as sf
20
 
21
- from ultimate_rvc.common import RVC_MODELS_DIR
22
 
23
  # Remove this to see warnings about transformers models
24
  warnings.filterwarnings("ignore")
 
18
  import librosa
19
  import soundfile as sf
20
 
21
+ from lib.rvc.common import RVC_MODELS_DIR
22
 
23
  # Remove this to see warnings about transformers models
24
  warnings.filterwarnings("ignore")
ultimate_rvc/__init__.py DELETED
File without changes
ultimate_rvc/common.py DELETED
@@ -1,37 +0,0 @@
1
- """Common variables used in the Ultimate RVC project."""
2
-
3
- from __future__ import annotations
4
-
5
- import os
6
- import sys
7
- from pathlib import Path
8
-
9
- BASE_DIR = Path.cwd()
10
- VENV_DIR = Path(sys.prefix)
11
- MODELS_DIR = Path(os.getenv("URVC_MODELS_DIR") or BASE_DIR / "models")
12
- RVC_MODELS_DIR = MODELS_DIR / "rvc"
13
- VOICE_MODELS_DIR = Path(
14
- os.getenv("URVC_VOICE_MODELS_DIR") or RVC_MODELS_DIR / "voice_models",
15
- )
16
- EMBEDDER_MODELS_DIR = RVC_MODELS_DIR / "embedders"
17
- CUSTOM_EMBEDDER_MODELS_DIR = EMBEDDER_MODELS_DIR / "custom"
18
- PRETRAINED_MODELS_DIR = RVC_MODELS_DIR / "pretraineds"
19
- CUSTOM_PRETRAINED_MODELS_DIR = PRETRAINED_MODELS_DIR / "custom"
20
-
21
- SEPARATOR_MODELS_DIR = MODELS_DIR / "audio_separator"
22
- TRAINING_MODELS_DIR = RVC_MODELS_DIR / "training"
23
- AUDIO_DIR = Path(os.getenv("URVC_AUDIO_DIR") or BASE_DIR / "audio")
24
- TEMP_DIR = Path(os.getenv("URVC_TEMP_DIR") or BASE_DIR / "temp")
25
- CONFIG_DIR = Path(os.getenv("URVC_CONFIG_DIR") or BASE_DIR / "config")
26
- NODE_PATH = Path(
27
- (
28
- os.getenv("GRADIO_NODE_PATH")
29
- or (
30
- VENV_DIR
31
- / f"lib/python{sys.version_info.major}.{sys.version_info.minor}"
32
- / "site-packages/nodejs_wheel/bin/node"
33
- )
34
- if sys.platform == "linux"
35
- else VENV_DIR / "Lib/site-packages/nodejs_wheel/node.exe"
36
- ),
37
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ultimate_rvc/rvc/__init__.py DELETED
@@ -1,4 +0,0 @@
1
- """
2
- The rvc package is a collection of tools for voice cloning using the RVC
3
- method.
4
- """
 
 
 
 
 
ultimate_rvc/rvc/configs/__init__.py DELETED
File without changes
ultimate_rvc/rvc/infer/__init__.py DELETED
File without changes
ultimate_rvc/rvc/infer/typing_extra.py DELETED
@@ -1,57 +0,0 @@
1
- """Extra type definitions for the `ultimate_rvc.rvc.infer` package."""
2
-
3
- from typing import TypedDict
4
-
5
-
6
- class ConvertAudioKwArgs(TypedDict, total=False):
7
- """Keyword arguments for the `convert_audio` function."""
8
-
9
- # pre-processing arguments
10
- formant_shifting: bool
11
- formant_qfrency: float
12
- formant_timbre: float
13
- # reverb post-processing arguments
14
- reverb: bool
15
- reverb_room_size: float
16
- reverb_damping: float
17
- reverb_wet_level: float
18
- reverb_dry_level: float
19
- reverb_width: float
20
- reverb_freeze_mode: int
21
- # pitch shift post-processing arguments
22
- pitch_shift: bool
23
- pitch_shift_semitones: int
24
- # limiter post-processing arguments
25
- limiter: bool
26
- limiter_threshold: float
27
- limiter_release: float
28
- # gain post-processing arguments
29
- gain: bool
30
- gain_db: int
31
- # distortion post-processing arguments
32
- distortion: bool
33
- distortion_gain: int
34
- # chorus post-processing arguments
35
- chorus: bool
36
- chorus_rate: float
37
- chorus_depth: float
38
- chorus_delay: int
39
- chorus_feedback: float
40
- chorus_mix: float
41
- # bitcrush post-processing arguments
42
- bitcrush: bool
43
- bitcrush_bit_depth: int
44
- # clipping post-processing arguments
45
- clipping: bool
46
- clipping_threshold: int
47
- # compressor post-processing arguments
48
- compressor: bool
49
- compressor_threshold: int
50
- compressor_ratio: int
51
- compressor_attack: float
52
- compressor_release: int
53
- # delay post-processing arguments
54
- delay: bool
55
- delay_seconds: float
56
- delay_feedback: float
57
- delay_mix: float
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ultimate_rvc/rvc/lib/__init__.py DELETED
File without changes
ultimate_rvc/rvc/lib/algorithm/__init__.py DELETED
File without changes
ultimate_rvc/rvc/lib/algorithm/generators/__init__.py DELETED
File without changes
ultimate_rvc/rvc/lib/predictors/__init__.py DELETED
File without changes
ultimate_rvc/rvc/lib/tools/__init__.py DELETED
File without changes
ultimate_rvc/typing_extra.py DELETED
@@ -1,154 +0,0 @@
1
- """Extra typing for the Ultimate RVC project."""
2
-
3
- from __future__ import annotations
4
-
5
- from collections.abc import Mapping, Sequence
6
- from enum import IntEnum, StrEnum
7
- from os import PathLike
8
-
9
- type StrPath = str | PathLike[str]
10
-
11
- type Json = Mapping[str, Json] | Sequence[Json] | str | int | float | bool | None
12
-
13
-
14
- class SeparationModel(StrEnum):
15
- """Enumeration of audio separation models."""
16
-
17
- UVR_MDX_NET_VOC_FT = "UVR-MDX-NET-Voc_FT.onnx"
18
- UVR_MDX_NET_KARA_2 = "UVR_MDXNET_KARA_2.onnx"
19
- REVERB_HQ_BY_FOXJOY = "Reverb_HQ_By_FoxJoy.onnx"
20
-
21
-
22
- class SegmentSize(IntEnum):
23
- """Enumeration of segment sizes for audio separation."""
24
-
25
- SEG_64 = 64
26
- SEG_128 = 128
27
- SEG_256 = 256
28
- SEG_512 = 512
29
- SEG_1024 = 1024
30
- SEG_2048 = 2048
31
-
32
-
33
- class F0Method(StrEnum):
34
- """Enumeration of pitch extraction methods."""
35
-
36
- RMVPE = "rmvpe"
37
- CREPE = "crepe"
38
- CREPE_TINY = "crepe-tiny"
39
- FCPE = "fcpe"
40
-
41
-
42
- class EmbedderModel(StrEnum):
43
- """Enumeration of audio embedding models."""
44
-
45
- CONTENTVEC = "contentvec"
46
- SPIN = "spin"
47
- SPIN_V2 = "spin-v2"
48
- CHINESE_HUBERT_BASE = "chinese-hubert-base"
49
- JAPANESE_HUBERT_BASE = "japanese-hubert-base"
50
- KOREAN_HUBERT_BASE = "korean-hubert-base"
51
- CUSTOM = "custom"
52
-
53
-
54
- class RVCContentType(StrEnum):
55
- """Enumeration of valid content to convert with RVC."""
56
-
57
- VOCALS = "vocals"
58
- VOICE = "voice"
59
- SPEECH = "speech"
60
- AUDIO = "audio"
61
-
62
-
63
- class SampleRate(IntEnum):
64
- """Enumeration of supported audio sample rates."""
65
-
66
- HZ_16K = 16000
67
- HZ_44K = 44100
68
- HZ_48K = 48000
69
- HZ_96K = 96000
70
- HZ_192K = 192000
71
-
72
-
73
- class AudioExt(StrEnum):
74
- """Enumeration of supported audio file formats."""
75
-
76
- MP3 = "mp3"
77
- WAV = "wav"
78
- FLAC = "flac"
79
- OGG = "ogg"
80
- M4A = "m4a"
81
- AAC = "aac"
82
-
83
-
84
- class DeviceType(StrEnum):
85
- """Enumeration of device types for training voice models."""
86
-
87
- AUTOMATIC = "Automatic"
88
- CPU = "CPU"
89
- GPU = "GPU"
90
-
91
-
92
- class PrecisionType(StrEnum):
93
- """Enumeration of precision types for training voice models."""
94
-
95
- FP32 = "fp32"
96
- FP16 = "fp16"
97
- BF16 = "bf16"
98
-
99
-
100
- class TrainingSampleRate(IntEnum):
101
- """Enumeration of sample rates for training voice models."""
102
-
103
- HZ_32K = 32000
104
- HZ_40K = 40000
105
- HZ_48K = 48000
106
-
107
-
108
- class AudioSplitMethod(StrEnum):
109
- """
110
- Enumeration of methods to use for splitting audio files during
111
- dataset preprocessing.
112
- """
113
-
114
- SKIP = "Skip"
115
- SIMPLE = "Simple"
116
- AUTOMATIC = "Automatic"
117
-
118
-
119
- class AudioNormalizationMode(StrEnum):
120
- """
121
- Enumeration of audio normalization methods during
122
- dataset preprocessing.
123
- """
124
-
125
- NONE = "none"
126
- PRE = "pre"
127
- POST = "post"
128
-
129
-
130
- class Vocoder(StrEnum):
131
- """Enumeration of vocoders for training voice models."""
132
-
133
- HIFI_GAN = "HiFi-GAN"
134
- MRF_HIFI_GAN = "MRF HiFi-GAN"
135
- REFINE_GAN = "RefineGAN"
136
-
137
-
138
- class IndexAlgorithm(StrEnum):
139
- """Enumeration of indexing algorithms for training voice models."""
140
-
141
- AUTO = "Auto"
142
- FAISS = "Faiss"
143
- KMEANS = "KMeans"
144
-
145
-
146
- class PretrainedType(StrEnum):
147
- """
148
- Enumeration of the possible types of pretrained models to finetune
149
- voice models on.
150
- """
151
-
152
- NONE = "None"
153
- DEFAULT = "Default"
154
- CUSTOM = "Custom"