| import logging |
| import os |
| import pathlib |
| import re |
| import sys |
| import unicodedata |
| import warnings |
|
|
| import soxr |
|
|
| import wget |
|
|
| import numpy as np |
|
|
| from torch import nn |
| from transformers import HubertModel |
|
|
| import librosa |
| import soundfile as sf |
|
|
| from lib.rvc.common import RVC_MODELS_DIR |
|
|
| |
# Suppress every Python warning for the rest of the process.
warnings.filterwarnings("ignore")

# Only ERROR-and-above records from these third-party loggers are emitted.
for _noisy_logger in ("fairseq", "faiss.loader", "transformers", "torch"):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)
del _noisy_logger  # keep the module namespace free of the loop variable

# Make the current working directory importable.
now_dir = pathlib.Path.cwd()
sys.path.append(str(now_dir))

# Location of the "stftpitchshift" file below the models directory; on Windows
# the name carries an ".exe" suffix (presumably an executable - confirm).
base_path = os.path.join(str(RVC_MODELS_DIR), "formant", "stftpitchshift")
if sys.platform == "win32":
    stft = base_path + ".exe"
else:
    stft = base_path
|
|
|
|
class HubertModelWithFinalProj(HubertModel):
    """``HubertModel`` extended with an extra linear layer, ``final_proj``.

    The layer maps ``config.hidden_size`` features to
    ``config.classifier_proj_size`` features. This class only declares the
    layer; it does not override the forward pass.
    NOTE(review): presumably declared so that checkpoints containing
    ``final_proj`` weights load cleanly - confirm against the checkpoints.
    """

    def __init__(self, config):
        """Build the base model from ``config`` and attach ``final_proj``."""
        super().__init__(config)
        in_features = config.hidden_size
        out_features = config.classifier_proj_size
        self.final_proj = nn.Linear(in_features, out_features)
|
|
|
|
def load_audio_16k(file):
    """Load an audio file with librosa at a 16 kHz sample rate.

    Args:
        file: Audio source handed straight to ``librosa.load``.

    Returns:
        The loaded samples, flattened to one dimension.

    Raises:
        RuntimeError: If loading fails for any reason. The underlying
            exception is attached as ``__cause__``.
    """
    try:
        # The returned sample rate is the one requested via ``sr``, so it is
        # not needed afterwards.
        audio, _ = librosa.load(file, sr=16000)
    except Exception as error:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise RuntimeError(
            f"An error occurred loading the audio: {error}"
        ) from error

    return audio.flatten()
|
|
|
|
def load_audio(file, sample_rate):
    """Read an audio file, downmix it to mono and resample it if needed.

    Args:
        file: Path of the audio file as a string. Surrounding spaces, double
            quotes and newlines are trimmed before the file is opened.
        sample_rate: Target sample rate. Resampling only happens when the
            file's own rate differs from it.

    Returns:
        The samples, flattened to one dimension.

    Raises:
        RuntimeError: If anything goes wrong while cleaning the path, reading
            or resampling. The underlying exception is attached as
            ``__cause__``.
    """
    try:
        # Trim stray spaces, quotes and newlines around the path.
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        audio, sr = sf.read(file)
        if audio.ndim > 1:
            # soundfile returns (frames, channels); librosa.to_mono expects
            # the channel axis first, hence the transpose.
            audio = librosa.to_mono(audio.T)
        if sr != sample_rate:
            audio = librosa.resample(
                audio,
                orig_sr=sr,
                target_sr=sample_rate,
                res_type="soxr_vhq",
            )
    except Exception as error:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise RuntimeError(
            f"An error occurred loading the audio: {error}"
        ) from error

    return audio.flatten()
|
|
|
|
def load_audio_infer(
    file,
    sample_rate,
    **kwargs,
):
    """Read an audio file for inference, optionally applying formant shifting.

    The file is read, downmixed to mono when it has more than one channel and
    resampled when its rate differs from ``sample_rate``.

    Args:
        file: Path of the audio file as a string. Surrounding spaces, double
            quotes and newlines are trimmed before use.
        sample_rate: Target sample rate of the returned audio.
        **kwargs: Optional settings:
            formant_shifting (default ``False``): when truthy, the audio is
                passed through ``StftPitchShift.shiftpitch``.
            formant_qfrency (default ``0.8``): multiplied by ``1e-3`` and
                passed as ``quefrency`` (presumably milliseconds - confirm).
            formant_timbre (default ``0.8``): passed as ``distortion``.

    Returns:
        A one-dimensional NumPy array with the processed samples.

    Raises:
        RuntimeError: If the file does not exist or any processing step
            fails. The underlying exception (e.g. ``FileNotFoundError``) is
            attached as ``__cause__``.
    """
    formant_shifting = kwargs.get("formant_shifting", False)
    try:
        # Trim stray spaces, quotes and newlines around the path.
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        if not pathlib.Path(file).is_file():
            raise FileNotFoundError(f"File not found: {file}")
        audio, sr = sf.read(file)
        if audio.ndim > 1:
            # soundfile returns (frames, channels); librosa.to_mono expects
            # the channel axis first, hence the transpose.
            audio = librosa.to_mono(audio.T)
        if sr != sample_rate:
            audio = librosa.resample(
                audio,
                orig_sr=sr,
                target_sr=sample_rate,
                res_type="soxr_vhq",
            )
        if formant_shifting:
            formant_qfrency = kwargs.get("formant_qfrency", 0.8)
            formant_timbre = kwargs.get("formant_timbre", 0.8)

            # Imported lazily so the dependency is only needed when formant
            # shifting is actually requested.
            from stftpitchshift import StftPitchShift

            pitchshifter = StftPitchShift(1024, 32, sample_rate)
            # NOTE(review): factors=1 presumably leaves the pitch itself
            # untouched so only the formants change - confirm against the
            # stftpitchshift documentation.
            audio = pitchshifter.shiftpitch(
                audio,
                factors=1,
                quefrency=formant_qfrency * 1e-3,
                distortion=formant_timbre,
            )
    except Exception as error:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise RuntimeError(
            f"An error occurred loading the audio: {error}"
        ) from error
    return np.array(audio).flatten()
|
|
|
|
def format_title(title):
    """Turn ``title`` into a cleaned-up, underscore-separated string.

    The text is NFC-normalised, then three substitutions run in order:
    characters in the U+2500-U+257F range are dropped, every character that
    is not a word character, whitespace, ``.`` or ``-`` is dropped, and each
    run of whitespace becomes a single underscore.

    Args:
        title: The text to clean.

    Returns:
        The cleaned string.
    """
    # (pattern, replacement) pairs, applied strictly in this order.
    substitutions = (
        (r"[\u2500-\u257F]+", ""),
        (r"[^\w\s.-]", ""),
        (r"\s+", "_"),
    )
    cleaned = unicodedata.normalize("NFC", title)
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned, flags=re.UNICODE)
    return cleaned
|
|
|
|
def load_embedding(embedder_model, custom_embedder=None):
    """Load an embedder model, downloading its files first when missing.

    Args:
        embedder_model: One of ``"contentvec"``, ``"spin"``, ``"spin-v2"``,
            ``"chinese-hubert-base"``, ``"japanese-hubert-base"``,
            ``"korean-hubert-base"``, or ``"custom"``.
        custom_embedder: Path of a custom embedder; only consulted when
            ``embedder_model`` is ``"custom"``. When it is missing or does
            not exist, a message is printed and contentvec is used instead.

    Returns:
        The ``HubertModelWithFinalProj`` returned by ``from_pretrained``.

    Raises:
        KeyError: If ``embedder_model`` is not one of the names above.
    """
    embedder_root = os.path.join(str(RVC_MODELS_DIR), "embedders")
    base_url = (
        "https://huggingface.co/JackismyShephard/ultimate-rvc/resolve/main/"
        "Resources/embedders"
    )
    # Embedder name -> directory name. The same directory name is used below
    # ``embedder_root`` locally and below ``base_url`` remotely.
    embedder_dirs = {
        "contentvec": "contentvec",
        "spin": "spin",
        "spin-v2": "spin-v2",
        "chinese-hubert-base": "chinese_hubert_base",
        "japanese-hubert-base": "japanese_hubert_base",
        "korean-hubert-base": "korean_hubert_base",
    }

    if embedder_model == "custom":
        # Guard against ``None``/empty before building a Path from it.
        if custom_embedder and pathlib.Path(custom_embedder).exists():
            # A user-supplied embedder is loaded as-is; nothing is downloaded.
            return HubertModelWithFinalProj.from_pretrained(custom_embedder)
        print(f"Custom embedder not found: {custom_embedder}, using contentvec")
        # Continue as a regular contentvec request so that its files are
        # fetched if they are not on disk yet.
        embedder_model = "contentvec"

    dirname = embedder_dirs[embedder_model]
    model_path = os.path.join(embedder_root, dirname)
    pathlib.Path(model_path).mkdir(exist_ok=True, parents=True)

    # Weights first, then the config; each is fetched only when absent.
    for filename in ("pytorch_model.bin", "config.json"):
        target = os.path.join(model_path, filename)
        if not pathlib.Path(target).exists():
            url = f"{base_url}/{dirname}/{filename}"
            print(f"Downloading {url} to {model_path}...")
            wget.download(url, out=target)

    return HubertModelWithFinalProj.from_pretrained(model_path)
|
|