RVC

Running

RVC / lib /rvc /utils.py

ozipoetra

refactor: create own RVC library from ultimate_rvc

9937fea about 2 months ago

7.17 kB

	import logging
	import os
	import pathlib
	import re
	import sys
	import unicodedata
	import warnings

	import soxr

	import wget

	import numpy as np

	from torch import nn
	from transformers import HubertModel

	import librosa
	import soundfile as sf

	from lib.rvc.common import RVC_MODELS_DIR

	# Silence every warning process-wide; remove this call to see warnings
	# about transformers models.
	warnings.filterwarnings("ignore")

	# Only let ERROR-level (and above) records through for these loggers.
	for _logger_name in ("fairseq", "faiss.loader", "transformers", "torch"):
	    logging.getLogger(_logger_name).setLevel(logging.ERROR)
	del _logger_name  # do not leave the loop variable in the module namespace

	# Make the current working directory importable.
	now_dir = pathlib.Path.cwd()
	sys.path.append(str(now_dir))

	# Path under the models directory for "stftpitchshift" (presumably the
	# formant-shifting executable); it gets an ".exe" suffix on Windows.
	base_path = os.path.join(str(RVC_MODELS_DIR), "formant", "stftpitchshift")
	stft = base_path
	if sys.platform == "win32":
	    stft = base_path + ".exe"


	class HubertModelWithFinalProj(HubertModel):
	    """``HubertModel`` subclass that owns an extra ``final_proj`` layer.

	    ``final_proj`` is a linear layer mapping ``config.hidden_size`` features
	    to ``config.classifier_proj_size`` features. This class only creates the
	    layer; it does not define how the layer is applied.
	    """

	    def __init__(self, config):
	        """Build the base model from *config*, then add ``final_proj``."""
	        super().__init__(config)
	        in_features = config.hidden_size
	        out_features = config.classifier_proj_size
	        self.final_proj = nn.Linear(in_features, out_features)


	def load_audio_16k(file):
	    """Load an audio file through ``librosa.load`` at a 16 kHz sample rate.

	    Used by the f0 and feature extraction steps, which read files that were
	    already preprocessed to 16 kHz, so no resampling is expected to be needed.

	    Args:
	        file: Audio source handed straight to ``librosa.load``.

	    Returns:
	        The loaded samples, flattened to one dimension.

	    Raises:
	        RuntimeError: If loading fails for any reason. The original
	            exception is attached as ``__cause__``.
	    """
	    try:
	        # The sample rate returned by librosa is always the requested one,
	        # so it is discarded.
	        audio, _ = librosa.load(file, sr=16000)
	    except Exception as error:
	        raise RuntimeError(
	            f"An error occurred loading the audio: {error}"
	        ) from error

	    return audio.flatten()


	def load_audio(file, sample_rate):
	    """Read an audio file as mono samples at ``sample_rate``.

	    Args:
	        file: Path to the audio file as a string. Surrounding spaces,
	            double quotes and newlines are stripped before use.
	        sample_rate: Target sample rate. The audio is resampled with
	            librosa's ``"soxr_vhq"`` resampler when the file's own rate
	            differs.

	    Returns:
	        The samples flattened to one dimension.

	    Raises:
	        RuntimeError: If the path does not point to a file, or reading,
	            down-mixing or resampling fails. The original exception is
	            attached as ``__cause__``.
	    """
	    try:
	        # Tolerate paths that arrive wrapped in quotes or padded with
	        # whitespace.
	        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
	        # Fail early with a clear message instead of an opaque decoder
	        # error; mirrors the check done in load_audio_infer.
	        if not pathlib.Path(file).is_file():
	            raise FileNotFoundError(f"File not found: {file}")
	        audio, sr = sf.read(file)
	        if audio.ndim > 1:
	            # to_mono is given the transposed array, as in the original
	            # code.
	            audio = librosa.to_mono(audio.T)
	        if sr != sample_rate:
	            audio = librosa.resample(
	                audio,
	                orig_sr=sr,
	                target_sr=sample_rate,
	                res_type="soxr_vhq",
	            )
	    except Exception as error:
	        raise RuntimeError(
	            f"An error occurred loading the audio: {error}"
	        ) from error

	    return audio.flatten()


	def load_audio_infer(
	    file,
	    sample_rate,
	    **kwargs,
	):
	    """Read an audio file for inference, optionally formant-shifting it.

	    Args:
	        file: Path to the audio file as a string. Surrounding spaces,
	            double quotes and newlines are stripped before use.
	        sample_rate: Target sample rate. The audio is resampled with
	            librosa's ``"soxr_vhq"`` resampler when the file's own rate
	            differs.
	        **kwargs: Optional settings:
	            ``formant_shifting`` (default ``False``) enables the
	            ``StftPitchShift`` pass;
	            ``formant_qfrency`` (default ``0.8``) is multiplied by ``1e-3``
	            and passed as ``quefrency``;
	            ``formant_timbre`` (default ``0.8``) is passed as
	            ``distortion``.

	    Returns:
	        The processed samples as a flattened NumPy array.

	    Raises:
	        RuntimeError: If the file is missing or any processing step fails.
	            The original exception (for example ``FileNotFoundError``) is
	            attached as ``__cause__``.
	    """
	    formant_shifting = kwargs.get("formant_shifting", False)
	    try:
	        # Tolerate paths that arrive wrapped in quotes or padded with
	        # whitespace.
	        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
	        if not pathlib.Path(file).is_file():
	            raise FileNotFoundError(f"File not found: {file}")
	        audio, sr = sf.read(file)
	        if audio.ndim > 1:
	            # to_mono is given the transposed array, as in the original
	            # code.
	            audio = librosa.to_mono(audio.T)
	        if sr != sample_rate:
	            audio = librosa.resample(
	                audio,
	                orig_sr=sr,
	                target_sr=sample_rate,
	                res_type="soxr_vhq",
	            )
	        if formant_shifting:
	            formant_qfrency = kwargs.get("formant_qfrency", 0.8)
	            formant_timbre = kwargs.get("formant_timbre", 0.8)

	            # Imported lazily so the dependency is only needed when
	            # formant shifting is actually requested.
	            from stftpitchshift import StftPitchShift

	            pitchshifter = StftPitchShift(1024, 32, sample_rate)
	            # factors=1 is passed, so only quefrency/distortion are varied.
	            audio = pitchshifter.shiftpitch(
	                audio,
	                factors=1,
	                quefrency=formant_qfrency * 1e-3,
	                distortion=formant_timbre,
	            )
	    except Exception as error:
	        raise RuntimeError(
	            f"An error occurred loading the audio: {error}"
	        ) from error
	    return np.array(audio).flatten()


	def format_title(title):
	    """Turn *title* into a compact, underscore-separated string.

	    The title is NFC-normalized, characters in the range U+2500-U+257F are
	    removed, every character that is not a word character, whitespace,
	    ``.`` or ``-`` is removed, and each run of whitespace becomes a single
	    underscore.
	    """
	    # (pattern, replacement, flags) applied in order to the normalized text.
	    substitutions = (
	        (r"[\u2500-\u257F]+", "", 0),
	        (r"[^\w\s.-]", "", re.UNICODE),
	        (r"\s+", "_", 0),
	    )
	    cleaned = unicodedata.normalize("NFC", title)
	    for pattern, replacement, flags in substitutions:
	        cleaned = re.sub(pattern, replacement, cleaned, flags=flags)
	    return cleaned


	def load_embedding(embedder_model, custom_embedder=None):
	    """Load an embedder as a ``HubertModelWithFinalProj``.

	    Built-in embedders live under ``<RVC_MODELS_DIR>/embedders/<dir>``.
	    When ``pytorch_model.bin`` or ``config.json`` is missing there, it is
	    downloaded first.

	    Args:
	        embedder_model: One of ``"contentvec"``, ``"spin"``, ``"spin-v2"``,
	            ``"chinese-hubert-base"``, ``"japanese-hubert-base"``,
	            ``"korean-hubert-base"``, or ``"custom"``.
	        custom_embedder: Path to a custom model; only used when
	            ``embedder_model`` is ``"custom"``. If it is unset or does not
	            exist, a message is printed and contentvec is used instead
	            (and downloaded if necessary).

	    Returns:
	        The model returned by ``HubertModelWithFinalProj.from_pretrained``.

	    Raises:
	        KeyError: If ``embedder_model`` is not one of the names above.
	    """
	    embedder_root = os.path.join(str(RVC_MODELS_DIR), "embedders")
	    # Embedder name -> directory name; the same directory name is used
	    # locally and in the remote repository.
	    embedder_dirs = {
	        "contentvec": "contentvec",
	        "spin": "spin",
	        "spin-v2": "spin-v2",
	        "chinese-hubert-base": "chinese_hubert_base",
	        "japanese-hubert-base": "japanese_hubert_base",
	        "korean-hubert-base": "korean_hubert_base",
	    }
	    remote_root = (
	        "https://huggingface.co/JackismyShephard/ultimate-rvc"
	        "/resolve/main/Resources/embedders"
	    )

	    if embedder_model == "custom":
	        # Guard against the default None (pathlib.Path(None) would raise).
	        if custom_embedder and pathlib.Path(custom_embedder).exists():
	            return HubertModelWithFinalProj.from_pretrained(custom_embedder)
	        print(f"Custom embedder not found: {custom_embedder}, using contentvec")
	        # Continue as a regular contentvec request so that its files are
	        # downloaded when they are not on disk yet.
	        embedder_model = "contentvec"

	    dir_name = embedder_dirs[embedder_model]
	    model_path = os.path.join(embedder_root, dir_name)
	    pathlib.Path(model_path).mkdir(exist_ok=True, parents=True)

	    # Fetch the weights first, then the config, skipping files that exist.
	    for file_name in ("pytorch_model.bin", "config.json"):
	        target = os.path.join(model_path, file_name)
	        if not pathlib.Path(target).exists():
	            url = f"{remote_root}/{dir_name}/{file_name}"
	            print(f"Downloading {url} to {model_path}...")
	            wget.download(url, out=target)

	    return HubertModelWithFinalProj.from_pretrained(model_path)