# NeoPy's picture
# EXP
# 05aac64 verified
# raw
# history blame
# 4.26 kB
import os
import gc
import sys
import torch
import codecs
import librosa
import requests
import numpy as np
import soundfile as sf
import torch.nn.functional as F
sys.path.append(os.getcwd())
from modules import opencl
def change_rms(source_audio, source_rate, target_audio, target_rate, rate):
    """Rescale ``target_audio`` using the RMS envelopes of both signals.

    For each signal an RMS envelope is computed with ``librosa.feature.rms``
    (frame length ``sample_rate // 2 * 2`` samples, hop ``sample_rate // 2``
    samples) and linearly interpolated to ``target_audio.shape[0]`` values.
    The target is then multiplied by
    ``source_rms ** (1 - rate) * max(target_rms, 1e-6) ** (rate - 1)``.

    Args:
        source_audio: Samples whose loudness envelope is the reference
            (presumably a 1-D float NumPy array; confirm against callers).
        source_rate: Sample rate used to size the RMS window for the source.
        target_audio: Samples to rescale; its first dimension sets the
            length of the interpolated envelopes.
        target_rate: Sample rate used to size the RMS window for the target.
        rate: Exponent weight; ``1`` yields a gain of one everywhere.

    Returns:
        The product of ``target_audio`` and the per-sample gain (NumPy array).
    """
    sample_count = target_audio.shape[0]

    def envelope(audio, sample_rate):
        # RMS frames stretched to one value per target sample.
        half_rate = sample_rate // 2
        frames = librosa.feature.rms(y=audio, frame_length=half_rate * 2, hop_length=half_rate)
        as_batch = torch.from_numpy(frames).float().unsqueeze(0)
        return F.interpolate(as_batch, size=sample_count, mode="linear").squeeze()

    # The target envelope is computed first, matching the original evaluation order.
    target_rms = envelope(target_audio, target_rate)
    source_rms = envelope(source_audio, source_rate)

    # Floor the target envelope so the negative exponent never sees zero.
    floored_target = torch.maximum(target_rms, torch.zeros_like(target_rms) + 1e-6)
    gain = torch.pow(source_rms, 1 - rate) * torch.pow(floored_target, rate - 1)
    return target_audio * gain.numpy()
def clear_gpu_cache():
    """Run the garbage collector, then empty one accelerator cache.

    Backends are probed in the order CUDA, MPS, OpenCL; only the first one
    that reports itself available has its cache emptied.
    """
    gc.collect()

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        return

    if torch.backends.mps.is_available():
        torch.mps.empty_cache()
        return

    if opencl.is_available():
        opencl.pytorch_ocl.empty_cache()
def HF_download_file(url, output_path=None):
    """Download ``url`` to disk and return the path that was written.

    The URL is normalised first: ``/blob/`` becomes ``/resolve/``, a
    ``?download=true`` suffix is dropped and surrounding whitespace is
    stripped.

    Args:
        url: Address of the file to fetch.
        output_path: Destination. ``None`` saves under the URL's base name in
            the current directory; an existing directory saves under the
            URL's base name inside it; anything else is used as the file path.

    Returns:
        The destination path.

    Raises:
        ValueError: The server answered with a status other than 200; the
            status code is the exception argument.
    """
    url = url.replace("/blob/", "/resolve/").replace("?download=true", "").strip()
    file_name = os.path.basename(url)

    if output_path is None:
        output_path = file_name
    elif os.path.isdir(output_path):
        output_path = os.path.join(output_path, file_name)

    # The context manager releases the streamed connection on every exit path.
    with requests.get(url, stream=True, timeout=300) as response:
        if response.status_code != 200:
            raise ValueError(response.status_code)

        # Write to a sibling temporary file and rename on success so that an
        # interrupted transfer never leaves a truncated file at output_path
        # (callers use the file's existence to decide whether to download).
        partial_path = f"{output_path}.part"
        try:
            with open(partial_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=10 * 1024 * 1024):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)
            os.replace(partial_path, output_path)
        except BaseException:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

    return output_path
def check_predictors(method):
    """Make sure the weight file for pitch predictor ``method`` is present.

    Known method names map to a weight file name. If that file is absent
    from the ``models`` directory it is fetched with ``HF_download_file``
    from a base URL stored rot13-encoded. Unknown method names do nothing.

    Args:
        method: Name of the pitch-extraction method.
    """
    alias_groups = (
        (("rmvpe", "rmvpe-legacy"), "rmvpe.pt"),
        (("fcpe",), "fcpe.pt"),
        (("fcpe-legacy",), "fcpe_legacy.pt"),
        (("crepe-full", "mangio-crepe-full"), "crepe_full.pth"),
        (("crepe-large", "mangio-crepe-large"), "crepe_large.pth"),
        (("crepe-medium", "mangio-crepe-medium"), "crepe_medium.pth"),
        (("crepe-small", "mangio-crepe-small"), "crepe_small.pth"),
        (("crepe-tiny", "mangio-crepe-tiny"), "crepe_tiny.pth"),
    )
    weights_by_method = {
        alias: weight_file
        for aliases, weight_file in alias_groups
        for alias in aliases
    }

    weight_file = weights_by_method.get(method)
    if weight_file is None:
        return

    local_path = os.path.join("models", weight_file)
    if os.path.exists(local_path):
        return

    base_url = codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/cerqvpgbef/", "rot13")
    HF_download_file(base_url + weight_file, local_path)
def check_embedders(hubert):
    """Make sure the weight file for a built-in embedder is present.

    When ``hubert`` is one of the recognised embedder names, ``.pt`` is
    appended and, if that file is absent from the ``models`` directory, it
    is fetched with ``HF_download_file`` from the ``fairseq/`` folder under a
    base URL stored rot13-encoded. Any other value does nothing.

    Args:
        hubert: Embedder name without a file extension.
    """
    known_embedders = (
        "contentvec_base",
        "hubert_base",
        "japanese_hubert_base",
        "korean_hubert_base",
        "chinese_hubert_base",
        "portuguese_hubert_base",
        "spin",
    )
    if hubert not in known_embedders:
        return

    weight_file = hubert + ".pt"
    model_path = os.path.join("models", weight_file)
    if os.path.exists(model_path):
        return

    base_url = codecs.decode("uggcf://uhttvatsnpr.pb/NauC/Ivrganzrfr-EIP-Cebwrpg/erfbyir/znva/rzorqqref/", "rot13")
    HF_download_file(base_url + "fairseq/" + weight_file, model_path)
def load_audio(file, sample_rate=16000):
    """Read an audio file as a flat mono array at ``sample_rate``.

    Surrounding spaces, double quotes and newlines are stripped from the
    path. The file is read with ``soundfile``; if that fails, ``librosa`` is
    tried instead. Multi-channel audio is mixed down to mono and the result
    is resampled when its native rate differs from ``sample_rate``.

    Args:
        file: Path to the audio file.
        sample_rate: Desired output sample rate.

    Returns:
        The flattened sample array.

    Raises:
        RuntimeError: Any failure while locating, decoding or resampling the
            file, including a missing file. The underlying exception is
            attached as ``__cause__``.
    """
    try:
        file = file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
        if not os.path.isfile(file):
            raise FileNotFoundError(f"[ERROR] Not found audio: {file}")

        try:
            audio, sr = sf.read(file, dtype=np.float32)
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit must propagate instead of starting a second read.
            audio, sr = librosa.load(file, sr=None)

        if audio.ndim > 1:
            audio = librosa.to_mono(audio.T)
        if sr != sample_rate:
            audio = librosa.resample(audio, orig_sr=sr, target_sr=sample_rate, res_type="soxr_vhq")
    except Exception as e:
        # Chain explicitly so the original traceback is kept as the cause.
        raise RuntimeError(f"[ERROR] Error reading audio file: {e}") from e

    return audio.flatten()
class Autotune:
    """Pull pitch values toward the nearest entry of a reference frequency set."""

    def __init__(self, ref_freqs):
        """Store the reference frequencies.

        Args:
            ref_freqs: Iterable of reference frequencies; kept under both
                ``ref_freqs`` and ``note_dict``.
        """
        self.ref_freqs = self.note_dict = ref_freqs

    def autotune_f0(self, f0, f0_autotune_strength):
        """Move each value of ``f0`` toward its closest reference frequency.

        Args:
            f0: Sequence of pitch values.
            f0_autotune_strength: Fraction of the distance to the closest
                reference that is applied (``0`` leaves values unchanged,
                ``1`` replaces each value with the closest reference).

        Returns:
            An array created with ``np.zeros_like(f0)`` holding the adjusted
            values.
        """
        tuned = np.zeros_like(f0)
        for index, pitch in enumerate(f0):
            # On a tie, min() keeps the first candidate in note_dict.
            closest = min(self.note_dict, key=lambda note: abs(note - pitch))
            correction = (closest - pitch) * f0_autotune_strength
            tuned[index] = pitch + correction
        return tuned