Spaces:
Running
Running
Delete infer
Browse files
- infer/__init__.py +0 -1
- infer/onnx_export.py +0 -38
- infer/onnx_rvc.py +0 -23
- infer/rvc.py +0 -36
- infer/rvc_real.py +0 -47
- infer/utils.py +0 -21
infer/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
# infer package
|
|
|
|
|
|
infer/onnx_export.py
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
import torch
|
| 2 |
-
import os
|
| 3 |
-
|
| 4 |
-
def export_to_onnx(pth_path, onnx_path):
    """Convert a serialized PyTorch model to ONNX (basic exporter).

    The object stored at ``pth_path`` is loaded on the CPU, switched to eval
    mode and exported with a random ``(1, 16000)`` input, opset 13, constant
    folding, and a dynamic batch axis on both "input" and "output".

    Args:
        pth_path: Path of the serialized model to load.
        onnx_path: Destination path for the exported ONNX file.

    Returns:
        True when the export succeeded. False on any failure, including a
        missing or unreadable checkpoint and a checkpoint that does not
        contain a ``torch.nn.Module`` (for example a bare state dict).
    """
    device = "cpu"

    try:
        # NOTE(security): torch.load unpickles the file, which can execute
        # arbitrary code. Only call this on checkpoints from a trusted source.
        model = torch.load(pth_path, map_location=device)

        # A state dict or other container has no forward pass to trace, so
        # report it as a failed export instead of raising AttributeError.
        if not isinstance(model, torch.nn.Module):
            raise TypeError(
                f"expected a torch.nn.Module in {pth_path!r}, "
                f"got {type(model).__name__}"
            )
        model.eval()

        # Dummy input (audio frame simulation)
        dummy_input = torch.randn(1, 16000)

        torch.onnx.export(
            model,
            dummy_input,
            onnx_path,
            export_params=True,
            opset_version=13,
            do_constant_folding=True,
            input_names=["input"],
            output_names=["output"],
            dynamic_axes={
                "input": {0: "batch"},
                "output": {0: "batch"},
            },
        )
    except Exception as e:
        # Boundary function with a boolean contract: report and signal failure.
        print("ONNX export failed:", e)
        return False

    print("ONNX export successful:", onnx_path)
    return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/onnx_rvc.py
DELETED
|
@@ -1,23 +0,0 @@
|
|
| 1 |
-
import onnxruntime as ort
|
| 2 |
-
import numpy as np
|
| 3 |
-
import soundfile as sf
|
| 4 |
-
|
| 5 |
-
class ONNXRVC:
    """Run an ONNX model over an audio file using onnxruntime."""

    def __init__(self, model_path):
        """Create the onnxruntime inference session for ``model_path``."""
        self.session = ort.InferenceSession(model_path)

    def infer(self, input_audio, output_path):
        """Feed ``input_audio`` through the model and write the result.

        The audio is read with soundfile, averaged over axis 1 when it has
        more than one dimension, cast to float32 and divided by its peak
        absolute value (plus 1e-6). The first model output is flattened and
        written to ``output_path`` at the sample rate of the input file.

        Args:
            input_audio: Path of the audio file to read.
            output_path: Path of the audio file to write.

        Returns:
            ``output_path``.

        Raises:
            ValueError: If the input file contains no samples.
        """
        wav, sr = sf.read(input_audio)

        if wav.ndim > 1:
            wav = wav.mean(axis=1)

        wav = wav.astype(np.float32)
        if wav.size == 0:
            # np.max on an empty array raises an opaque ValueError; keep the
            # exception type but say what is actually wrong.
            raise ValueError(f"no audio samples found in {input_audio!r}")
        wav = wav / (np.max(np.abs(wav)) + 1e-6)

        # ONNX inference. Ask the graph for its input name rather than
        # assuming the model was exported with an input called "input".
        input_name = self.session.get_inputs()[0].name
        ort_inputs = {input_name: wav.reshape(1, -1)}
        output = self.session.run(None, ort_inputs)[0]

        sf.write(output_path, output.flatten(), sr)
        return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/rvc.py
DELETED
|
@@ -1,36 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import torch
|
| 3 |
-
import numpy as np
|
| 4 |
-
from .utils import load_audio, save_audio
|
| 5 |
-
|
| 6 |
-
class RVCInfer:
    """Placeholder voice-conversion pipeline.

    No model is executed: ``infer`` only peak-normalizes the input and
    blends it with a one-sample shifted copy of itself.
    """

    def __init__(self, model_path, index_path):
        """Store the model and index paths and run the dummy model loader."""
        self.model_path = model_path
        self.index_path = index_path

        # dummy model loader (replace with real weights later if needed)
        self.device = "cpu"
        self.model = self.load_model()

    def load_model(self):
        """Print a message when the model file exists; always return None."""
        if os.path.exists(self.model_path):
            print(f"Loaded model: {self.model_path}")
        return None

    def infer(self, input_audio_path, output_path):
        """Apply the simplified processing and save the result.

        Args:
            input_audio_path: Path of the audio file to load.
            output_path: Path of the audio file to write.

        Returns:
            Whatever ``save_audio`` returns for ``output_path``.
        """
        # load audio
        audio, sr = load_audio(input_audio_path)
        audio = np.asarray(audio)

        # -------------------------
        # SIMPLIFIED PROCESSING
        # (HF-safe fallback conversion)
        # -------------------------

        # Zero-length audio has no peak (np.max would raise), so it is
        # written out unchanged.
        if audio.size:
            # normalize audio
            audio = audio / (np.max(np.abs(audio)) + 1e-6)

            # fake "voice conversion effect"; np.roll is circular, so the
            # last sample is mixed into the first one.
            audio = audio * 0.9 + np.roll(audio, 1) * 0.1

        # save output
        return save_audio(output_path, audio, sr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/rvc_real.py
DELETED
|
@@ -1,47 +0,0 @@
|
|
| 1 |
-
import os
|
| 2 |
-
import numpy as np
|
| 3 |
-
import soundfile as sf
|
| 4 |
-
import torch
|
| 5 |
-
import onnxruntime
|
| 6 |
-
import librosa
|
| 7 |
-
import pyworld as pw
|
| 8 |
-
|
| 9 |
-
class RVCReal:
    """Run a TorchScript model over an audio file, with a pass-through fallback."""

    def __init__(self, model_path, index_path):
        """Store the paths and try to load the TorchScript model."""
        self.model_path = model_path
        self.index_path = index_path
        self.device = "cpu"
        self.load()

    def load(self):
        """Load the TorchScript model on the CPU.

        On failure the error is printed and ``self.net`` is set to None.
        """
        try:
            self.net = torch.jit.load(self.model_path, map_location="cpu")
        except Exception as e:
            print("Model load error:", e)
            self.net = None

    def infer(self, audio_path, output_path):
        """Process ``audio_path`` and write the result to ``output_path``.

        The audio is read with soundfile, averaged over axis 1 when it has
        more than one dimension, and divided by its peak absolute value
        (plus 1e-9). When a model is loaded its output is written; if no
        model is loaded or the forward pass fails, the normalized input is
        written instead.

        Args:
            audio_path: Path of the audio file to read.
            output_path: Path of the audio file to write.

        Returns:
            ``output_path``.
        """
        # load audio
        wav, sr = sf.read(audio_path)
        if wav.ndim > 1:
            wav = wav.mean(axis=1)

        # The f0 values previously computed here with pyworld were never
        # used, so that (discarded) computation has been dropped.

        # normalize; zero-length audio has no peak, so use 0.0 for it
        peak = np.max(np.abs(wav)) if wav.size else 0.0
        wav = wav / (peak + 1e-9)

        # dummy forward
        if self.net is None:
            # load() failed earlier; fall back to the normalized input
            # instead of relying on the TypeError from calling None.
            print("Inference error:", "model is not loaded")
            y = wav
        else:
            try:
                x = torch.from_numpy(wav).float().unsqueeze(0)
                y = self.net(x).squeeze().detach().numpy()
            except Exception as e:
                print("Inference error:", e)
                y = wav

        # save
        sf.write(output_path, y, sr)
        return output_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
infer/utils.py
DELETED
|
@@ -1,21 +0,0 @@
|
|
| 1 |
-
import numpy as np
|
| 2 |
-
import soundfile as sf
|
| 3 |
-
import torch
|
| 4 |
-
|
| 5 |
-
def load_audio(file_path, sr=16000):
    """Read an audio file and collapse multi-dimensional data to 1-D.

    Note that the ``sr`` argument is not used: the value returned is the
    sample rate reported by soundfile for the file, and no resampling is
    performed.

    Args:
        file_path: Path of the audio file to read.
        sr: Accepted for interface compatibility; overwritten by the
            file's own sample rate.

    Returns:
        A ``(audio, sr)`` tuple: the samples (averaged over axis 1 when the
        data has more than one dimension) and the file's sample rate.
    """
    samples, sr = sf.read(file_path)
    is_multi_dim = len(samples.shape) > 1
    return (samples.mean(axis=1) if is_multi_dim else samples), sr
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
def save_audio(path, audio, sr):
    """Write ``audio`` to ``path`` with soundfile and return ``path``.

    Args:
        path: Destination file path.
        audio: Samples to write; converted with ``np.asarray`` first.
        sr: Sample rate passed to ``sf.write``.

    Returns:
        ``path``, unchanged.
    """
    sf.write(path, np.asarray(audio), sr)
    return path
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
def to_tensor(audio):
    """Return ``audio`` as a new float32 ``torch.Tensor``.

    Uses ``torch.tensor`` rather than the legacy ``torch.FloatTensor``
    constructor, which interprets an int argument as a *size* and returns
    uninitialized memory. The result never shares memory with the input.

    Args:
        audio: A sequence, NumPy array, scalar or tensor of numeric samples.

    Returns:
        A float32 tensor holding a copy of the data.
    """
    if isinstance(audio, torch.Tensor):
        # torch.tensor(tensor) warns; copy-convert explicitly instead.
        return audio.detach().to(dtype=torch.float32, copy=True)
    return torch.tensor(audio, dtype=torch.float32)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|