Singhp08 committed on
Commit
b4af44f
·
verified ·
1 Parent(s): 8458a1d

Delete infer

Browse files
infer/__init__.py DELETED
@@ -1 +0,0 @@
1
- # infer package
 
 
infer/onnx_export.py DELETED
@@ -1,38 +0,0 @@
1
- import torch
2
- import os
3
-
4
def export_to_onnx(pth_path, onnx_path):
    """
    Convert a pickled PyTorch RVC model to ONNX (basic exporter).

    The model is traced on CPU with a dummy ``(1, 16000)`` float input.
    Both the batch axis and the sample axis of the input are exported as
    dynamic so the resulting graph is not pinned to exactly 16000 samples.

    Args:
        pth_path: Path handed to ``torch.load``; it must contain a whole
            ``torch.nn.Module`` (not just a state dict / checkpoint dict).
        onnx_path: Destination path of the exported ONNX file.

    Returns:
        True when the export succeeded, False on any failure. The reason
        for a failure is printed; no exception escapes this function.
    """
    device = "cpu"

    # Loading is part of the "export may fail" contract: a missing file or
    # an unreadable pickle must yield False, exactly like an export error.
    # NOTE(review): torch.load unpickles its input - only feed trusted files.
    try:
        model = torch.load(pth_path, map_location=device)
    except Exception as e:
        print("ONNX export failed:", e)
        return False

    # A state dict / checkpoint dict cannot be traced; reject it explicitly
    # instead of crashing on ``model.eval()``.
    if not isinstance(model, torch.nn.Module):
        print(
            "ONNX export failed:",
            f"expected a torch.nn.Module in {pth_path}, "
            f"got {type(model).__name__}",
        )
        return False

    model.eval()

    # Dummy input (audio frame simulation)
    dummy_input = torch.randn(1, 16000)

    try:
        torch.onnx.export(
            model,
            dummy_input,
            onnx_path,
            export_params=True,
            opset_version=13,
            do_constant_folding=True,
            input_names=["input"],
            output_names=["output"],
            dynamic_axes={
                # Axis 1 is the sample axis of the dummy input; keeping it
                # dynamic lets callers feed audio of any length.
                "input": {0: "batch", 1: "samples"},
                "output": {0: "batch"},
            },
        )
    except Exception as e:
        print("ONNX export failed:", e)
        return False

    print("ONNX export successful:", onnx_path)
    return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
infer/onnx_rvc.py DELETED
@@ -1,23 +0,0 @@
1
- import onnxruntime as ort
2
- import numpy as np
3
- import soundfile as sf
4
-
5
class ONNXRVC:
    """
    Run an ONNX model over a whole audio file with onnxruntime.

    The model is fed the complete, peak-normalized mono waveform as a
    single ``(1, num_samples)`` float32 array and its first output is
    written back to disk at the input file's sample rate.
    """

    def __init__(self, model_path):
        """
        Create the inference session.

        Args:
            model_path: Path of the ONNX model file.
        """
        # Passing the provider list explicitly keeps the default "use what
        # is available" behaviour while also satisfying onnxruntime builds
        # that refuse to create a session without ``providers``.
        self.session = ort.InferenceSession(
            model_path, providers=ort.get_available_providers()
        )
        # Ask the model for its input name instead of assuming "input".
        self.input_name = self.session.get_inputs()[0].name

    @staticmethod
    def _prepare_waveform(wav):
        """Down-mix to mono, cast to float32, peak-normalize, add batch axis.

        Raises:
            ValueError: If ``wav`` contains no samples.
        """
        if wav.ndim > 1:
            wav = wav.mean(axis=1)
        if wav.size == 0:
            raise ValueError("input audio contains no samples")

        wav = wav.astype(np.float32)
        # The small epsilon keeps silent input from dividing by zero.
        wav = wav / (np.max(np.abs(wav)) + 1e-6)
        return wav.reshape(1, -1)

    def infer(self, input_audio, output_path):
        """
        Convert one audio file and write the result.

        Args:
            input_audio: Path of the audio file to read.
            output_path: Path the model output is written to.

        Returns:
            ``output_path``.

        Raises:
            ValueError: If the input file contains no samples.
        """
        wav, sr = sf.read(input_audio)

        # ONNX inference
        ort_inputs = {self.input_name: self._prepare_waveform(wav)}
        output = self.session.run(None, ort_inputs)[0]

        sf.write(output_path, output.flatten(), sr)
        return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
infer/rvc.py DELETED
@@ -1,36 +0,0 @@
1
- import os
2
- import torch
3
- import numpy as np
4
- from .utils import load_audio, save_audio
5
-
6
class RVCInfer:
    """
    Placeholder voice-conversion pipeline.

    No model weights are actually loaded or applied: ``infer`` normalizes
    the input audio, blends it with a one-sample circularly shifted copy
    of itself, and writes the result.
    """

    def __init__(self, model_path, index_path):
        """Remember the model/index paths and run the stub model loader."""
        self.model_path = model_path
        self.index_path = index_path

        # Stub loader for now; real weights can be wired in later.
        self.device = "cpu"
        self.model = self.load_model()

    def load_model(self):
        """Print a message when the model file exists; always return None."""
        model_file_present = os.path.exists(self.model_path)
        if model_file_present:
            print(f"Loaded model: {self.model_path}")
        return None

    def infer(self, input_audio_path, output_path):
        """
        Apply the simplified fallback processing to one file.

        Args:
            input_audio_path: Path of the audio file to read.
            output_path: Path the processed audio is written to.

        Returns:
            Whatever ``save_audio`` returns for ``output_path``.
        """
        samples, rate = load_audio(input_audio_path)

        # Peak-normalize; the epsilon guards against an all-zero signal.
        peak = np.max(np.abs(samples))
        samples = samples / (peak + 1e-6)

        # Stand-in "voice conversion": 90% signal + 10% of a copy rolled
        # by one sample (np.roll wraps the last sample to the front).
        delayed = np.roll(samples, 1)
        samples = samples * 0.9 + delayed * 0.1

        return save_audio(output_path, samples, rate)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
infer/rvc_real.py DELETED
@@ -1,47 +0,0 @@
1
- import os
2
- import numpy as np
3
- import soundfile as sf
4
- import torch
5
- import onnxruntime
6
- import librosa
7
- import pyworld as pw
8
-
9
class RVCReal:
    """
    Run a TorchScript model over a whole audio file on CPU.

    When the model cannot be loaded, or the forward pass fails, the
    peak-normalized input audio is written out unchanged instead.
    """

    def __init__(self, model_path, index_path):
        """Store the paths and try to load the TorchScript model."""
        self.model_path = model_path
        self.index_path = index_path
        self.device = "cpu"
        self.load()

    def load(self):
        """Load the TorchScript model into ``self.net`` (None on failure)."""
        try:
            self.net = torch.jit.load(self.model_path, map_location="cpu")
        except Exception as e:
            # Deliberate best-effort: a missing or broken model degrades
            # ``infer`` to a pass-through instead of failing construction.
            print("Model load error:", e)
            self.net = None

    def infer(self, audio_path, output_path):
        """
        Convert one audio file and write the result.

        Args:
            audio_path: Path of the audio file to read.
            output_path: Path the result is written to.

        Returns:
            ``output_path``.

        Raises:
            ValueError: If the input file contains no samples.
        """
        # load audio
        wav, sr = sf.read(audio_path)
        if wav.ndim > 1:
            wav = wav.mean(axis=1)
        if wav.size == 0:
            raise ValueError("input audio contains no samples")

        # The previous pyworld F0 extraction was removed: its result was
        # never used, yet it ran on every call and could raise before the
        # guarded forward pass was reached.

        # normalize
        wav = wav / np.max(np.abs(wav) + 1e-9)

        if self.net is None:
            # Explicit pass-through instead of relying on calling None.
            print("Inference error:", "model is not loaded")
            y = wav
        else:
            try:
                # Inference only - no autograd graph is needed.
                with torch.no_grad():
                    x = torch.from_numpy(wav).float().unsqueeze(0)
                    y = self.net(x).squeeze().detach().numpy()
            except Exception as e:
                print("Inference error:", e)
                y = wav

        # save
        sf.write(output_path, y, sr)
        return output_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
infer/utils.py DELETED
@@ -1,21 +0,0 @@
1
- import numpy as np
2
- import soundfile as sf
3
- import torch
4
-
5
def load_audio(file_path, sr=16000):
    """
    Read an audio file and return it as a mono signal.

    Multi-channel audio is averaged across channels.

    NOTE(review): the ``sr`` argument is accepted but never applied - no
    resampling happens here, and the rate returned is the one reported by
    ``sf.read`` for the file.

    Args:
        file_path: Path of the audio file to read.
        sr: Currently unused (see note above).

    Returns:
        Tuple ``(samples, sample_rate)``.
    """
    samples, file_rate = sf.read(file_path)

    if samples.ndim > 1:
        samples = samples.mean(axis=1)

    return samples, file_rate
12
-
13
-
14
def save_audio(path, audio, sr):
    """
    Write ``audio`` to ``path`` at sample rate ``sr``.

    ``audio`` is converted with ``np.asarray`` before writing.

    Returns:
        ``path``, unchanged.
    """
    sf.write(path, np.asarray(audio), sr)
    return path
18
-
19
-
20
def to_tensor(audio):
    """
    Convert audio data to a new float32 ``torch.Tensor``.

    Uses ``torch.tensor`` rather than the legacy ``torch.FloatTensor``
    constructor, which interprets an integer argument as a size and
    returns an uninitialized tensor of that length.

    Args:
        audio: Array-like data (list, NumPy array, scalar) or a tensor.

    Returns:
        A float32 tensor holding a copy of the data.
    """
    if isinstance(audio, torch.Tensor):
        # torch.tensor() warns when handed a tensor; copy explicitly.
        return audio.detach().clone().to(torch.float32)
    return torch.tensor(audio, dtype=torch.float32)