Approximetal committed on
Commit
9f66cd3
·
verified ·
1 Parent(s): 661ef4d

Update inference_gradio.py

Browse files
Files changed (1) hide show
  1. inference_gradio.py +42 -25
inference_gradio.py CHANGED
@@ -42,26 +42,24 @@ os.environ["ESPEAKNG_DATA_PATH"] = str(ESPEAK_DATA_DIR)
42
  class UVR5:
43
  """Small wrapper around the bundled uvr5 implementation for denoising."""
44
 
45
- def __init__(self, model_dir: Path, code_dir: Path):
46
- # Keep paths as strings; actual model is loaded lazily.
47
- self.model_dir = str(model_dir)
48
- self.code_dir = str(code_dir)
49
  self.model = None
50
  self.device = "cpu"
51
-
52
- def load_model(self, device: str = "cpu"):
53
- import sys
54
- import json
55
- import torch as _torch
56
-
57
  if self.code_dir not in sys.path:
58
  sys.path.append(self.code_dir)
59
-
60
- if self.model is not None:
 
61
  return self.model
62
-
63
- from multiprocess_cuda_infer import ModelData, Inference
64
 
 
 
65
  model_path = os.path.join(self.model_dir, "Kim_Vocal_1.onnx")
66
  config_path = os.path.join(self.model_dir, "MDX-Net-Kim-Vocal1.json")
67
  with open(config_path, "r", encoding="utf-8") as f:
@@ -70,24 +68,43 @@ class UVR5:
70
  model_path=model_path,
71
  audio_path=self.model_dir,
72
  result_path=self.model_dir,
73
- device="cpu",
74
  process_method="MDX-Net",
75
- # keep base_dir and model_dir the same (paths under `pretrained_models`)
 
 
76
  base_dir=self.model_dir,
77
  **configs,
78
  )
79
 
80
- uvr5_model = Inference(model_data, "cpu")
81
- self.model = uvr5_model.load_model(model_path, 1, device="cpu")
82
- self.device = "cpu"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  return self.model
84
-
85
  def denoise(self, audio_info):
86
- print("denoise UVR5: ", audio_info)
87
- # # On Spaces, force CPU; locally prefer CUDA if available.
88
- self.model = self.load_model()
 
89
  input_audio = load_wav(audio_info, sr=44100, channel=2)
90
- output_audio = self.model.demix_base({0: input_audio.squeeze()}, is_match_mix=False, device="cpu")
 
 
91
  return output_audio.squeeze().T.cpu().numpy(), 44100
92
 
93
 
@@ -207,7 +224,7 @@ def get_available_projects():
207
  def infer(
208
  project, file_checkpoint, exp_name, ref_text, ref_audio, denoise_audio, gen_text, nfe_step, use_ema, separate_langs, frontend, speed, cfg_strength, use_acc_grl, ref_ratio, no_ref_audio, sway_sampling_coef, use_prosody_encoder, seed
209
  ):
210
- global tts_api, last_ema
211
 
212
  # Resolve checkpoint path (local or HF URL)
213
  ckpt_path = file_checkpoint
 
42
  class UVR5:
43
  """Small wrapper around the bundled uvr5 implementation for denoising."""
44
 
45
+ def __init__(self, model_dir):
46
+ # Code directory is always the local `uvr5` folder in this repo
47
+ self.code_dir = os.path.join(os.path.dirname(__file__), "uvr5")
48
+ self.model_dir = model_dir
49
  self.model = None
50
  self.device = "cpu"
51
+
52
+ def load_model(self, device="cpu"):
53
+ import sys, json, os, torch
 
 
 
54
  if self.code_dir not in sys.path:
55
  sys.path.append(self.code_dir)
56
+
57
+ # Reuse an already-loaded model if it matches the requested device.
58
+ if self.model is not None and self.device == device:
59
  return self.model
 
 
60
 
61
+ from multiprocess_cuda_infer import ModelData, Inference
62
+ # In the minimal LEMAS-TTS layout, UVR5 weights live under:
63
  model_path = os.path.join(self.model_dir, "Kim_Vocal_1.onnx")
64
  config_path = os.path.join(self.model_dir, "MDX-Net-Kim-Vocal1.json")
65
  with open(config_path, "r", encoding="utf-8") as f:
 
68
  model_path=model_path,
69
  audio_path=self.model_dir,
70
  result_path=self.model_dir,
71
+ device=device,
72
  process_method="MDX-Net",
73
+ # Keep base_dir and model_dir the same so all UVR5 metadata
74
+ # (model_data.json, model_name_mapper.json, etc.) are resolved
75
+ # under `pretrained_models/uvr5`, matching LEMAS-TTS inference.
76
  base_dir=self.model_dir,
77
  **configs,
78
  )
79
 
80
+ uvr5_model = Inference(model_data, device)
81
+ # On HF Spaces with stateless GPU, we must not initialize CUDA in the
82
+ # main process. The heavy UVR5 loading happens lazily inside
83
+ # @spaces.GPU functions; this guard is kept only for the CPU path to
84
+ # avoid any accidental CUDA init.
85
+ if IS_SPACES and device == "cpu":
86
+ orig_is_available = torch.cuda.is_available
87
+ torch.cuda.is_available = lambda: False
88
+ try:
89
+ uvr5_model.load_model(model_path, 1)
90
+ finally:
91
+ torch.cuda.is_available = orig_is_available
92
+ else:
93
+ uvr5_model.load_model(model_path, 1)
94
+
95
+ self.model = uvr5_model
96
+ self.device = device
97
  return self.model
98
+
99
  def denoise(self, audio_info):
100
+ # Prefer GPU if available; on Spaces this runs inside @spaces.GPU so
101
+ # CUDA can be safely initialized here.
102
+ device = "cuda" if torch.cuda.is_available() else "cpu"
103
+ model = self.load_model(device=device)
104
  input_audio = load_wav(audio_info, sr=44100, channel=2)
105
+ output_audio = model.demix_base({0:input_audio.squeeze()}, is_match_mix=False, device=device)
106
+ # transform = torchaudio.transforms.Resample(44100, 16000)
107
+ # output_audio = transform(output_audio)
108
  return output_audio.squeeze().T.cpu().numpy(), 44100
109
 
110
 
 
224
  def infer(
225
  project, file_checkpoint, exp_name, ref_text, ref_audio, denoise_audio, gen_text, nfe_step, use_ema, separate_langs, frontend, speed, cfg_strength, use_acc_grl, ref_ratio, no_ref_audio, sway_sampling_coef, use_prosody_encoder, seed
226
  ):
227
+ global tts_api
228
 
229
  # Resolve checkpoint path (local or HF URL)
230
  ckpt_path = file_checkpoint