Approximetal committed on
Commit
c232233
·
verified ·
1 Parent(s): 43f8341

Update inference_gradio.py

Browse files
Files changed (1) hide show
  1. inference_gradio.py +69 -32
inference_gradio.py CHANGED
@@ -22,27 +22,32 @@ last_checkpoint = ""
22
  last_device = ""
23
  last_ema = None
24
 
 
 
 
25
  # Device detection
26
- device = (
27
- "cuda"
28
- if torch.cuda.is_available()
29
- else "xpu"
30
- if torch.xpu.is_available()
31
- else "mps"
32
- if torch.backends.mps.is_available()
33
- else "cpu"
34
- )
 
 
 
 
35
 
36
  REPO_ROOT = Path(__file__).resolve().parent
37
 
38
  # HF location for large TTS checkpoints (too big for Space storage)
39
  HF_PRETRAINED_ROOT = "hf://LEMAS-Project/LEMAS-TTS/pretrained_models"
40
 
41
- # # 1) 指向 `pretrained_models` 里的 libespeak-ng.so(本地路径)
42
- # ESPEAK_LIB = Path(PRETRAINED_ROOT) / "espeak-ng-lib" / "libespeak-ng.so"
43
- # os.environ["PHONEMIZER_ESPEAK_LIBRARY"] = str(ESPEAK_LIB)
44
-
45
- # 2) 指向 `pretrained_models` 里的 espeak-ng-data(本地路径)
46
  ESPEAK_DATA_DIR = Path(PRETRAINED_ROOT) / "espeak-ng-data"
47
  os.environ["ESPEAK_DATA_PATH"] = str(ESPEAK_DATA_DIR)
48
  os.environ["ESPEAKNG_DATA_PATH"] = str(ESPEAK_DATA_DIR)
@@ -52,44 +57,76 @@ class UVR5:
52
  """Small wrapper around the bundled uvr5 implementation for denoising."""
53
 
54
  def __init__(self, model_dir: Path, code_dir: Path):
55
- self.model = self.load_model(str(model_dir), str(code_dir))
 
 
 
 
56
 
57
- def load_model(self, model_dir: str, code_dir: str):
58
  import sys
59
  import json
 
60
 
61
- if code_dir not in sys.path:
62
- sys.path.append(code_dir)
 
 
 
 
63
 
64
  from multiprocess_cuda_infer import ModelData, Inference
65
 
66
- model_path = os.path.join(model_dir, "Kim_Vocal_1.onnx")
67
- config_path = os.path.join(model_dir, "MDX-Net-Kim-Vocal1.json")
68
  with open(config_path, "r", encoding="utf-8") as f:
69
  configs = json.load(f)
70
  model_data = ModelData(
71
  model_path=model_path,
72
- audio_path=model_dir,
73
- result_path=model_dir,
74
- device="cpu",
75
  process_method="MDX-Net",
76
- base_dir=model_dir, # keep base_dir and model_dir the same (paths under `pretrained_models`)
 
77
  **configs,
78
  )
79
 
80
- uvr5_model = Inference(model_data, "cpu")
81
- uvr5_model.load_model(model_path, 1)
82
- return uvr5_model
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
  def denoise(self, audio_info):
85
  print("denoise UVR5: ", audio_info)
 
 
 
 
 
 
 
86
  input_audio = load_wav(audio_info, sr=44100, channel=2)
87
- output_audio = self.model.demix_base({0: input_audio.squeeze()}, is_match_mix=False)
88
- return output_audio.squeeze().T.numpy(), 44100
 
89
 
90
  denoise_model = UVR5(
91
- model_dir=str(Path(PRETRAINED_ROOT) / "uvr5"),
92
- code_dir=str(REPO_ROOT / "uvr5"),
93
  )
94
 
95
  def load_wav(audio_info, sr=16000, channel=1):
@@ -194,7 +231,7 @@ def get_available_projects():
194
  # Fallback: if no local data dir, default to known HF projects
195
  if not project_list:
196
  project_list = ["multilingual_grl", "multilingual_prosody"]
197
- project_list.sort(reverse=True)
198
  print("project_list:", project_list)
199
  return project_list
200
 
 
22
  last_device = ""
23
  last_ema = None
24
 
25
+ # Detect whether we are running inside a HF Space with stateless GPU.
26
+ IS_SPACES = os.getenv("SYSTEM") == "spaces"
27
+
28
  # Device detection
29
+ if IS_SPACES:
30
+ # On Spaces main process we must not initialize CUDA; keep TTS on CPU.
31
+ device = "cpu"
32
+ else:
33
+ device = (
34
+ "cuda"
35
+ if torch.cuda.is_available()
36
+ else "xpu"
37
+ if torch.xpu.is_available()
38
+ else "mps"
39
+ if torch.backends.mps.is_available()
40
+ else "cpu"
41
+ )
42
 
43
  REPO_ROOT = Path(__file__).resolve().parent
44
 
45
  # HF location for large TTS checkpoints (too big for Space storage)
46
  HF_PRETRAINED_ROOT = "hf://LEMAS-Project/LEMAS-TTS/pretrained_models"
47
 
48
+ # 指向 `pretrained_models` 里的 espeak-ng-data(本地自带的字典)
49
+ # 动态库交给系统安装的 espeak-ng 来提供(通过 apt),不强行指定 PHONEMIZER_ESPEAK_LIBRARY,
50
+ # 避免本地复制的 .so 与 Space 基础镜像不兼容。
 
 
51
  ESPEAK_DATA_DIR = Path(PRETRAINED_ROOT) / "espeak-ng-data"
52
  os.environ["ESPEAK_DATA_PATH"] = str(ESPEAK_DATA_DIR)
53
  os.environ["ESPEAKNG_DATA_PATH"] = str(ESPEAK_DATA_DIR)
 
57
  """Small wrapper around the bundled uvr5 implementation for denoising."""
58
 
59
  def __init__(self, model_dir: Path, code_dir: Path):
60
+ # Keep paths as strings; actual model is loaded lazily.
61
+ self.model_dir = str(model_dir)
62
+ self.code_dir = str(code_dir)
63
+ self.model = None
64
+ self.device = "cpu"
65
 
66
+ def load_model(self, device: str = "cpu"):
67
  import sys
68
  import json
69
+ import torch as _torch
70
 
71
+ if self.code_dir not in sys.path:
72
+ sys.path.append(self.code_dir)
73
+
74
+ # Reuse an already-loaded model if it matches the requested device.
75
+ if self.model is not None and self.device == device:
76
+ return self.model
77
 
78
  from multiprocess_cuda_infer import ModelData, Inference
79
 
80
+ model_path = os.path.join(self.model_dir, "Kim_Vocal_1.onnx")
81
+ config_path = os.path.join(self.model_dir, "MDX-Net-Kim-Vocal1.json")
82
  with open(config_path, "r", encoding="utf-8") as f:
83
  configs = json.load(f)
84
  model_data = ModelData(
85
  model_path=model_path,
86
+ audio_path=self.model_dir,
87
+ result_path=self.model_dir,
88
+ device=device,
89
  process_method="MDX-Net",
90
+ # keep base_dir and model_dir the same (paths under `pretrained_models`)
91
+ base_dir=self.model_dir,
92
  **configs,
93
  )
94
 
95
+ uvr5_model = Inference(model_data, device)
96
+ # On HF Spaces with stateless GPU, we must not initialize CUDA in the
97
+ # main process. When running there and staying on CPU, temporarily
98
+ # spoof torch.cuda.is_available() so UVR5 never touches CUDA APIs.
99
+ if IS_SPACES and device == "cpu":
100
+ orig_is_available = _torch.cuda.is_available
101
+ _torch.cuda.is_available = lambda: False
102
+ try:
103
+ uvr5_model.load_model(model_path, 1)
104
+ finally:
105
+ _torch.cuda.is_available = orig_is_available
106
+ else:
107
+ uvr5_model.load_model(model_path, 1)
108
+
109
+ self.model = uvr5_model
110
+ self.device = device
111
+ return self.model
112
 
113
  def denoise(self, audio_info):
114
  print("denoise UVR5: ", audio_info)
115
+ # On Spaces, force CPU; locally prefer CUDA if available.
116
+ if IS_SPACES:
117
+ dev = "cpu"
118
+ else:
119
+ dev = "cuda" if torch.cuda.is_available() else "cpu"
120
+ model = self.load_model(device=dev)
121
+
122
  input_audio = load_wav(audio_info, sr=44100, channel=2)
123
+ output_audio = model.demix_base({0: input_audio.squeeze()}, is_match_mix=False, device=dev)
124
+ return output_audio.squeeze().T.cpu().numpy(), 44100
125
+
126
 
127
  denoise_model = UVR5(
128
+ model_dir=Path(PRETRAINED_ROOT) / "uvr5",
129
+ code_dir=REPO_ROOT / "uvr5",
130
  )
131
 
132
  def load_wav(audio_info, sr=16000, channel=1):
 
231
  # Fallback: if no local data dir, default to known HF projects
232
  if not project_list:
233
  project_list = ["multilingual_grl", "multilingual_prosody"]
234
+ project_list.sort()
235
  print("project_list:", project_list)
236
  return project_list
237