noblebarkrr committed on
Commit
a14ef04
·
verified ·
1 Parent(s): 7345263

Update infer.py

Browse files
Files changed (1) hide show
  1. infer.py +94 -127
infer.py CHANGED
@@ -1,147 +1,114 @@
1
- import asyncio
2
- import gc
3
- import os
4
- from functools import lru_cache
5
-
6
- import edge_tts
7
- import gradio as gr
8
- import numpy as np
9
  import torch
 
10
  from fairseq import checkpoint_utils
11
  from scipy.io import wavfile
12
 
13
- # Используем относительный импорт
14
- from .config import Config
15
- from .pipeline import VC
16
  from rvc.lib.algorithm.synthesizers import Synthesizer
17
  from rvc.lib.my_utils import load_audio
 
18
 
19
- # Конфигурация потоков и памяти
20
- torch.set_num_threads(4)
21
- os.environ["OMP_NUM_THREADS"] = "4"
22
- os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
23
-
24
- RVC_MODELS_DIR = os.path.join(os.getcwd(), "models", "RVC_models")
25
- EMBEDDERS_DIR = os.path.join(os.getcwd(), "rvc", "models", "embedders")
26
- HUBERT_BASE_PATH = os.path.join(EMBEDDERS_DIR, "hubert_base.pt")
27
- OUTPUT_DIR = os.path.join(os.getcwd(), "output", "RVC_output")
28
-
29
- config = Config()
30
-
31
- # Остальной
32
-
33
- # Кэшируем все тяжелые модели
34
@lru_cache(maxsize=2)
def load_hubert():
    """Load the HuBERT embedder and prepare it for inference.

    The checkpoint at ``HUBERT_BASE_PATH`` is loaded through fairseq; the first
    model of the returned ensemble is moved to ``config.device``, cast to
    float32 and switched to eval mode.

    The result is memoised by ``lru_cache``, so repeated calls hand back the
    same model object instead of re-reading the checkpoint.

    Returns:
        The prepared embedder model.
    """
    ensemble, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        [HUBERT_BASE_PATH],
        suffix="",
    )
    embedder = ensemble[0]
    embedder = embedder.to(config.device)
    embedder = embedder.float()
    return embedder.eval()
41
 
42
@lru_cache(maxsize=2)
def load_rvc_model(rvc_model):
    """Load an RVC voice model (and locate its index file) by folder name.

    The folder ``RVC_MODELS_DIR/<rvc_model>`` is scanned; the first ``.pth``
    entry is used as the checkpoint and the first ``.index`` entry (if any) as
    the feature index. Results are memoised by ``lru_cache``.

    Args:
        rvc_model: Name of the sub-folder inside ``RVC_MODELS_DIR``.

    Returns:
        A tuple ``(cpt, net_g, tgt_sr, index_path)``: the raw checkpoint dict,
        the synthesizer on ``config.device`` in float32/eval mode, the last
        entry of the checkpoint's ``config`` list (used by the caller as the
        output sample rate), and the index path or ``None``.

    Raises:
        ValueError: If the folder contains no ``.pth`` file.
    """
    model_dir = os.path.join(RVC_MODELS_DIR, rvc_model)

    # Single pass over the listing; keep only the first match of each kind.
    model_path = None
    index_path = None
    for entry in os.listdir(model_dir):
        if model_path is None and entry.endswith(".pth"):
            model_path = os.path.join(model_dir, entry)
        if index_path is None and entry.endswith(".index"):
            index_path = os.path.join(model_dir, entry)

    if not model_path:
        raise ValueError(f"Model {rvc_model} not found!")

    cpt = torch.load(model_path, map_location="cpu", weights_only=True)
    synth_args = cpt["config"]
    tgt_sr = synth_args[-1]
    # Overwrite the third-from-last config entry with the row count of the
    # speaker-embedding table stored in the weights.
    synth_args[-3] = cpt["weight"]["emb_g.weight"].shape[0]

    if cpt.get("version", "v1") == "v2":
        feature_dim = 768
    else:
        feature_dim = 256

    net_g = Synthesizer(
        *synth_args,
        use_f0=cpt.get("f0", 1),
        input_dim=feature_dim,
    )
    # strict=False: keys missing from / extra in the checkpoint are tolerated.
    net_g.load_state_dict(cpt["weight"], strict=False)
    net_g = net_g.to(config.device)
    net_g = net_g.float()
    net_g = net_g.eval()

    return cpt, net_g, tgt_sr, index_path
66
 
 
 
 
 
 
 
 
 
 
async def _text_to_speech(text, voice, output_path):
    """Synthesise *text* with the edge-tts *voice* and save it to *output_path*."""
    # NOTE(review): edge-tts is believed to emit MP3-encoded audio regardless of
    # the file extension; this relies on load_audio detecting the format from
    # the file content -- confirm.
    await edge_tts.Communicate(text, voice).save(output_path)


def rvc_infer(
    voice_rvc=None,
    voice_tts=None,
    input_audio=None,
    input_text=None,
    f0_method="rmvpe",
    hop_length=128,
    pitch=0,
    index_rate=0.5,
    volume_envelope=0.25,
    protect=0.33,
    filter_radius=3,
    f0_min=50,
    f0_max=1100,
    output_format="wav",
    use_tts=False,
    progress=gr.Progress(),
):
    """Convert a voice recording (or synthesised speech) with an RVC model.

    Args:
        voice_rvc: Name of the RVC model folder passed to ``load_rvc_model``.
        voice_tts: edge-tts voice name; only used when ``use_tts`` is true.
        input_audio: Path of the audio file to convert. Ignored (replaced by a
            temporary TTS file inside ``OUTPUT_DIR``) when ``use_tts`` is true.
        input_text: Text to synthesise when ``use_tts`` is true.
        f0_method, hop_length, pitch, index_rate, volume_envelope, protect,
        filter_radius, f0_min, f0_max: Forwarded unchanged to ``VC.pipeline``.
        output_format: Extension/format of the resulting file.
        use_tts: Synthesise ``input_text`` first instead of reading
            ``input_audio``.
        progress: Gradio progress tracker used to report the current stage.

    Returns:
        Path of the converted audio file inside ``OUTPUT_DIR``.

    Raises:
        gr.Error: Wraps any exception raised during processing; the original
            exception is attached as ``__cause__``.
    """
    try:
        progress(0, desc="[⚙️] Инициализация...")

        # Both the TTS temp file and the result are written here; make sure the
        # directory exists instead of failing on the first run.
        os.makedirs(OUTPUT_DIR, exist_ok=True)

        hubert = load_hubert()
        cpt, net_g, tgt_sr, index_path = load_rvc_model(voice_rvc)
        vc = VC(tgt_sr, config)

        if use_tts:
            progress(0.2, desc="[🎙️] Синтез речи...")
            input_audio = os.path.join(OUTPUT_DIR, "tts_temp.wav")
            # This is a module-level function, so there is no ``self``; the
            # original ``self.text_to_speech`` call raised NameError.
            asyncio.run(_text_to_speech(input_text, voice_tts, input_audio))

        audio = load_audio(input_audio, 16000)

        progress(0.5, desc="[🌌] Преобразование голоса...")
        with torch.inference_mode():
            audio_opt = vc.pipeline(
                hubert,
                net_g,
                0,
                audio,
                input_audio,
                pitch,
                f0_method,
                index_path,
                index_rate,
                cpt.get("f0", 1),
                filter_radius,
                volume_envelope,
                cpt.get("version", "v1"),
                protect,
                hop_length,
                f0_min=f0_min,
                f0_max=f0_max,
            )

        output_audio = os.path.join(OUTPUT_DIR, f"Voice_Converted.{output_format}")
        # Always written as WAV first; re-encoded in place below if needed.
        wavfile.write(output_audio, tgt_sr, audio_opt)

        if output_format != "wav":
            # Call the sibling module-level helper directly (was ``self.``).
            convert_audio_format(output_audio, output_format)

        # Drop local references and collect; the cached loaders keep their own
        # references to the models, so those stay alive for the next call.
        del hubert, cpt, net_g, vc, audio_opt
        gc.collect()

        progress(1.0, desc=f"[✅] Готово: {output_audio}")
        return output_audio

    except Exception as e:
        # Chain the cause so the real traceback is not lost behind the UI error.
        raise gr.Error(f"Ошибка: {str(e)}") from e
142
-
143
- # Оптимизированная конвертация формата
144
def convert_audio_format(input_path, output_format):
    """Re-encode the audio file at *input_path* in place as *output_format*.

    The file is read fully into memory and then written back to the same path
    using the requested container format.
    """
    import soundfile as sf

    samples, sample_rate = sf.read(input_path)
    sf.write(
        file=input_path,
        data=samples,
        samplerate=sample_rate,
        format=output_format,
    )
 
 
 
 
 
 
 
 
 
1
  import torch
2
+ from multiprocessing import cpu_count
3
  from fairseq import checkpoint_utils
4
  from scipy.io import wavfile
5
 
 
 
 
6
  from rvc.lib.algorithm.synthesizers import Synthesizer
7
  from rvc.lib.my_utils import load_audio
8
+ from .pipeline import VC
9
 
10
+ # Конфигурация устройства и параметров
11
class Config:
    """CPU-only runtime configuration handed to the voice-conversion pipeline.

    Every instance describes the same fixed setup: device ``"cpu"``, half
    precision disabled, no GPU information, and the reduced CPU values for
    ``x_pad``/``x_query``/``x_center``/``x_max``.
    """

    # (x_pad, x_query, x_center, x_max) -- the reduced values used on CPU.
    # NOTE(review): their exact meaning is defined by the consumer (VC).
    _CPU_WINDOW: tuple = (1, 6, 38, 41)

    def __init__(self) -> None:
        self.device: str = self.get_device()
        # Half precision is disabled for CPU.
        self.is_half: bool = False
        self.n_cpu: int = cpu_count()
        self.gpu_name = None
        self.gpu_mem = None
        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()

    def get_device(self) -> str:
        """Return the torch device string; only the CPU is used."""
        return "cpu"

    def device_config(self) -> tuple:
        """Pin the instance to CPU/float32 and return the window parameters.

        Prints a notice, (re)sets ``device`` and ``is_half`` and returns the
        tuple ``(x_pad, x_query, x_center, x_max)``.
        """
        print("Используется CPU")
        self.device = "cpu"
        self.is_half = False
        return self._CPU_WINDOW
28
+
29
+ # Загрузка модели Hubert
30
def load_hubert(device, is_half, model_path):
    """Load the HuBERT embedder from *model_path* and prepare it for inference.

    Args:
        device: Torch device the model is moved to.
        is_half: Accepted for interface compatibility but not read here; the
            model is always cast to float32 (float is used for CPU).
        model_path: Path of the fairseq checkpoint.

    Returns:
        The first model of the loaded ensemble, on *device*, in float32 and
        eval mode.
    """
    ensemble, _, _ = checkpoint_utils.load_model_ensemble_and_task(
        [model_path],
        suffix="",
    )
    embedder = ensemble[0].to(device).float()
    embedder.eval()
    return embedder
38
 
39
+ # Получение голосового преобразователя
40
def get_vc(device, is_half, config, model_path):
    """Load an RVC voice checkpoint and build everything needed to run it.

    Args:
        device: Torch device the synthesizer is moved to.
        is_half: Forwarded to ``Synthesizer``; the network itself is always
            cast to float32 afterwards (float is used for CPU).
        config: Configuration object handed to ``VC``.
        model_path: Path of the ``.pth`` voice model.

    Returns:
        A tuple ``(cpt, version, net_g, tgt_sr, vc)``: the raw checkpoint dict,
        its ``version`` entry (``"v1"`` when absent), the synthesizer in eval
        mode, the last entry of the checkpoint's ``config`` list (used as the
        output sample rate), and the ``VC`` pipeline object.

    Raises:
        ValueError: If the checkpoint lacks the ``config`` or ``weight`` key.
    """
    cpt = torch.load(model_path, map_location="cpu", weights_only=True)
    if "config" not in cpt or "weight" not in cpt:
        raise ValueError(
            f"Некорректный формат для {model_path}. "
            "Используйте голосовую модель, обученную с использованием RVC v2."
        )

    tgt_sr = cpt["config"][-1]
    # Overwrite the third-from-last config entry with the row count of the
    # speaker-embedding table stored in the weights.
    cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
    pitch_guidance = cpt.get("f0", 1)
    version = cpt.get("version", "v1")
    input_dim = 768 if version == "v2" else 256

    net_g = Synthesizer(
        *cpt["config"],
        use_f0=pitch_guidance,
        input_dim=input_dim,
        is_half=is_half,
    )

    # The ``enc_q`` submodule is dropped before the weights are loaded
    # (presumably it is only needed for training -- confirm). Guarded so a
    # synthesizer variant without it does not fail with AttributeError.
    if hasattr(net_g, "enc_q"):
        del net_g.enc_q
    # strict=False tolerates key mismatches; the report is printed for
    # diagnostics.
    print(net_g.load_state_dict(cpt["weight"], strict=False))
    net_g = net_g.eval().to(device).float()

    vc = VC(tgt_sr, config)
    return cpt, version, net_g, tgt_sr, vc
68
+
69
+ # Выполнение инференса с использованием RVC
70
  def rvc_infer(
71
+ index_path,
72
+ index_rate,
73
+ input_path,
74
+ output_path,
75
+ pitch,
76
+ f0_method,
77
+ cpt,
78
+ version,
79
+ net_g,
80
+ filter_radius,
81
+ tgt_sr,
82
+ volume_envelope,
83
+ protect,
84
+ hop_length,
85
+ vc,
86
+ hubert_model,
87
  f0_min=50,
88
  f0_max=1100,
 
 
 
89
  ):
90
+ audio = load_audio(input_path, 16000)
91
+ pitch_guidance = cpt.get("f0", 1)
92
+ audio_opt = vc.pipeline(
93
+ hubert_model,
94
+ net_g,
95
+ 0,
96
+ audio,
97
+ input_path,
98
+ pitch,
99
+ f0_method,
100
+ index_path,
101
+ index_rate,
102
+ pitch_guidance,
103
+ filter_radius,
104
+ tgt_sr,
105
+ 0,
106
+ volume_envelope,
107
+ version,
108
+ protect,
109
+ hop_length,
110
+ f0_file=None,
111
+ f0_min=f0_min,
112
+ f0_max=f0_max,
113
+ )
114
+ wavfile.write(output_path, tgt_sr, audio_opt)