Spaces:

noblebarkrr
/

vbach_lite_ui

Sleeping

App Files Files Community

noblebarkrr committed on Feb 7, 2025

Commit

a14ef04

verified ·

1 Parent(s): 7345263

Update infer.py

Browse files

Files changed (1) hide show

infer.py +94 -127

infer.py CHANGED Viewed

@@ -1,147 +1,114 @@
-import asyncio
-import gc
-import os
-from functools import lru_cache
-import edge_tts
-import gradio as gr
-import numpy as np
 import torch
 from fairseq import checkpoint_utils
 from scipy.io import wavfile
-# Используем относительный импорт
-from .config import Config
-from .pipeline import VC
 from rvc.lib.algorithm.synthesizers import Synthesizer
 from rvc.lib.my_utils import load_audio
-# Конфигурация потоков и памяти
-torch.set_num_threads(4)
-os.environ["OMP_NUM_THREADS"] = "4"
-os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
-RVC_MODELS_DIR = os.path.join(os.getcwd(), "models", "RVC_models")
-EMBEDDERS_DIR = os.path.join(os.getcwd(), "rvc", "models", "embedders")
-HUBERT_BASE_PATH = os.path.join(EMBEDDERS_DIR, "hubert_base.pt")
-OUTPUT_DIR = os.path.join(os.getcwd(), "output", "RVC_output")
-config = Config()
-# Остальной
-# Кэшируем все тяжелые модели
-@lru_cache(maxsize=2)
-def load_hubert():
-    models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
-        [HUBERT_BASE_PATH], suffix=""
     )
-    hubert = models[0].to(config.device).float().eval()
     return hubert
-@lru_cache(maxsize=2)
-def load_rvc_model(rvc_model):
-    model_dir = os.path.join(RVC_MODELS_DIR, rvc_model)
-    model_files = os.listdir(model_dir)
-    model_path = next((os.path.join(model_dir, f) for f in model_files if f.endswith(".pth")), None)
-    index_path = next((os.path.join(model_dir, f) for f in model_files if f.endswith(".index")), None)
-    if not model_path:
-        raise ValueError(f"Model {rvc_model} not found!")
     cpt = torch.load(model_path, map_location="cpu", weights_only=True)
     tgt_sr = cpt["config"][-1]
     cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]
     net_g = Synthesizer(
-        *cpt["config"],
-        use_f0=cpt.get("f0", 1),
-        input_dim=768 if cpt.get("version", "v1") == "v2" else 256
     )
-    net_g.load_state_dict(cpt["weight"], strict=False)
-    net_g = net_g.to(config.device).float().eval()
-    return cpt, net_g, tgt_sr, index_path
 def rvc_infer(
-    voice_rvc=None,
-    voice_tts=None,
-    input_audio=None,
-    input_text=None,
-    f0_method="rmvpe",
-    hop_length=128,
-    pitch=0,
-    index_rate=0.5,
-    volume_envelope=0.25,
-    protect=0.33,
-    filter_radius=3,
     f0_min=50,
     f0_max=1100,
-    output_format="wav",
-    use_tts=False,
-    progress=gr.Progress()
 ):
-    try:
-        # Инициализация прогресса
-        progress(0, desc="[⚙️] Инициализация...")
-        # Загрузка моделей
-        hubert = load_hubert()
-        cpt, net_g, tgt_sr, index_path = load_rvc_model(voice_rvc)
-        vc = VC(tgt_sr, config)
-        # Обработка аудио
-        if use_tts:
-            progress(0.2, desc="[🎙️] Синтез речи...")
-            input_audio = os.path.join(OUTPUT_DIR, "tts_temp.wav")
-            asyncio.run(self.text_to_speech(input_text, voice_tts, input_audio))
-        audio = load_audio(input_audio, 16000)
-        # Ускоренный инференс
-        progress(0.5, desc="[🌌] Преобразование голоса...")
-        with torch.inference_mode():
-            audio_opt = vc.pipeline(
-                hubert,
-                net_g,
-                0,
-                audio,
-                input_audio,
-                pitch,
-                f0_method,
-                index_path,
-                index_rate,
-                cpt.get("f0", 1),
-                filter_radius,
-                volume_envelope,
-                cpt.get("version", "v1"),
-                protect,
-                hop_length,
-                f0_min=f0_min,
-                f0_max=f0_max,
-            )
-        # Сохранение результата
-        output_audio = os.path.join(OUTPUT_DIR, f"Voice_Converted.{output_format}")
-        wavfile.write(output_audio, tgt_sr, audio_opt)
-        # Оптимизированная конвертация формата
-        if output_format != "wav":
-            self.convert_audio_format(output_audio, output_format)
-        # Очистка памяти
-        del hubert, cpt, net_g, vc, audio_opt
-        gc.collect()
-        progress(1.0, desc=f"[✅] Готово: {output_audio}")
-        return output_audio
-    except Exception as e:
-        raise gr.Error(f"Ошибка: {str(e)}")
-# Оптимизированная конвертация формата
-def convert_audio_format(input_path, output_format):
-    import soundfile as sf
-    data, sr = sf.read(input_path)
-    sf.write(input_path, data, sr, format=output_format)

 import torch
+from multiprocessing import cpu_count
 from fairseq import checkpoint_utils
 from scipy.io import wavfile
 from rvc.lib.algorithm.synthesizers import Synthesizer
 from rvc.lib.my_utils import load_audio
+from .pipeline import VC
+# Конфигурация устройства и параметров
class Config:
    """CPU-only runtime settings for the inference code.

    Attributes set on construction:
        device: always the string "cpu".
        is_half: always False.
        n_cpu: the value returned by ``multiprocessing.cpu_count()``.
        gpu_name, gpu_mem: always None.
        x_pad, x_query, x_center, x_max: the four values returned by
            ``device_config()``.
    """

    def __init__(self):
        self.device = self.get_device()
        # Half precision is disabled because only the CPU is used.
        self.is_half = False
        self.n_cpu = cpu_count()
        self.gpu_name = self.gpu_mem = None
        window = self.device_config()
        self.x_pad, self.x_query, self.x_center, self.x_max = window

    def get_device(self):
        """Return the device name; only the CPU is ever used."""
        return "cpu"

    def device_config(self):
        """Reset the instance to CPU / full precision and return the x_* values.

        Prints a notice, overwrites ``device`` and ``is_half``, and returns
        the tuple ``(x_pad, x_query, x_center, x_max)``.
        """
        print("Используется CPU")
        self.device, self.is_half = "cpu", False
        # Reduced values for CPU.
        return 1, 6, 38, 41
+# Загрузка модели Hubert
def load_hubert(device, is_half, model_path):
    """Load a HuBERT checkpoint through fairseq and prepare it for inference.

    Args:
        device: Target handed to the model's ``.to()`` call.
        is_half: Accepted for interface compatibility; not used here, the
            model is always converted with ``.float()``.
        model_path: Path of the checkpoint file passed to fairseq.

    Returns:
        The first model of the loaded ensemble, moved to ``device``,
        converted with ``.float()`` and switched to eval mode.
    """
    ensemble, _saved_cfg, _task = checkpoint_utils.load_model_ensemble_and_task(
        [model_path], suffix=""
    )
    # Float precision is used because inference runs on the CPU.
    model = ensemble[0].to(device).float()
    model.eval()
    return model
+# Получение голосового преобразователя
def get_vc(device, is_half, config, model_path):
    """Load an RVC voice checkpoint and build the objects used for conversion.

    Args:
        device: Target handed to the synthesizer's ``.to()`` call.
        is_half: Forwarded to ``Synthesizer`` as ``is_half``.
        config: Configuration object forwarded to ``VC``.
        model_path: Path of the voice-model checkpoint file.

    Returns:
        A tuple ``(cpt, version, net_g, tgt_sr, vc)``: the loaded checkpoint
        dict, its ``"version"`` entry (``"v1"`` when absent), the prepared
        synthesizer, the last element of the checkpoint's ``"config"`` list,
        and a ``VC`` instance built from that value and ``config``.

    Raises:
        ValueError: If the checkpoint lacks a ``"config"`` or ``"weight"`` key.
    """
    checkpoint = torch.load(model_path, map_location="cpu", weights_only=True)
    if not ("config" in checkpoint and "weight" in checkpoint):
        raise ValueError(
            f"Некорректный формат для {model_path}. "
            "Используйте голосовую модель, обученную с использованием RVC v2."
        )

    model_cfg = checkpoint["config"]
    sample_rate = model_cfg[-1]
    # Overwrite the third-from-last config entry with the row count of the
    # emb_g weight stored in the checkpoint.
    model_cfg[-3] = checkpoint["weight"]["emb_g.weight"].shape[0]

    version = checkpoint.get("version", "v1")
    synthesizer = Synthesizer(
        *model_cfg,
        use_f0=checkpoint.get("f0", 1),
        input_dim=256 if version != "v2" else 768,
        is_half=is_half,
    )

    # enc_q is dropped before the weights are loaded.
    # NOTE(review): presumably not needed for inference; confirm against Synthesizer.
    del synthesizer.enc_q
    print(synthesizer.load_state_dict(checkpoint["weight"], strict=False))
    synthesizer.eval().to(device)
    # Float precision is used because inference runs on the CPU.
    synthesizer = synthesizer.float()

    converter = VC(sample_rate, config)
    return checkpoint, version, synthesizer, sample_rate, converter
+# Выполнение инференса с использованием RVC
 def rvc_infer(
+    index_path,
+    index_rate,
+    input_path,
+    output_path,
+    pitch,
+    f0_method,
+    cpt,
+    version,
+    net_g,
+    filter_radius,
+    tgt_sr,
+    volume_envelope,
+    protect,
+    hop_length,
+    vc,
+    hubert_model,
     f0_min=50,
     f0_max=1100,
 ):
+    audio = load_audio(input_path, 16000)
+    pitch_guidance = cpt.get("f0", 1)
+    audio_opt = vc.pipeline(
+        hubert_model,
+        net_g,
+        0,
+        audio,
+        input_path,
+        pitch,
+        f0_method,
+        index_path,
+        index_rate,
+        pitch_guidance,
+        filter_radius,
+        tgt_sr,
+        0,
+        volume_envelope,
+        version,
+        protect,
+        hop_length,
+        f0_file=None,
+        f0_min=f0_min,
+        f0_max=f0_max,
+    )
+    wavfile.write(output_path, tgt_sr, audio_opt)