Thatneos
/

rvc

Model card Files Files and versions

xet

Community

Thatguy099 committed on Mar 18, 2025

Commit

dc81f37

verified ·

1 Parent(s): ffef93c

Update lib/infer.py

Browse files

Files changed (1) hide show

lib/infer.py +262 -221

lib/infer.py CHANGED Viewed

@@ -1,221 +1,262 @@
-import os
-import shutil
-import gc
-import torch
-from multiprocessing import cpu_count
-from lib.modules import VC
-from lib.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
-class Configs:
-    def __init__(self, device, is_half):
-        self.device = device
-        self.is_half = is_half
-        self.n_cpu = 0
-        self.gpu_name = None
-        self.gpu_mem = None
-        self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
-    def device_config(self) -> tuple:
-        if torch.cuda.is_available():
-            i_device = int(self.device.split(":")[-1])
-            self.gpu_name = torch.cuda.get_device_name(i_device)
-            #if (
-#                    ("16" in self.gpu_name and "V100" not in self.gpu_name.upper())
-#                    or "P40" in self.gpu_name.upper()
-#                    or "1060" in self.gpu_name
-#                    or "1070" in self.gpu_name
-#                    or "1080" in self.gpu_name
-#            ):
-#                print("16 series/10 series P40 forced single precision")
-#                self.is_half = False
-#                for config_file in ["32k.json", "40k.json", "48k.json"]:
-#                    with open(BASE_DIR / "src" / "configs" / config_file, "r") as f:
-#                        strr = f.read().replace("true", "false")
-#                    with open(BASE_DIR / "src" / "configs" / config_file, "w") as f:
-#                        f.write(strr)
-#                with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "r") as f:
-#                    strr = f.read().replace("3.7", "3.0")
-#                with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "w") as f:
-#                    f.write(strr)
-#            else:
-#                self.gpu_name = None
-#            self.gpu_mem = int(
-#                torch.cuda.get_device_properties(i_device).total_memory
-#                / 1024
-#                / 1024
-#                / 1024
-#                + 0.4
-#            )
-#            if self.gpu_mem <= 4:
-#                with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "r") as f:
-#                    strr = f.read().replace("3.7", "3.0")
-#                with open(BASE_DIR / "src" / "trainset_preprocess_pipeline_print.py", "w") as f:
-#                    f.write(strr)
-        elif torch.backends.mps.is_available():
-            print("No supported N-card found, use MPS for inference")
-            self.device = "mps"
-        else:
-            print("No supported N-card found, use CPU for inference")
-            self.device = "cpu"
-        if self.n_cpu == 0:
-            self.n_cpu = cpu_count()
-        if self.is_half:
-            # 6G memory config
-            x_pad = 3
-            x_query = 10
-            x_center = 60
-            x_max = 65
-        else:
-            # 5G memory config
-            x_pad = 1
-            x_query = 6
-            x_center = 38
-            x_max = 41
-        if self.gpu_mem != None and self.gpu_mem <= 4:
-            x_pad = 1
-            x_query = 5
-            x_center = 30
-            x_max = 32
-        return x_pad, x_query, x_center, x_max
-def get_model(voice_model):
-    model_dir = os.path.join(os.getcwd(), "models", voice_model)
-    model_filename, index_filename = None, None
-    for file in os.listdir(model_dir):
-        ext = os.path.splitext(file)[1]
-        if ext == '.pth':
-            model_filename = file
-        if ext == '.index':
-            index_filename = file
-    if model_filename is None:
-        print(f'No model file exists in {models_dir}.')
-        return None, None
-    return os.path.join(model_dir, model_filename), os.path.join(model_dir, index_filename) if index_filename else ''
-def infer_audio(
-    model_name,
-    audio_path,
-    f0_change=0,
-    f0_method="rmvpe+",
-    min_pitch="50",
-    max_pitch="1100",
-    crepe_hop_length=128,
-    index_rate=0.75,
-    filter_radius=3,
-    rms_mix_rate=0.25,
-    protect=0.33,
-    split_infer=False,
-    min_silence=500,
-    silence_threshold=-50,
-    seek_step=1,
-    keep_silence=100,
-    do_formant=False,
-    quefrency=0,
-    timbre=1,
-    f0_autotune=False,
-    audio_format="wav",
-    resample_sr=0,
-    hubert_model_path="assets/hubert/hubert_base.pt",
-    rmvpe_model_path="assets/rmvpe/rmvpe.pt",
-    fcpe_model_path="assets/fcpe/fcpe.pt"
-    ):
-    os.environ["rmvpe_model_path"] = rmvpe_model_path
-    os.environ["fcpe_model_path"] = fcpe_model_path
-    configs = Configs('cuda:0', True)
-    vc = VC(configs)
-    pth_path, index_path = get_model(model_name)
-    vc_data = vc.get_vc(pth_path, protect, 0.5)
-    if split_infer:
-        inferred_files = []
-        temp_dir = os.path.join(os.getcwd(), "seperate", "temp")
-        os.makedirs(temp_dir, exist_ok=True)
-        print("Splitting audio to silence and nonsilent segments.")
-        silence_files, nonsilent_files = split_silence_nonsilent(audio_path, min_silence, silence_threshold, seek_step, keep_silence)
-        print(f"Total silence segments: {len(silence_files)}.\nTotal nonsilent segments: {len(nonsilent_files)}.")
-        for i, nonsilent_file in enumerate(nonsilent_files):
-            print(f"Inferring nonsilent audio {i+1}")
-            inference_info, audio_data, output_path = vc.vc_single(
-            0,
-            nonsilent_file,
-            f0_change,
-            f0_method,
-            index_path,
-            index_path,
-            index_rate,
-            filter_radius,
-            resample_sr,
-            rms_mix_rate,
-            protect,
-            audio_format,
-            crepe_hop_length,
-            do_formant,
-            quefrency,
-            timbre,
-            min_pitch,
-            max_pitch,
-            f0_autotune,
-            hubert_model_path
-            )
-            if inference_info[0] == "Success.":
-                print("Inference ran successfully.")
-                print(inference_info[1])
-                print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
-            else:
-                print(f"An error occurred while processing.\n{inference_info[0]}")
-                return None
-            inferred_files.append(output_path)
-        print("Adjusting inferred audio lengths.")
-        adjusted_inferred_files = adjust_audio_lengths(nonsilent_files, inferred_files)
-        print("Combining silence and inferred audios.")
-        output_count = 1
-        while True:
-            output_path = os.path.join(os.getcwd(), "output", f"{os.path.splitext(os.path.basename(audio_path))[0]}{model_name}{f0_method.capitalize()}_{output_count}.{audio_format}")
-            if not os.path.exists(output_path):
-                break
-            output_count += 1
-        output_path = combine_silence_nonsilent(silence_files, adjusted_inferred_files, keep_silence, output_path)
-        [shutil.move(inferred_file, temp_dir) for inferred_file in inferred_files]
-        shutil.rmtree(temp_dir)
-    else:
-        inference_info, audio_data, output_path = vc.vc_single(
-            0,
-            audio_path,
-            f0_change,
-            f0_method,
-            index_path,
-            index_path,
-            index_rate,
-            filter_radius,
-            resample_sr,
-            rms_mix_rate,
-            protect,
-            audio_format,
-            crepe_hop_length,
-            do_formant,
-            quefrency,
-            timbre,
-            min_pitch,
-            max_pitch,
-            f0_autotune,
-            hubert_model_path
-        )
-        if inference_info[0] == "Success.":
-            print("Inference ran successfully.")
-            print(inference_info[1])
-            print("Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
-        else:
-            print(f"An error occurred while processing.\n{inference_info[0]}")
-            del configs, vc
-            gc.collect()
-            return inference_info[0]
-    del configs, vc
-    gc.collect()
-    return output_path

+import os
+import shutil
+import gc
+import torch
+from multiprocessing import cpu_count
+from lib.modules import VC
+from lib.split_audio import split_silence_nonsilent, adjust_audio_lengths, combine_silence_nonsilent
+import logging
+from datetime import datetime
+import traceback
+# Configure logging
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s - %(levelname)s - %(process)d - %(funcName)s:%(lineno)d - %(message)s',
+    handlers=[
+        logging.FileHandler(f'debug_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log'),
+        logging.StreamHandler()
+    ]
+)
+class Configs:
+    def __init__(self, device, is_half):
+        logging.debug(f"Initializing Configs with device={device}, is_half={is_half}")
+        self.device = device
+        self.is_half = is_half
+        self.n_cpu = 0
+        self.gpu_name = None
+        self.gpu_mem = None
+        try:
+            self.x_pad, self.x_query, self.x_center, self.x_max = self.device_config()
+            logging.debug(f"Device configuration: pad={self.x_pad}, query={self.x_query}, "
+                         f"center={self.x_center}, max={self.x_max}")
+        except Exception as e:
+            logging.error(f"Failed to configure device: {str(e)}")
+            raise
+    def device_config(self) -> tuple:
+        if torch.cuda.is_available():
+            i_device = int(self.device.split(":")[-1])
+            self.gpu_name = torch.cuda.get_device_name(i_device)
+            logging.debug(f"GPU detected: {self.gpu_name}")
+        elif torch.backends.mps.is_available():
+            logging.warning("No supported N-card found, falling back to MPS")
+            self.device = "mps"
+        else:
+            logging.warning("No supported N-card found, falling back to CPU")
+            self.device = "cpu"
+        if self.n_cpu == 0:
+            self.n_cpu = cpu_count()
+            logging.debug(f"Detected {self.n_cpu} CPU cores")
+        # Memory configuration settings
+        if self.is_half:
+            x_pad = 3
+            x_query = 10
+            x_center = 60
+            x_max = 65
+        else:
+            x_pad = 1
+            x_query = 6
+            x_center = 38
+            x_max = 41
+        if self.gpu_mem is not None and self.gpu_mem <= 4:
+            x_pad = 1
+            x_query = 5
+            x_center = 30
+            x_max = 32
+        return x_pad, x_query, x_center, x_max
+def get_model(voice_model):
+    model_dir = os.path.join(os.getcwd(), "models", voice_model)
+    logging.debug(f"Searching for model files in directory: {model_dir}")
+    model_filename, index_filename = None, None
+    try:
+        for file in os.listdir(model_dir):
+            ext = os.path.splitext(file)[1]
+            if ext == '.pth':
+                model_filename = file
+                logging.debug(f"Found model file: {file}")
+            elif ext == '.index':
+                index_filename = file
+                logging.debug(f"Found index file: {file}")
+        if model_filename is None:
+            logging.error(f"No model file exists in {model_dir}")
+            raise FileNotFoundError(f"No model file exists in {model_dir}")
+        return os.path.join(model_dir, model_filename), os.path.join(model_dir, index_filename) if index_filename else ''
+    except Exception as e:
+        logging.error(f"Failed to retrieve model files: {str(e)}")
+        raise
+def infer_audio(
+    model_name,
+    audio_path,
+    f0_change=0,
+    f0_method="rmvpe+",
+    min_pitch="50",
+    max_pitch="1100",
+    crepe_hop_length=128,
+    index_rate=0.75,
+    filter_radius=3,
+    rms_mix_rate=0.25,
+    protect=0.33,
+    split_infer=False,
+    min_silence=500,
+    silence_threshold=-50,
+    seek_step=1,
+    keep_silence=100,
+    do_formant=False,
+    quefrency=0,
+    timbre=1,
+    f0_autotune=False,
+    audio_format="wav",
+    resample_sr=0,
+    hubert_model_path="assets/hubert/hubert_base.pt",
+    rmvpe_model_path="assets/rmvpe/rmvpe.pt",
+    fcpe_model_path="assets/fcpe/fcpe.pt"
+):
+    logging.info(f"Starting inference with parameters:")
+    logging.info(f"- Model: {model_name}")
+    logging.info(f"- Audio path: {audio_path}")
+    logging.info(f"- F0 change: {f0_change}, Method: {f0_method}")
+    logging.info(f"- Split inference: {split_infer}")
+    os.environ["rmvpe_model_path"] = rmvpe_model_path
+    os.environ["fcpe_model_path"] = fcpe_model_path
+    try:
+        configs = Configs('cuda:0', True)
+        vc = VC(configs)
+        pth_path, index_path = get_model(model_name)
+        vc_data = vc.get_vc(pth_path, protect, 0.5)
+        if split_infer:
+            logging.info("Split inference mode enabled")
+            inferred_files = []
+            temp_dir = os.path.join(os.getcwd(), "seperate", "temp")
+            os.makedirs(temp_dir, exist_ok=True)
+            try:
+                silence_files, nonsilent_files = split_silence_nonsilent(
+                    audio_path, min_silence, silence_threshold, seek_step, keep_silence
+                )
+                logging.debug(f"Silence segments: {len(silence_files)}")
+                logging.debug(f"Nonsilent segments: {len(nonsilent_files)}")
+                for i, nonsilent_file in enumerate(nonsilent_files):
+                    logging.info(f"Processing segment {i+1}/{len(nonsilent_files)}")
+                    start_time = datetime.now()
+                    inference_info, audio_data, output_path = vc.vc_single(
+                        0,
+                        nonsilent_file,
+                        f0_change,
+                        f0_method,
+                        index_path,
+                        index_path,
+                        index_rate,
+                        filter_radius,
+                        resample_sr,
+                        rms_mix_rate,
+                        protect,
+                        audio_format,
+                        crepe_hop_length,
+                        do_formant,
+                        quefrency,
+                        timbre,
+                        min_pitch,
+                        max_pitch,
+                        f0_autotune,
+                        hubert_model_path
+                    )
+                    process_time = (datetime.now() - start_time).total_seconds()
+                    logging.debug(f"Segment processing time: {process_time:.2f}s")
+                    if inference_info[0] == "Success.":
+                        logging.info("Segment processed successfully")
+                        logging.debug(inference_info[1])
+                        logging.debug(f"Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
+                        inferred_files.append(output_path)
+                    else:
+                        logging.error(f"Error processing segment {i+1}: {inference_info[0]}")
+                        raise RuntimeError(f"Error processing segment {i+1}")
+                logging.info("Adjusting inferred audio lengths")
+                adjusted_inferred_files = adjust_audio_lengths(nonsilent_files, inferred_files)
+                logging.info("Combining silence and inferred audios")
+                output_count = 1
+                while True:
+                    output_path = os.path.join(
+                        os.getcwd(),
+                        "output",
+                        f"{os.path.splitext(os.path.basename(audio_path))[0]}{model_name}"
+                        f"{f0_method.capitalize()}_{output_count}.{audio_format}"
+                    )
+                    if not os.path.exists(output_path):
+                        break
+                    output_count += 1
+                output_path = combine_silence_nonsilent(silence_files, adjusted_inferred_files, keep_silence, output_path)
+                # Cleanup temporary files
+                for inferred_file in inferred_files:
+                    shutil.move(inferred_file, temp_dir)
+                shutil.rmtree(temp_dir)
+            except Exception as e:
+                logging.error(f"Split inference failed: {str(e)}")
+                raise
+        else:
+            logging.info("Single inference mode")
+            start_time = datetime.now()
+            inference_info, audio_data, output_path = vc.vc_single(
+                0,
+                audio_path,
+                f0_change,
+                f0_method,
+                index_path,
+                index_path,
+                index_rate,
+                filter_radius,
+                resample_sr,
+                rms_mix_rate,
+                protect,
+                audio_format,
+                crepe_hop_length,
+                do_formant,
+                quefrency,
+                timbre,
+                min_pitch,
+                max_pitch,
+                f0_autotune,
+                hubert_model_path
+            )
+            process_time = (datetime.now() - start_time).total_seconds()
+            logging.debug(f"Total processing time: {process_time:.2f}s")
+            if inference_info[0] == "Success.":
+                logging.info("Inference completed successfully")
+                logging.debug(inference_info[1])
+                logging.debug(f"Times:\nnpy: %.2fs f0: %.2fs infer: %.2fs\nTotal time: %.2fs" % (*inference_info[2],))
+            else:
+                logging.error(f"Inference failed: {inference_info[0]}")
+                raise RuntimeError(inference_info[0])
+        del configs, vc
+        gc.collect()
+        return output_path
+    except Exception as e:
+        logging.error(f"Inference failed: {str(e)}")
+        logging.error(traceback.format_exc())
+        raise