Spaces:

JacobLinCool
/

ZeroRVC

Paused

App Files Files Community

JacobLinCool committed on Jul 3, 2024

Commit

a0ad823

1 Parent(s): 28cee66

feat: hubert features

Browse files

Files changed (2) hide show

app.py +15 -1
infer/modules/train/extract_feature_print.py +72 -113

app.py CHANGED Viewed

@@ -7,6 +7,7 @@ import shutil
 from glob import glob
 from infer.modules.train.preprocess import PreProcess
 from infer.modules.train.extract.extract_f0_rmvpe import FeatureInput
 from infer.modules.train.train import train
 from infer.lib.train.process_ckpt import extract_small_model
 from zero import zero
@@ -60,6 +61,19 @@ def extract_features(exp_dir: str) -> str:
     fi.logfile.seek(0)
     log = fi.logfile.read()
     if err:
         log = f"Error: {err}\n{log}"
@@ -195,7 +209,7 @@ with gr.Blocks() as app:
         with gr.Column():
             train_btn = gr.Button(value="Train", variant="primary")
         with gr.Column():
-            latest_model = gr.File(label="Latest model")
     with gr.Row():
         with gr.Column():

 from glob import glob
 from infer.modules.train.preprocess import PreProcess
 from infer.modules.train.extract.extract_f0_rmvpe import FeatureInput
+from infer.modules.train.extract_feature_print import HubertFeatureExtractor
 from infer.modules.train.train import train
 from infer.lib.train.process_ckpt import extract_small_model
 from zero import zero
     fi.logfile.seek(0)
     log = fi.logfile.read()
+    if err:
+        log = f"Error: {err}\n{log}"
+        return log
+    hfe = HubertFeatureExtractor(exp_dir)
+    try:
+        hfe.run()
+    except Exception as e:
+        err = e
+    hfe.logfile.seek(0)
+    log += hfe.logfile.read()
     if err:
         log = f"Error: {err}\n{log}"
         with gr.Column():
             train_btn = gr.Button(value="Train", variant="primary")
         with gr.Column():
+            latest_model = gr.File(label="Latest checkpoint")
     with gr.Row():
         with gr.Column():

infer/modules/train/extract_feature_print.py CHANGED Viewed

@@ -1,65 +1,30 @@
 import os
-import sys
 import traceback
-os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
-os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
-device = sys.argv[1]
-n_part = int(sys.argv[2])
-i_part = int(sys.argv[3])
-if len(sys.argv) == 7:
-    exp_dir = sys.argv[4]
-    version = sys.argv[5]
-    is_half = sys.argv[6].lower() == "true"
-else:
-    i_gpu = sys.argv[4]
-    exp_dir = sys.argv[5]
-    os.environ["CUDA_VISIBLE_DEVICES"] = str(i_gpu)
-    version = sys.argv[6]
-    is_half = sys.argv[7].lower() == "true"
 import fairseq
 import numpy as np
 import soundfile as sf
 import torch
 import torch.nn.functional as F
-if "privateuseone" not in device:
-    device = "cpu"
-    if torch.cuda.is_available():
-        device = "cuda"
-    elif torch.backends.mps.is_available():
-        device = "mps"
-else:
-    import torch_directml
-    device = torch_directml.device(torch_directml.default_device())
-    def forward_dml(ctx, x, scale):
-        ctx.scale = scale
-        res = x.clone().detach()
-        return res
-    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
-f = open("%s/extract_f0_feature.log" % exp_dir, "a+")
-def printt(strr):
-    print(strr)
-    f.write("%s\n" % strr)
-    f.flush()
-printt(" ".join(sys.argv))
 model_path = "assets/hubert/hubert_base.pt"
-printt("exp_dir: " + exp_dir)
-wavPath = "%s/1_16k_wavs" % exp_dir
-outPath = (
-    "%s/3_feature256" % exp_dir if version == "v1" else "%s/3_feature768" % exp_dir
 )
-os.makedirs(outPath, exist_ok=True)
 # wave must be 16k, hop_size=320
@@ -77,66 +42,60 @@ def readwave(wav_path, normalize=False):
     return feats
-# HuBERT model
-printt("load model(s) from {}".format(model_path))
-# if hubert model is exist
-if os.access(model_path, os.F_OK) == False:
-    printt(
-        "Error: Extracting is shut down because %s does not exist, you may download it from https://huggingface.co/lj1995/VoiceConversionWebUI/tree/main"
-        % model_path
-    )
-    exit(0)
-models, saved_cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
-    [model_path],
-    suffix="",
-)
-model = models[0]
-model = model.to(device)
-printt("move model to %s" % device)
-if is_half:
-    if device not in ["mps", "cpu"]:
-        model = model.half()
-model.eval()
-todo = sorted(list(os.listdir(wavPath)))[i_part::n_part]
-n = max(1, len(todo) // 10)  # 最多打印十条
-if len(todo) == 0:
-    printt("no-feature-todo")
-else:
-    printt("all-feature-%s" % len(todo))
-    for idx, file in enumerate(todo):
-        try:
-            if file.endswith(".wav"):
-                wav_path = "%s/%s" % (wavPath, file)
-                out_path = "%s/%s" % (outPath, file.replace("wav", "npy"))
-                if os.path.exists(out_path):
-                    continue
-                feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
-                padding_mask = torch.BoolTensor(feats.shape).fill_(False)
-                inputs = {
-                    "source": (
-                        feats.half().to(device)
-                        if is_half and device not in ["mps", "cpu"]
-                        else feats.to(device)
-                    ),
-                    "padding_mask": padding_mask.to(device),
-                    "output_layer": 9 if version == "v1" else 12,  # layer 9
-                }
-                with torch.no_grad():
-                    logits = model.extract_features(**inputs)
-                    feats = (
-                        model.final_proj(logits[0]) if version == "v1" else logits[0]
-                    )
-                feats = feats.squeeze(0).float().cpu().numpy()
-                if np.isnan(feats).sum() == 0:
-                    np.save(out_path, feats, allow_pickle=False)
-                else:
-                    printt("%s-contains nan" % file)
-                if idx % n == 0:
-                    printt("now-%s,all-%s,%s,%s" % (len(todo), idx, file, feats.shape))
-        except:
-            printt(traceback.format_exc())
-    printt("all-feature-done")

 import os
 import traceback
 import fairseq
 import numpy as np
 import soundfile as sf
 import torch
 import torch.nn.functional as F
+device = "cpu"  # fallback; replaced below when CUDA or MPS is available
+if torch.cuda.is_available():
+    device = "cuda"
+elif torch.backends.mps.is_available():
+    device = "mps"
 model_path = "assets/hubert/hubert_base.pt"  # checkpoint loaded at import time
+models, saved_cfg, task = fairseq.checkpoint_utils.load_model_ensemble_and_task(
+    [model_path],
+    suffix="",
 )
+model = models[0]  # only the first returned model is used
+model = model.to(device)
+is_half = False  # hard-coded off, so the half() conversion below never runs
+if is_half:
+    if device not in ["mps", "cpu"]:
+        model = model.half()
+model.eval()
 # wave must be 16k, hop_size=320
     return feats
+class HubertFeatureExtractor:
+    def __init__(self, exp_dir: str):
+        self.exp_dir = exp_dir
+        self.logfile = open("%s/extract_f0_feature.log" % exp_dir, "a+")
+        self.wavPath = "%s/1_16k_wavs" % exp_dir
+        self.outPath = "%s/3_feature768" % exp_dir
+        os.makedirs(self.outPath, exist_ok=True)
+    def println(self, strr):
+        print(strr)
+        self.logfile.write("%s\n" % strr)
+        self.logfile.flush()
+    def run(self):
+        todo = sorted(list(os.listdir(self.wavPath)))
+        n = max(1, len(todo) // 10)  # 最多打印十条
+        if len(todo) == 0:
+            self.println("no-feature-todo")
+        else:
+            self.println("all-feature-%s" % len(todo))
+            for idx, file in enumerate(todo):
+                try:
+                    if file.endswith(".wav"):
+                        wav_path = "%s/%s" % (self.wavPath, file)
+                        out_path = "%s/%s" % (self.outPath, file.replace("wav", "npy"))
+                        if os.path.exists(out_path):
+                            continue
+                        feats = readwave(wav_path, normalize=saved_cfg.task.normalize)
+                        padding_mask = torch.BoolTensor(feats.shape).fill_(False)
+                        inputs = {
+                            "source": (
+                                feats.half().to(device)
+                                if is_half and device not in ["mps", "cpu"]
+                                else feats.to(device)
+                            ),
+                            "padding_mask": padding_mask.to(device),
+                            "output_layer": 12,
+                        }
+                        with torch.no_grad():
+                            logits = model.extract_features(**inputs)
+                            feats = logits[0]
+                        feats = feats.squeeze(0).float().cpu().numpy()
+                        if np.isnan(feats).sum() == 0:
+                            np.save(out_path, feats, allow_pickle=False)
+                        else:
+                            self.println("%s-contains nan" % file)
+                        if idx % n == 0:
+                            self.println(
+                                "now-%s,all-%s,%s,%s"
+                                % (len(todo), idx, file, feats.shape)
+                            )
+                except:
+                    self.println(traceback.format_exc())
+            self.println("all-feature-done")