Yoshitaka16
/

hubert_base

Model card · Files and versions

xet

Community

Yoshitaka16 committed on Sep 11, 2025

Commit

1d438ef

verified ·

1 Parent(s): 9cd255d

Update F0Extractor.py

Browse files

Files changed (1) hide show

F0Extractor.py +70 -54

F0Extractor.py CHANGED Viewed

@@ -43,57 +43,73 @@ class F0Extractor:
         return resampy.resample(self.x, self.sample_rate, 16000)
     def extract_f0(self):
-    f0 = None
-    method = self.method
-    if method == "crepe":
-        wav16k_torch = torch.FloatTensor(self.wav16k).unsqueeze(0).to(config.device)
-        f0 = torchcrepe.predict(
-            wav16k_torch,
-            sample_rate=16000,
-            hop_length=160,
-            batch_size=512,
-            fmin=self.f0_min,
-            fmax=self.f0_max,
-            device=config.device,
-        )
-        f0 = f0[0].cpu().numpy()
-    elif method == "fcpe":
-        audio = librosa.to_mono(self.x)
-        audio_length = len(audio)
-        f0_target_length = (audio_length // self.hop_length) + 1
-        audio = (
-            torch.from_numpy(audio)
-            .float()
-            .unsqueeze(0)
-            .unsqueeze(-1)
-            .to(config.device)
-        )
-        model = torchfcpe.spawn_bundled_infer_model(device=config.device)
-        f0 = model.infer(
-            audio,
-            sr=self.sample_rate,
-            decoder_mode="local_argmax",
-            threshold=0.006,
-            f0_min=self.f0_min,
-            f0_max=self.f0_max,
-            interp_uv=False,
-            output_interp_target_length=f0_target_length,
-        )
-        f0 = f0.squeeze().cpu().numpy()
-    elif method == "rmvpe":
-        model_rmvpe = RMVPE0Predictor(
-            os.path.join(str(RVC_MODELS_DIR), "predictors", "rmvpe.pt"),
-            device=config.device,
-        )
-        f0 = model_rmvpe.infer_from_audio(self.wav16k, thred=0.03)
-    elif method == "djcm":
-        from ultimate_rvc.rvc.lib.predictors.djcm_module import DJCM
-        model_djcm = DJCM(
-            model_path=os.path.join(str(RVC_MODELS_DIR), "predictors", "djcm.pt"),
-            device=config.device
-        )
-        f0 = model_djcm.infer_from_audio(self.wav16k)
-    else:
-        raise ValueError(f"Unknown method: {self.method}")
-    return self.hz_to_cents(f0, librosa.midi_to_hz(0))

         return resampy.resample(self.x, self.sample_rate, 16000)
     def extract_f0(self):
+        """Estimate F0 with the selected predictor and return it in cents.
+
+        ``self.method`` chooses the predictor: ``"crepe"``, ``"fcpe"``,
+        ``"rmvpe"`` or ``"djcm"``. The crepe, rmvpe and djcm branches read
+        ``self.wav16k``; the fcpe branch reads ``self.x`` together with
+        ``self.sample_rate`` and ``self.hop_length``.
+
+        Returns:
+            The value of ``self.hz_to_cents`` applied to the predicted F0,
+            with ``librosa.midi_to_hz(0)`` as the reference frequency.
+
+        Raises:
+            ValueError: If ``self.method`` is not one of the names above.
+        """
+        algo = self.method
+        # Reject unsupported names up front, before any model is built.
+        if algo not in ("crepe", "fcpe", "rmvpe", "djcm"):
+            raise ValueError(f"Unknown method: {self.method}")
+
+        if algo == "crepe":
+            # Add a leading axis before handing the signal to torchcrepe.
+            batch = torch.FloatTensor(self.wav16k).unsqueeze(0)
+            batch = batch.to(config.device)
+            prediction = torchcrepe.predict(
+                batch,
+                sample_rate=16000,
+                hop_length=160,
+                batch_size=512,
+                fmin=self.f0_min,
+                fmax=self.f0_max,
+                device=config.device,
+            )
+            pitch_hz = prediction[0].cpu().numpy()
+        elif algo == "fcpe":
+            mono = librosa.to_mono(self.x)
+            # Requested output length: one value per hop, plus one.
+            n_frames = (len(mono) // self.hop_length) + 1
+            # Add a leading and a trailing singleton axis.
+            mono_batch = torch.from_numpy(mono).float()
+            mono_batch = mono_batch.unsqueeze(0).unsqueeze(-1).to(config.device)
+            fcpe = torchfcpe.spawn_bundled_infer_model(device=config.device)
+            pitch_hz = (
+                fcpe.infer(
+                    mono_batch,
+                    sr=self.sample_rate,
+                    decoder_mode="local_argmax",
+                    threshold=0.006,
+                    f0_min=self.f0_min,
+                    f0_max=self.f0_max,
+                    interp_uv=False,
+                    output_interp_target_length=n_frames,
+                )
+                .squeeze()
+                .cpu()
+                .numpy()
+            )
+        elif algo == "rmvpe":
+            weights = os.path.join(str(RVC_MODELS_DIR), "predictors", "rmvpe.pt")
+            rmvpe = RMVPE0Predictor(weights, device=config.device)
+            pitch_hz = rmvpe.infer_from_audio(self.wav16k, thred=0.03)
+        else:
+            # Only "djcm" can reach this branch; its module is imported lazily.
+            from ultimate_rvc.rvc.lib.predictors.djcm_module import DJCM
+            weights = os.path.join(str(RVC_MODELS_DIR), "predictors", "djcm.pt")
+            djcm = DJCM(model_path=weights, device=config.device)
+            pitch_hz = djcm.infer_from_audio(self.wav16k)
+        return self.hz_to_cents(pitch_hz, librosa.midi_to_hz(0))
+    def plot_f0(self, f0):
+        """Display a line plot of ``f0``, titled with ``self.method``.
+
+        Args:
+            f0: Sequence of F0 values to plot, one per frame (the y-axis is
+                labelled in cents).
+        """
+        # Imported lazily so matplotlib is only needed when plotting.
+        import matplotlib.pyplot as pyplot
+        pyplot.figure(figsize=(10, 4))
+        pyplot.plot(f0)
+        pyplot.title(self.method)
+        pyplot.xlabel("Time (frames)")
+        pyplot.ylabel("F0 (cents)")
+        pyplot.show()
+    @staticmethod
+    def hz_to_cents(F, F_ref=55.0):
+        """Convert frequencies in Hz to cents relative to ``F_ref``.
+
+        Declared as a static method so that both ``self.hz_to_cents(f0, ref)``
+        and ``F0Extractor.hz_to_cents(f0, ref)`` work; without the decorator
+        the instance call would bind the instance to ``F`` and fail.
+
+        Args:
+            F: Scalar or array-like of frequencies in Hz. The input is copied,
+                never modified in place.
+            F_ref: Reference frequency in Hz that maps to 0 cents.
+
+        Returns:
+            A float NumPy array holding ``1200 * log2(F / F_ref)``. Entries
+            where ``F`` is exactly 0 are returned as NaN.
+        """
+        # np.array copies by default, so the caller's data stays untouched.
+        F_temp = np.array(F, dtype=float)
+        # Mark zeros as NaN so they do not turn into -inf in the log.
+        F_temp[F_temp == 0] = np.nan
+        F_cents = 1200 * np.log2(F_temp / F_ref)
+        return F_cents