Yoshitaka16
/

hubert_base

Model card Files Files and versions

xet

Community

Yoshitaka16 committed on Sep 11, 2025

Commit

580ce58

verified ·

1 Parent(s): c4cc266

Update extract.py

Browse files

Files changed (1) hide show

extract.py +9 -55

extract.py CHANGED Viewed

@@ -1,10 +1,9 @@
 """
 Module which exposes functionality for extracting training features from
-audio datasets.
 """
 from __future__ import annotations
 from multiprocessing import cpu_count
 from ultimate_rvc.core.common import (
@@ -24,7 +23,6 @@ from ultimate_rvc.typing_extra import (
     TrainingF0Method,
 )
 def extract_features(
     model_name: str,
     f0_method: TrainingF0Method = TrainingF0Method.RMVPE,
@@ -36,47 +34,6 @@ def extract_features(
     hardware_acceleration: DeviceType = DeviceType.AUTOMATIC,
     gpu_ids: set[int] | None = None,
 ) -> None:
-    """
-    Extract features from the preprocessed dataset associated with a
-    voice model to be trained.
-    Parameters
-    ----------
-    model_name : str
-        The name of the voice model to be trained.
-    f0_method : TrainingF0Method, default=TrainingF0Method.RMVPE
-        The method to use for extracting pitch features.
-    hop_length : int, default=128
-        The hop length to use for extracting pitch features. Only used
-        with the CREPE pitch extraction method.
-    embedder_model : EmbedderModel, default=EmbedderModel.CONTENTVEC
-        The model to use for extracting audio embeddings.
-    custom_embedder_model : StrPath, optional
-        The name of the custom embedder model to use for extracting
-        audio embeddings.
-    include_mutes : int, default=2
-        The number of mute audio files to include in the generated
-        training file list. Adding silent files enables the voice model
-        to handle pure silence in inferred audio files. If the
-        preprocessed audio dataset already contains segments of pure
-        silence, set this to 0.
-    cpu_cores : int, default=cpu_count()
-        The number of CPU cores to use for feature extraction.
-    hardware_acceleration : DeviceType, default=DeviceType.AUTOMATIC
-        The type of hardware acceleration to use for feature extraction.
-        `AUTOMATIC` will select the first available GPU and fall back to
-        CPU if no GPUs are available.
-    gpu_ids : set[int], optional
-        Set of ids of the GPUs to use for feature extraction when `GPU`
-        is selected for hardware acceleration.
-    Raises
-    ------
-    ModelAsssociatedEntityNotFoundError
-        If no preprocessed dataset audio files are associated with the
-        voice model identified by the provided name.
-    """
     model_path = validate_model(model_name, Entity.TRAINING_MODEL)
     sliced_audios16k_path = model_path / "sliced_audios_16k"
     if not sliced_audios16k_path.is_dir() or not any(sliced_audios16k_path.iterdir()):
@@ -100,19 +57,20 @@ def extract_features(
         chosen_embedder_model = str(custom_embedder_model_path)
         embedder_model_id = f"custom_{combined_file_hash}"
     f0_method_id = f0_method
     if f0_method in {TrainingF0Method.CREPE, TrainingF0Method.CREPE_TINY}:
         f0_method_id = f"{f0_method}_{hop_length}"
     device_type, device_ids = validate_devices(hardware_acceleration, gpu_ids)
     devices = (
         [f"{device_type}:{device_id}" for device_id in device_ids]
         if device_ids
         else [device_type]
     )
-    # NOTE The lazy_import function does not work with the package below
-    # so we import it here manually
     from ultimate_rvc.rvc.train.extract import extract  # noqa: PLC0415
     file_infos = extract.initialize_extraction(
@@ -125,24 +83,20 @@ def extract_features(
         chosen_embedder_model,
         combined_file_hash,
     )
     display_progress("[~] Extracting pitch features...")
     extract.run_pitch_extraction(file_infos, devices, f0_method, hop_length, cpu_cores)
     display_progress("[~] Extracting audio embeddings...")
     extract.run_embedding_extraction(
         file_infos,
         devices,
         embedder_model,
-        (
-            str(custom_embedder_model_path)
-            if custom_embedder_model_path is not None
-            else None
-        ),
         cpu_cores,
     )
-    # NOTE The lazy_import function does not work with the package below
-    # so we import it here manually
-    from ultimate_rvc.rvc.train.extract import preparing_files  # noqa: PLC0415
     preparing_files.generate_config(str(model_path))
     preparing_files.generate_filelist(
         str(model_path),

 """
 Module which exposes functionality for extracting training features from
+audio datasets, now with DJCM support.
 """
 from __future__ import annotations
 from multiprocessing import cpu_count
 from ultimate_rvc.core.common import (
     TrainingF0Method,
 )
 def extract_features(
     model_name: str,
     f0_method: TrainingF0Method = TrainingF0Method.RMVPE,
     hardware_acceleration: DeviceType = DeviceType.AUTOMATIC,
     gpu_ids: set[int] | None = None,
 ) -> None:
     model_path = validate_model(model_name, Entity.TRAINING_MODEL)
     sliced_audios16k_path = model_path / "sliced_audios_16k"
     if not sliced_audios16k_path.is_dir() or not any(sliced_audios16k_path.iterdir()):
         chosen_embedder_model = str(custom_embedder_model_path)
         embedder_model_id = f"custom_{combined_file_hash}"
+    # Generate f0_method_id
     f0_method_id = f0_method
     if f0_method in {TrainingF0Method.CREPE, TrainingF0Method.CREPE_TINY}:
         f0_method_id = f"{f0_method}_{hop_length}"
+    elif f0_method == TrainingF0Method.DJCM:
+        f0_method_id = "djcm"  # DJCM does not need hop_length
     device_type, device_ids = validate_devices(hardware_acceleration, gpu_ids)
     devices = (
         [f"{device_type}:{device_id}" for device_id in device_ids]
         if device_ids
         else [device_type]
     )
     from ultimate_rvc.rvc.train.extract import extract  # noqa: PLC0415
     file_infos = extract.initialize_extraction(
         chosen_embedder_model,
         combined_file_hash,
     )
     display_progress("[~] Extracting pitch features...")
     extract.run_pitch_extraction(file_infos, devices, f0_method, hop_length, cpu_cores)
     display_progress("[~] Extracting audio embeddings...")
     extract.run_embedding_extraction(
         file_infos,
         devices,
         embedder_model,
+        str(custom_embedder_model_path) if custom_embedder_model_path else None,
         cpu_cores,
     )
+    from ultimate_rvc.rvc.train.extract import preparing_files  # noqa: PLC0415
     preparing_files.generate_config(str(model_path))
     preparing_files.generate_filelist(
         str(model_path),