Yoshitaka16 committed on
Commit
580ce58
·
verified ·
1 Parent(s): c4cc266

Update extract.py

Browse files
Files changed (1) hide show
  1. extract.py +9 -55
extract.py CHANGED
@@ -1,10 +1,9 @@
1
  """
2
  Module which exposes functionality for extracting training features from
3
- audio datasets.
4
  """
5
 
6
  from __future__ import annotations
7
-
8
  from multiprocessing import cpu_count
9
 
10
  from ultimate_rvc.core.common import (
@@ -24,7 +23,6 @@ from ultimate_rvc.typing_extra import (
24
  TrainingF0Method,
25
  )
26
 
27
-
28
  def extract_features(
29
  model_name: str,
30
  f0_method: TrainingF0Method = TrainingF0Method.RMVPE,
@@ -36,47 +34,6 @@ def extract_features(
36
  hardware_acceleration: DeviceType = DeviceType.AUTOMATIC,
37
  gpu_ids: set[int] | None = None,
38
  ) -> None:
39
- """
40
- Extract features from the preprocessed dataset associated with a
41
- voice model to be trained.
42
-
43
- Parameters
44
- ----------
45
- model_name : str
46
- The name of the voice model to be trained.
47
- f0_method : TrainingF0Method, default=TrainingF0Method.RMVPE
48
- The method to use for extracting pitch features.
49
- hop_length : int, default=128
50
- The hop length to use for extracting pitch features. Only used
51
- with the CREPE pitch extraction method.
52
- embedder_model : EmbedderModel, default=EmbedderModel.CONTENTVEC
53
- The model to use for extracting audio embeddings.
54
- custom_embedder_model : StrPath, optional
55
- The name of the custom embedder model to use for extracting
56
- audio embeddings.
57
- include_mutes : int, default=2
58
- The number of mute audio files to include in the generated
59
- training file list. Adding silent files enables the voice model
60
- to handle pure silence in inferred audio files. If the
61
- preprocessed audio dataset already contains segments of pure
62
- silence, set this to 0.
63
- cpu_cores : int, default=cpu_count()
64
- The number of CPU cores to use for feature extraction.
65
- hardware_acceleration : DeviceType, default=DeviceType.AUTOMATIC
66
- The type of hardware acceleration to use for feature extraction.
67
- `AUTOMATIC` will select the first available GPU and fall back to
68
- CPU if no GPUs are available.
69
- gpu_ids : set[int], optional
70
- Set of ids of the GPUs to use for feature extraction when `GPU`
71
- is selected for hardware acceleration.
72
-
73
- Raises
74
- ------
75
- ModelAsssociatedEntityNotFoundError
76
- If no preprocessed dataset audio files are associated with the
77
- voice model identified by the provided name.
78
-
79
- """
80
  model_path = validate_model(model_name, Entity.TRAINING_MODEL)
81
  sliced_audios16k_path = model_path / "sliced_audios_16k"
82
  if not sliced_audios16k_path.is_dir() or not any(sliced_audios16k_path.iterdir()):
@@ -100,19 +57,20 @@ def extract_features(
100
  chosen_embedder_model = str(custom_embedder_model_path)
101
  embedder_model_id = f"custom_{combined_file_hash}"
102
 
 
103
  f0_method_id = f0_method
104
  if f0_method in {TrainingF0Method.CREPE, TrainingF0Method.CREPE_TINY}:
105
  f0_method_id = f"{f0_method}_{hop_length}"
 
 
106
 
107
  device_type, device_ids = validate_devices(hardware_acceleration, gpu_ids)
108
-
109
  devices = (
110
  [f"{device_type}:{device_id}" for device_id in device_ids]
111
  if device_ids
112
  else [device_type]
113
  )
114
- # NOTE The lazy_import function does not work with the package below
115
- # so we import it here manually
116
  from ultimate_rvc.rvc.train.extract import extract # noqa: PLC0415
117
 
118
  file_infos = extract.initialize_extraction(
@@ -125,24 +83,20 @@ def extract_features(
125
  chosen_embedder_model,
126
  combined_file_hash,
127
  )
 
128
  display_progress("[~] Extracting pitch features...")
129
  extract.run_pitch_extraction(file_infos, devices, f0_method, hop_length, cpu_cores)
 
130
  display_progress("[~] Extracting audio embeddings...")
131
  extract.run_embedding_extraction(
132
  file_infos,
133
  devices,
134
  embedder_model,
135
- (
136
- str(custom_embedder_model_path)
137
- if custom_embedder_model_path is not None
138
- else None
139
- ),
140
  cpu_cores,
141
  )
142
- # NOTE The lazy_import function does not work with the package below
143
- # so we import it here manually
144
- from ultimate_rvc.rvc.train.extract import preparing_files # noqa: PLC0415
145
 
 
146
  preparing_files.generate_config(str(model_path))
147
  preparing_files.generate_filelist(
148
  str(model_path),
 
1
  """
2
  Module which exposes functionality for extracting training features from
3
+ audio datasets, now with DJCM support.
4
  """
5
 
6
  from __future__ import annotations
 
7
  from multiprocessing import cpu_count
8
 
9
  from ultimate_rvc.core.common import (
 
23
  TrainingF0Method,
24
  )
25
 
 
26
  def extract_features(
27
  model_name: str,
28
  f0_method: TrainingF0Method = TrainingF0Method.RMVPE,
 
34
  hardware_acceleration: DeviceType = DeviceType.AUTOMATIC,
35
  gpu_ids: set[int] | None = None,
36
  ) -> None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  model_path = validate_model(model_name, Entity.TRAINING_MODEL)
38
  sliced_audios16k_path = model_path / "sliced_audios_16k"
39
  if not sliced_audios16k_path.is_dir() or not any(sliced_audios16k_path.iterdir()):
 
57
  chosen_embedder_model = str(custom_embedder_model_path)
58
  embedder_model_id = f"custom_{combined_file_hash}"
59
 
60
+ # Generate f0_method_id
61
  f0_method_id = f0_method
62
  if f0_method in {TrainingF0Method.CREPE, TrainingF0Method.CREPE_TINY}:
63
  f0_method_id = f"{f0_method}_{hop_length}"
64
+ elif f0_method == TrainingF0Method.DJCM:
65
+ f0_method_id = "djcm" # DJCM does not need hop_length
66
 
67
  device_type, device_ids = validate_devices(hardware_acceleration, gpu_ids)
 
68
  devices = (
69
  [f"{device_type}:{device_id}" for device_id in device_ids]
70
  if device_ids
71
  else [device_type]
72
  )
73
+
 
74
  from ultimate_rvc.rvc.train.extract import extract # noqa: PLC0415
75
 
76
  file_infos = extract.initialize_extraction(
 
83
  chosen_embedder_model,
84
  combined_file_hash,
85
  )
86
+
87
  display_progress("[~] Extracting pitch features...")
88
  extract.run_pitch_extraction(file_infos, devices, f0_method, hop_length, cpu_cores)
89
+
90
  display_progress("[~] Extracting audio embeddings...")
91
  extract.run_embedding_extraction(
92
  file_infos,
93
  devices,
94
  embedder_model,
95
+ str(custom_embedder_model_path) if custom_embedder_model_path else None,
 
 
 
 
96
  cpu_cores,
97
  )
 
 
 
98
 
99
+ from ultimate_rvc.rvc.train.extract import preparing_files # noqa: PLC0415
100
  preparing_files.generate_config(str(model_path))
101
  preparing_files.generate_filelist(
102
  str(model_path),