Upload 2 files
Browse files
- processing_cm3p.py +3 -2
- tokenization_cm3p.py +2 -2
processing_cm3p.py
CHANGED
|
@@ -7,7 +7,6 @@ from pathlib import Path
|
|
| 7 |
from typing import Optional, Union, IO, TypedDict
|
| 8 |
|
| 9 |
import numpy as np
|
| 10 |
-
import soxr
|
| 11 |
from pandas import Series
|
| 12 |
from slider import Beatmap, HoldNote
|
| 13 |
from transformers import WhisperFeatureExtractor, AutoProcessor, BatchEncoding
|
|
@@ -151,6 +150,7 @@ class CM3PProcessorKwargs(CommonKwargs, CM3PBeatmapKwargs, CM3PTokenizerKwargs,
|
|
| 151 |
"truncation": TruncationStrategy.LONGEST_FIRST,
|
| 152 |
"window_length_sec": 30.0,
|
| 153 |
"window_stride_sec": 30.0,
|
|
|
|
| 154 |
},
|
| 155 |
"metadata_kwargs": {
|
| 156 |
"max_length": 128,
|
|
@@ -347,6 +347,7 @@ class CM3PProcessor(ProcessorMixin):
|
|
| 347 |
array = array.mean(axis=1)
|
| 348 |
# Resample if the sampling rate is different from the expected one
|
| 349 |
if s != sampling_rate:
|
|
|
|
| 350 |
array = soxr.resample(array, s, sampling_rate, quality="HQ")
|
| 351 |
audio_buffers.append(array)
|
| 352 |
|
|
@@ -433,6 +434,7 @@ class CM3PProcessor(ProcessorMixin):
|
|
| 433 |
|
| 434 |
window_length_sec = beatmap_kwargs.pop("window_length_sec")
|
| 435 |
window_stride_sec = beatmap_kwargs.pop("window_stride_sec")
|
|
|
|
| 436 |
max_length = beatmap_kwargs.get("max_length", 8000)
|
| 437 |
metadata_max_length = metadata_kwargs.get("max_length", 128)
|
| 438 |
sampling_rate = audio_kwargs["sampling_rate"]
|
|
@@ -506,7 +508,6 @@ class CM3PProcessor(ProcessorMixin):
|
|
| 506 |
|
| 507 |
# Loop through with sliding window
|
| 508 |
groups_search_index = 0
|
| 509 |
-
min_window_length_sec = 8
|
| 510 |
for start_sec in np.arange(0, song_length - min_window_length_sec, window_stride_sec):
|
| 511 |
end_sec = start_sec + window_length_sec
|
| 512 |
|
|
|
|
| 7 |
from typing import Optional, Union, IO, TypedDict
|
| 8 |
|
| 9 |
import numpy as np
|
|
|
|
| 10 |
from pandas import Series
|
| 11 |
from slider import Beatmap, HoldNote
|
| 12 |
from transformers import WhisperFeatureExtractor, AutoProcessor, BatchEncoding
|
|
|
|
| 150 |
"truncation": TruncationStrategy.LONGEST_FIRST,
|
| 151 |
"window_length_sec": 30.0,
|
| 152 |
"window_stride_sec": 30.0,
|
| 153 |
+
"min_window_length_sec": 1.0,
|
| 154 |
},
|
| 155 |
"metadata_kwargs": {
|
| 156 |
"max_length": 128,
|
|
|
|
| 347 |
array = array.mean(axis=1)
|
| 348 |
# Resample if the sampling rate is different from the expected one
|
| 349 |
if s != sampling_rate:
|
| 350 |
+
import soxr
|
| 351 |
array = soxr.resample(array, s, sampling_rate, quality="HQ")
|
| 352 |
audio_buffers.append(array)
|
| 353 |
|
|
|
|
| 434 |
|
| 435 |
window_length_sec = beatmap_kwargs.pop("window_length_sec")
|
| 436 |
window_stride_sec = beatmap_kwargs.pop("window_stride_sec")
|
| 437 |
+
min_window_length_sec = beatmap_kwargs.pop("min_window_length_sec", 1.0)
|
| 438 |
max_length = beatmap_kwargs.get("max_length", 8000)
|
| 439 |
metadata_max_length = metadata_kwargs.get("max_length", 128)
|
| 440 |
sampling_rate = audio_kwargs["sampling_rate"]
|
|
|
|
| 508 |
|
| 509 |
# Loop through with sliding window
|
| 510 |
groups_search_index = 0
|
|
|
|
| 511 |
for start_sec in np.arange(0, song_length - min_window_length_sec, window_stride_sec):
|
| 512 |
end_sec = start_sec + window_length_sec
|
| 513 |
|
tokenization_cm3p.py
CHANGED
|
@@ -295,7 +295,7 @@ class CM3PBeatmapTokenizer(PreTrainedTokenizer):
|
|
| 295 |
if not save_directory:
|
| 296 |
raise ValueError("The save_directory must be specified.")
|
| 297 |
|
| 298 |
-
vocab_file = f"{save_directory}/{filename_prefix or ""}vocab.json"
|
| 299 |
with open(vocab_file, 'w', encoding='utf-8') as f:
|
| 300 |
json.dump(self.vocab, f, ensure_ascii=False)
|
| 301 |
|
|
@@ -796,7 +796,7 @@ class CM3PMetadataTokenizer(PreTrainedTokenizer):
|
|
| 796 |
if not save_directory:
|
| 797 |
raise ValueError("The save_directory must be specified.")
|
| 798 |
|
| 799 |
-
vocab_file = f"{save_directory}/{filename_prefix or ""}vocab.json"
|
| 800 |
with open(vocab_file, 'w', encoding='utf-8') as f:
|
| 801 |
json.dump(self.vocab, f, ensure_ascii=False)
|
| 802 |
|
|
|
|
| 295 |
if not save_directory:
|
| 296 |
raise ValueError("The save_directory must be specified.")
|
| 297 |
|
| 298 |
+
vocab_file = f"{save_directory}/{filename_prefix or ''}vocab.json"
|
| 299 |
with open(vocab_file, 'w', encoding='utf-8') as f:
|
| 300 |
json.dump(self.vocab, f, ensure_ascii=False)
|
| 301 |
|
|
|
|
| 796 |
if not save_directory:
|
| 797 |
raise ValueError("The save_directory must be specified.")
|
| 798 |
|
| 799 |
+
vocab_file = f"{save_directory}/{filename_prefix or ''}vocab.json"
|
| 800 |
with open(vocab_file, 'w', encoding='utf-8') as f:
|
| 801 |
json.dump(self.vocab, f, ensure_ascii=False)
|
| 802 |
|