Ubuntu committed · 24d0b1d
1 parent: f3d3b39

update tokenizer
Files changed:
- speech/tools/S3Tokenizer/.flake8 +0 -0
- speech/tools/S3Tokenizer/.github/workflows/python-publish.yml +0 -0
- speech/tools/S3Tokenizer/.github/workflows/unit_test_cpu.yaml +0 -0
- speech/tools/S3Tokenizer/.gitignore +0 -0
- speech/tools/S3Tokenizer/.pre-commit-config.yaml +0 -0
- speech/tools/S3Tokenizer/LICENSE +0 -0
- speech/tools/S3Tokenizer/MANIFEST.in +0 -0
- speech/tools/S3Tokenizer/README.md +0 -0
- speech/tools/S3Tokenizer/requirements.txt +0 -0
- speech/tools/S3Tokenizer/s3tokenizer/__init__.py +0 -0
- speech/tools/S3Tokenizer/s3tokenizer/assets/mel_filters.npz +0 -0
- speech/tools/S3Tokenizer/s3tokenizer/cli.py +24 -28
- speech/tools/S3Tokenizer/s3tokenizer/model.py +0 -0
- speech/tools/S3Tokenizer/s3tokenizer/model_v2.py +0 -0
- speech/tools/S3Tokenizer/s3tokenizer/utils.py +0 -0
- speech/tools/S3Tokenizer/setup.py +0 -0
- speech/tools/S3Tokenizer/test/test_batch_efficiency.py +0 -0
- speech/tools/S3Tokenizer/test/test_onnx.py +0 -0
speech/tools/S3Tokenizer/s3tokenizer/cli.py
CHANGED
@@ -32,7 +32,6 @@ torchrun --nproc_per_node=8 --nnodes=1 \
 
 import argparse
 import os
-from pathlib import Path
 
 import torch
 import torch.distributed as dist
@@ -50,12 +49,10 @@ class AudioDataset(Dataset):
 
         # Define cache file path
         if cache_file is None:
-            cache_file =
-        else:
-            cache_file = Path(cache_file)
+            cache_file = os.path.join(root_path, '.audio_file_cache.pkl')
 
         # Try to load from cache first
-        if use_cache and
+        if use_cache and os.path.exists(cache_file):
             import pickle
             print(f"Loading file list from cache: {cache_file}")
             try:
@@ -80,7 +77,7 @@ class AudioDataset(Dataset):
             with os.scandir(dirpath) as entries:
                 for entry in entries:
                     if entry.is_file() and any(entry.name.endswith(ext) for ext in extensions):
-                        files.append(
+                        files.append(entry.path)
         except PermissionError:
             pass
         return files
@@ -112,7 +109,10 @@ class AudioDataset(Dataset):
             try:
                 import pickle
                 print(f"Saving file list to cache: {cache_file}")
-
+                # Ensure parent directory exists
+                cache_dir = os.path.dirname(cache_file)
+                if cache_dir and not os.path.exists(cache_dir):
+                    os.makedirs(cache_dir, exist_ok=True)
                 with open(cache_file, 'wb') as f:
                     pickle.dump(self.data, f)
             except Exception as e:
@@ -124,7 +124,7 @@ class AudioDataset(Dataset):
     def __getitem__(self, idx):
         file_path = self.data[idx]
         try:
-            audio = s3tokenizer.load_audio(
+            audio = s3tokenizer.load_audio(file_path)
             mel = s3tokenizer.log_mel_spectrogram(audio)
             return file_path, mel
         except Exception as e:
@@ -219,8 +219,8 @@ def get_args():
 def save_tokens(file_path, codes, codes_len):
     """Save tokens as .pt file with _fsq suffix"""
     # Remove extension and add _fsq.pt
-
-    output_path =
+    base_name = os.path.splitext(file_path)[0]
+    output_path = f"{base_name}_fsq.pt"
 
     # Extract only valid codes (up to codes_len)
     valid_codes = codes[:codes_len]
@@ -248,11 +248,11 @@ def main():
         # Option 3: Load from pre-generated file list
         print(f"Loading file list from: {args.file_list}")
         with open(args.file_list, 'r') as f:
-            file_paths = [
-
-
-
-
+            file_paths = []
+            for line in f:
+                line = line.strip()
+                if line:
+                    file_paths.append(line)
 
         # Create a simple dataset
         class FileListDataset(Dataset):
@@ -261,9 +261,9 @@ def main():
                 skipped_existing = 0
                 for fp in file_paths:
                     if skip_existing:
-                        output_path = fp.
-
-
+                        output_path = fp.replace('.wav', '_fsq.pt')
+                        if os.path.exists(output_path):
+                            print(f'*******skip file {output_path}')
                             skipped_existing += 1
                             continue
                     self.data.append(fp)
@@ -278,17 +278,13 @@ def main():
                 file_path = self.data[idx]
                 try:
                     # Check if file exists
-                    if not
+                    if not os.path.exists(file_path):
                         print(f"File not found: {file_path}")
                         return None, None
-
-                    # Check if it's a file (not directory)
-                    if not file_path.is_file():
-                        print(f"Not a file: {file_path}")
-                        return None, None
+
 
                     # Try to load audio
-                    audio = s3tokenizer.load_audio(
+                    audio = s3tokenizer.load_audio(file_path)
                     mel = s3tokenizer.log_mel_spectrogram(audio)
                     return file_path, mel
                 except Exception as e:
@@ -311,7 +307,7 @@ def main():
         original_count = len(dataset.data)
         dataset.data = [
             fp for fp in dataset.data
-            if not (fp
+            if not os.path.exists(os.path.join(os.path.dirname(fp), f"{os.path.splitext(os.path.basename(fp))[0]}_fsq.pt"))
         ]
         print(f"Skipping {original_count - len(dataset.data)} already processed files")
 
@@ -363,7 +359,7 @@ def main():
                 processed_count += 1
             except Exception as e:
                 failed_count += 1
-                failed_files.append(
+                failed_files.append(file_path)
                 if rank == 0:
                     tqdm.write(f"Failed to save {file_path}: {e}")
 
@@ -377,7 +373,7 @@ def main():
         print(f"Failed to process {failed_count} files")
 
         # Save failed files list
-        failed_list_path =
+        failed_list_path = os.path.join(args.root_path if not args.file_list else ".", "failed_files.txt")
         with open(failed_list_path, 'w') as f:
             for ff in failed_files:
                 f.write(f"{ff}\n")
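For reference, the output-path convention this commit converges on can be exercised in isolation. A minimal sketch, assuming nothing beyond the hunks above; the helper names token_output_path and should_skip are hypothetical (the commit inlines this logic in save_tokens and the skip_existing branch):

import os

def token_output_path(file_path: str) -> str:
    # Mirrors save_tokens() above: strip the extension, append "_fsq.pt".
    base_name = os.path.splitext(file_path)[0]
    return f"{base_name}_fsq.pt"

def should_skip(file_path: str) -> bool:
    # Mirrors the skip_existing branch above, which uses str.replace on
    # ".wav"; this agrees with token_output_path() only for .wav inputs.
    return os.path.exists(file_path.replace('.wav', '_fsq.pt'))

print(token_output_path("/data/clip.wav"))  # /data/clip_fsq.pt
print(should_skip("/data/clip.wav"))        # True once /data/clip_fsq.pt exists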
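Likewise, the file-list caching that the diff moves from pathlib to os.path reduces to the following standalone sketch. load_cached_file_list and save_file_list_cache are illustrative names (AudioDataset does this inline); the default cache location and the directory guard are taken verbatim from the hunks above:

import os
import pickle

def load_cached_file_list(root_path, cache_file=None, use_cache=True):
    # Default cache location, as in the diff.
    if cache_file is None:
        cache_file = os.path.join(root_path, '.audio_file_cache.pkl')
    # Reuse a previous directory scan when the cache file is present.
    if use_cache and os.path.exists(cache_file):
        with open(cache_file, 'rb') as f:
            return pickle.load(f)
    return None

def save_file_list_cache(data, cache_file):
    # Ensure the parent directory exists before writing, as in the diff.
    cache_dir = os.path.dirname(cache_file)
    if cache_dir and not os.path.exists(cache_dir):
        os.makedirs(cache_dir, exist_ok=True)
    with open(cache_file, 'wb') as f:
        pickle.dump(data, f)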