| import argparse |
| import glob |
| import os |
| import faiss |
| import numpy as np |
| from tqdm import tqdm |
|
|
def build_index(speaker_dir, output_path):
    """Build a flat L2 FAISS index from a speaker's saved feature arrays.

    Every ``*.vec.npy`` file directly inside ``speaker_dir`` is loaded, the
    arrays are concatenated along axis 0, cast to float32 and added to a
    ``faiss.IndexFlatL2``. The index is written to ``output_path`` and the
    concatenated vectors are saved beside it in a companion ``.npy`` file.

    Args:
        speaker_dir: Directory searched (non-recursively) for ``*.vec.npy``.
        output_path: Destination file for the FAISS index. When it ends in
            ``.index`` the vectors go to ``<stem>_vectors.npy``; otherwise
            ``_vectors.npy`` is appended to the full path.

    Returns:
        None. If no feature files are found a message is printed and nothing
        is written.
    """
    print(f"Finding HuBERT features in {speaker_dir}...")
    # glob yields files in arbitrary order; sorting keeps the row ids of the
    # index (and of the saved vectors) reproducible between runs.
    vec_files = sorted(glob.glob(os.path.join(speaker_dir, "*.vec.npy")))

    if not vec_files:
        print(f"No .vec.npy files found in {speaker_dir}!")
        return

    print(f"Found {len(vec_files)} files. Loading vectors...")

    loaded = [np.load(path) for path in tqdm(vec_files)]

    # Each array is indexed below as (frames, feature_dim); copy=False skips
    # a redundant copy when the data is already float32.
    all_vectors = np.concatenate(loaded, axis=0).astype(np.float32, copy=False)
    print(f"Total frames: {all_vectors.shape[0]}, Feature dimension: {all_vectors.shape[1]}")

    index = faiss.IndexFlatL2(all_vectors.shape[1])

    print("Adding vectors to FAISS index...")
    index.add(all_vectors)

    # dirname is "" for a bare file name, and os.makedirs("") raises.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    print(f"Saving index to {output_path}...")
    faiss.write_index(index, output_path)

    # Only strip a trailing ".index": str.replace would also rewrite matches
    # inside directory names, and would leave a path without ".index"
    # unchanged, letting np.save clobber the index file written above.
    index_suffix = ".index"
    if output_path.endswith(index_suffix):
        stem = output_path[: -len(index_suffix)]
    else:
        stem = output_path
    vectors_path = stem + "_vectors.npy"
    print(f"Saving source vectors to {vectors_path}...")
    np.save(vectors_path, all_vectors)

    print("Done!")
|
|
if __name__ == "__main__":
    # Command-line entry point: both options are mandatory and are passed
    # straight through to build_index.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--speaker_dir",
        type=str,
        required=True,
        help="Path to speaker's HuBERT directory (e.g. data_svc/hubert/singer_0005)",
    )
    cli.add_argument(
        "--output_path",
        type=str,
        required=True,
        help="Where to save the .index file (e.g. data_svc/hubert/singer_0005/feature.index)",
    )
    options = cli.parse_args()

    build_index(speaker_dir=options.speaker_dir, output_path=options.output_path)
|
|