| # HuBERT K-means Quantizer | |
| This model implements HuBERT with k-means quantization for converting speech to discrete tokens. | |
| ## Usage | |
| ```python | |
| from transformers import AutoModel | |
| import torch | |
| import torchaudio | |
| # Load the processor/tokenizer (custom code hosted in the model repo, hence trust_remote_code=True) | |
| processor = AutoModel.from_pretrained("your-username/hubert-kmeans-200", trust_remote_code=True) | |
| # Load audio | |
| audio, sr = torchaudio.load("audio.wav") | |
| if sr != 16000: | |
|     resampler = torchaudio.transforms.Resample(sr, 16000) | |
|     audio = resampler(audio) | |
| # Process audio to get tokens | |
| outputs = processor(audio, return_tensors="pt", sample_rate=16000) | |
| tokens = outputs.input_values # if this attribute is missing, try outputs.input_ids instead | |
| print(f"Tokens shape: {tokens.shape}") |