File size: 807 Bytes
2b7c9a4
 
3da6ae1
 
 
 
 
63cca50
3da6ae1
63cca50
3da6ae1
63cca50
3da6ae1
 
 
 
63cca50
3da6ae1
 
63cca50
3da6ae1
63cca50
3da6ae1
 
63cca50
3da6ae1
63cca50
2b7c9a4
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
```

from huggingface_hub import hf_hub_download
import joblib
from transformers import Wav2Vec2Processor, HubertModel
from torchaudio import load
from torchaudio.functional import resample
import torch

# Extract discrete units for one audio file: fetch the k-means codebook,
# run the HuBERT encoder, take hidden state 9 and assign every frame to its
# nearest cluster.

model_name = "Ansu/mHubert-basque-k1000-L9"
# Rate passed to the processor below; audio at another rate is resampled to it.
target_sample_rate = 16000

# hf_hub_download returns the path of the fetched file, so load from that
# instead of repeating a hard-coded relative path.
kmeans_path = hf_hub_download(
    repo_id=model_name,
    filename="kmeans/basque_hubert_k1000_L9.pt",
    local_dir="./",
)

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code. Only load codebooks from repositories you trust.
kmeans = joblib.load(kmeans_path)

processor = Wav2Vec2Processor.from_pretrained(model_name)
model = HubertModel.from_pretrained(model_name)
model.eval()

# load() returns (waveform, sample_rate) with the waveform laid out as
# (channels, frames). Keep the rate instead of discarding it.
audio, sample_rate = load("path/to/audio")

# Down-mix to mono. For a single-channel file this equals squeeze(0); for
# multi-channel files it avoids feeding each channel as a separate batch item.
audio = audio.mean(dim=0)

# The processor is told the audio is at target_sample_rate, so make that true.
if sample_rate != target_sample_rate:
    audio = resample(audio, orig_freq=sample_rate, new_freq=target_sample_rate)

inputs = processor(
    audio.numpy(),
    sampling_rate=target_sample_rate,
    return_tensors="pt",
    padding=True,
)

with torch.no_grad():
    out = model(**inputs, output_hidden_states=True)

# Index 9 presumably corresponds to the "L9" in the checkpoint name -- confirm
# against the model card. squeeze(0) drops the batch axis of the single input.
features = out.hidden_states[9].squeeze(0).numpy()

# One cluster index per feature frame.
units = kmeans.predict(features)
```