Spaces:
Sleeping
Sleeping
File size: 864 Bytes
4ca6263 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
import torch
import librosa
from transformers import AutoFeatureExtractor, AutoModel
# Checkpoint identifier handed to both `from_pretrained` loaders in main().
MODEL_ID = "microsoft/wavlm-base"
def load_audio(path: str, target_sr: int = 16000):
    """Read an audio file as a mono waveform via ``librosa.load``.

    Args:
        path: Location of the audio file to read.
        target_sr: Value forwarded to ``librosa.load`` as its ``sr`` argument.

    Returns:
        A ``(waveform, sample_rate)`` tuple built from what ``librosa.load``
        returned.
    """
    loaded = librosa.load(path, sr=target_sr, mono=True)
    waveform, sample_rate = loaded
    return waveform, sample_rate
def main(audio_path: str = "sample.wav") -> None:
    """Run one forward pass of the model on an audio file and print its shape.

    Loads the feature extractor and model named by ``MODEL_ID``, reads the
    audio with ``load_audio``, runs the model without gradient tracking, and
    prints the audio duration and the shape of ``last_hidden_state``.

    Args:
        audio_path: Audio file to process. Defaults to ``"sample.wav"``, the
            path this function used before it accepted an argument, so a
            bare ``main()`` call behaves as it always did.
    """
    print("Loading model:", MODEL_ID)
    feature_extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
    model = AutoModel.from_pretrained(MODEL_ID)
    # Put the module in evaluation mode before inference.
    model.eval()

    audio, sr = load_audio(audio_path)
    print("Audio length (sec):", round(len(audio) / sr, 2))

    inputs = feature_extractor(audio, sampling_rate=sr, return_tensors="pt")
    # No gradients are needed for a single inference pass.
    with torch.no_grad():
        out = model(**inputs)

    x = out.last_hidden_state  # [batch, frames, hidden]
    # Nothing above moves the model or inputs to another device.
    print("OK ✅ WavLM ran on CPU")
    print("Embedding tensor shape:", tuple(x.shape))
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()