Spaces:
Sleeping
Sleeping
| import torch | |
| import librosa | |
| from transformers import AutoFeatureExtractor, AutoModel | |
# Hugging Face Hub identifier of the pretrained checkpoint used by this script.
MODEL_ID = "microsoft/wavlm-base"
def load_audio(path: str, target_sr: int = 16000):
    """Read an audio file as a mono waveform at the requested sampling rate.

    Args:
        path: Location of the audio file, handed to ``librosa.load``.
        target_sr: Sampling rate requested from ``librosa.load`` (``sr=``).

    Returns:
        The ``(waveform, sample_rate)`` pair produced by ``librosa.load``.
    """
    waveform, sample_rate = librosa.load(path, sr=target_sr, mono=True)
    return waveform, sample_rate
def main():
    """Run the ``MODEL_ID`` checkpoint once on ``sample.wav`` and report the result.

    Loads the feature extractor and model, reads the audio through
    ``load_audio``, performs a single forward pass without gradient
    tracking, and prints the shape of ``last_hidden_state``.
    """
    print("Loading model:", MODEL_ID)
    extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
    wavlm = AutoModel.from_pretrained(MODEL_ID)
    wavlm.eval()  # evaluation mode for the forward pass below

    waveform, sample_rate = load_audio("sample.wav")
    duration_sec = round(len(waveform) / sample_rate, 2)
    print("Audio length (sec):", duration_sec)

    model_inputs = extractor(
        waveform,
        sampling_rate=sample_rate,
        return_tensors="pt",
    )

    # Forward pass only; no gradients are needed.
    with torch.no_grad():
        result = wavlm(**model_inputs)
        hidden_states = result.last_hidden_state  # [batch, frames, hidden]

    print("OK ✅ WavLM ran on CPU")
    print("Embedding tensor shape:", tuple(hidden_states.shape))
# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()