File size: 1,353 Bytes
636a032 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import torch
import torch.nn as nn
import librosa
from transformers import WhisperProcessor, WhisperModel
class WhisperClassifier(nn.Module):
    """Binary classifier head on top of a pretrained Whisper encoder.

    The encoder's last hidden states are averaged along dimension 1,
    projected to a single logit by a linear layer and passed through a
    sigmoid, so ``forward`` returns one probability per batch item.
    """

    def __init__(self, model_name: str = "openai/whisper-small") -> None:
        """Load the pretrained backbone and build the classification head.

        Args:
            model_name: Identifier handed to ``WhisperModel.from_pretrained``.
        """
        super().__init__()
        # The complete WhisperModel is kept as a submodule even though
        # ``forward`` only calls its encoder. The attribute names below
        # (``whisper``, ``fc``) are the prefixes of the state-dict keys, so
        # they must stay as they are for saved weights to keep loading.
        self.whisper = WhisperModel.from_pretrained(model_name)
        # A single output unit: the head produces one logit per example.
        self.fc = nn.Linear(self.whisper.config.d_model, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_features: torch.Tensor) -> torch.Tensor:
        """Return the positive-class probability for each batch item.

        Args:
            input_features: Features accepted by the Whisper encoder
                (presumably batched log-mel spectrograms as produced by a
                Whisper processor -- confirm against the caller).

        Returns:
            Tensor of sigmoid outputs with the size-1 dimension at index 1
            squeezed away, i.e. one value per batch item.
        """
        hidden_states = self.whisper.encoder(input_features).last_hidden_state
        # Average over dimension 1 (assumed to be the frame/time axis) to
        # collapse the sequence into one vector per example.
        pooled_output = hidden_states.mean(dim=1)
        logits = self.fc(pooled_output)
        return self.sigmoid(logits).squeeze(1)
def predict(model, audio_path, processor, device='cpu', *, threshold=0.5):
    """Classify a single audio file as 0 or 1.

    The file is loaded at 16 kHz, converted to model input features by
    ``processor`` and scored by ``model``; the resulting probability is
    compared against ``threshold``.

    Side effect: ``model`` is switched to evaluation mode and left there.

    Args:
        model: Module called with the input features. Its output must hold
            exactly one element, because ``.item()`` is applied to it.
        audio_path: Path passed to ``librosa.load``.
        processor: Callable invoked with the waveform,
            ``return_tensors="pt"`` and ``sampling_rate``; its result must
            expose ``input_features``.
        device: Device the input features are moved to. It has to be the
            device ``model`` lives on, otherwise the forward pass will fail
            with a device mismatch.
        threshold: Probability strictly above which the clip is labelled 1.
            Defaults to 0.5.

    Returns:
        ``1`` if the model's probability exceeds ``threshold``, else ``0``.
    """
    # One constant for both the resampling target and the rate reported to
    # the processor, so the two can never disagree.
    sample_rate = 16000

    audio, _ = librosa.load(audio_path, sr=sample_rate)
    features = processor(
        audio, return_tensors="pt", sampling_rate=sample_rate
    ).input_features
    features = features.to(device)

    model.eval()
    with torch.no_grad():
        probability = model(features).item()

    return 1 if probability > threshold else 0
def load_model(model_path, model_name=None, device=None):
    """Build a ``WhisperClassifier`` and restore its weights from disk.

    Args:
        model_path: Path to a checkpoint containing the classifier's state
            dict (as consumed by ``load_state_dict``).
        model_name: Optional backbone identifier forwarded to
            ``WhisperClassifier``. When ``None`` the classifier's own
            default is used.
        device: Optional target device (string or ``torch.device``). When
            ``None``, CUDA is used if available, otherwise the CPU.

    Returns:
        A ``(model, device)`` tuple: the classifier with restored weights,
        placed on ``device``, and the ``torch.device`` that was used.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    else:
        device = torch.device(device)

    classifier = WhisperClassifier() if model_name is None else WhisperClassifier(model_name)
    model = classifier.to(device)

    # SECURITY: checkpoints are pickle files. weights_only=True restricts
    # unpickling to tensors and plain containers, which is all a state dict
    # needs, so a tampered checkpoint cannot run arbitrary code on load.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    return model, device
|