|
|
import numpy as np
|
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
import librosa
|
|
|
import noisereduce as nr
|
|
|
import torch
|
|
|
from transformers import AutoModelForAudioXVector
|
|
|
|
|
|
# All tensors and the model are placed on this torch device.
device = "cpu"

# Load the pretrained x-vector checkpoint once, at import time, so every
# embedding call reuses the same model instance.
model = AutoModelForAudioXVector.from_pretrained("microsoft/wavlm-base-plus-sv").to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def preprocess(path):
    """Load an audio file and clean it up for embedding extraction.

    The file is loaded with librosa at a 16 kHz sample rate, passed through
    ``noisereduce``, trimmed with ``librosa.effects.trim`` (``top_db=25``),
    and finally normalized with ``librosa.util.normalize`` using its
    default settings.

    Args:
        path: Location of the audio file, in any form ``librosa.load``
            accepts.

    Returns:
        The processed waveform as returned by ``librosa.util.normalize``.
    """
    waveform, sample_rate = librosa.load(path, sr=16000)
    denoised = nr.reduce_noise(y=waveform, sr=sample_rate)
    # trim() returns (trimmed_signal, interval); only the signal is needed.
    trimmed = librosa.effects.trim(denoised, top_db=25)[0]
    return librosa.util.normalize(trimmed)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_embedding(path):
    """Compute a unit-length embedding for the audio file at ``path``.

    The file is run through ``preprocess``, fed to the module-level
    ``model`` as a batch of one, and the resulting ``embeddings`` output is
    scaled to unit Euclidean norm.

    Args:
        path: Location of the audio file; forwarded to ``preprocess``.

    Returns:
        A ``float32`` NumPy array holding the normalized embedding, with
        all size-1 axes (including the batch axis) squeezed out.
    """
    waveform = preprocess(path)

    # The model is called on a batch, so add a leading axis of size 1.
    batch = torch.tensor(waveform).float().unsqueeze(0).to(device)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        result = model(batch)
        vector = result.embeddings.cpu().numpy().squeeze()

    # Scale to unit L2 norm.
    unit_vector = vector / np.linalg.norm(vector)
    return unit_vector.astype(np.float32)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def cosine_match(saved, new, threshold=0.75, pin_threshold=0.55):
    """Score two embeddings by cosine similarity and classify the match.

    Args:
        saved: Reference embedding (a 1-D sequence or array of numbers).
        new: Candidate embedding to compare against ``saved``.
        threshold: Minimum similarity for the ``"Authenticated"`` status.
        pin_threshold: Minimum similarity for the ``"Ask for PIN"`` status
            when the score is below ``threshold``. Defaults to ``0.55``,
            the value that was previously hard-coded, so existing callers
            see no change.

    Returns:
        A ``(score, status)`` tuple where ``score`` is the cosine
        similarity as a Python ``float`` and ``status`` is one of
        ``"Authenticated"``, ``"Ask for PIN"`` or ``"Failed"``.
    """
    # cosine_similarity operates on 2-D inputs (one row per sample), so each
    # vector is wrapped as a single row and the lone cell of the resulting
    # 1x1 matrix is read back.
    score = float(cosine_similarity([saved], [new])[0][0])

    if score >= threshold:
        return score, "Authenticated"
    if score >= pin_threshold:
        return score, "Ask for PIN"
    return score, "Failed"