| import gdown | |
| import random | |
| import numpy as np | |
| from audio import read_mfcc | |
| from batcher import sample_from_mfcc | |
| from constants import SAMPLE_RATE, NUM_FRAMES | |
| from conv_models import DeepSpeakerModel | |
| from test import batch_cosine_similarity | |
class speaker_recognition:
    """Enroll and identify speakers by comparing voice embeddings.

    Embeddings are produced by a ``DeepSpeakerModel`` (see ``run_transform``)
    and compared with ``batch_cosine_similarity``. Enrolled embeddings live in
    the ``speakers`` dict, keyed by speaker id.
    """

    def __init__(self):
        """Initialise state and load the model.

        NOTE: seeds the *global* NumPy and stdlib RNGs, which affects the
        whole process, and (via ``spin_up``) downloads the weights file.
        """
        np.random.seed(123)
        random.seed(123)
        self.speakers = {}  # speaker id -> enrolled embedding
        self.weights = ""  # path to the weights file; "" means "download it"
        self.by_name = True  # forwarded to load_weights() in spin_up()
        self.SAMPLE_RATE = SAMPLE_RATE
        self.NUM_FRAMES = NUM_FRAMES
        self.spin_up()

    def spin_up(self):
        """Download the weights if no path is configured, then load the model.

        Sets ``self.model`` to a fresh ``DeepSpeakerModel`` with the weights
        from ``self.weights`` loaded into ``self.model.m``.
        """
        if not self.weights:
            output = "weights.h5"
            gdown.download(
                "https://drive.google.com/uc?id=1F9NvdrarWZNktdX9KlRYWWHDwRkip_aP",
                output,
                quiet=False,
            )
            self.weights = output
        self.model = DeepSpeakerModel()
        # Honour the configured flag instead of a hard-coded True (the default
        # set in __init__ is True, so the out-of-the-box behaviour is the same).
        self.model.m.load_weights(self.weights, by_name=self.by_name)

    def create_speaker(self, data, id=""):
        """Enroll ``data`` under ``id`` and return the id that was used.

        Args:
            data: The embedding to store for this speaker.
            id: Speaker id. When left as ``""`` a numeric string id is
                generated. An explicitly supplied id replaces any existing
                entry with the same id.

        Returns:
            The id under which ``data`` was stored.
        """
        if id == "":
            # Start from the current count but skip ids that are already
            # taken, so an auto-generated id never overwrites a speaker that
            # was enrolled under an explicit numeric name.
            counter = len(self.speakers)
            while str(counter) in self.speakers:
                counter += 1
            id = str(counter)
        self.speakers[id] = data
        return id

    def check_speakers(self, data, id="", threshold=0.5):
        """Find the enrolled speaker most similar to ``data``.

        Args:
            data: Embedding to identify (as returned by ``run_transform``).
            id: Id to use if no match is found and a new speaker is enrolled;
                ``""`` lets ``create_speaker`` generate one.
            threshold: A speaker only matches when its similarity is strictly
                greater than this value.

        Returns:
            ``(speaker_id, score)`` for the best match above ``threshold``, or
            the string ``"created new speaker : <id>"`` when nothing matched
            and ``data`` was enrolled as a new speaker.
        """
        best_speaker = None
        best_score = None  # None means "no speaker matched yet"
        for speaker, reference in self.speakers.items():
            # presumably a length-1 array (one score per batch row) - the
            # comparisons below rely on it having a single element.
            score = batch_cosine_similarity(reference, data)
            if score > threshold and (best_score is None or score > best_score):
                best_speaker = speaker
                best_score = score
        if best_score is None:
            id = self.create_speaker(data, id)
            return f"created new speaker : {id}"
        # Report the score of the best match, not of the last speaker compared.
        return (best_speaker, best_score[0])

    def run_transform(self, audio, pcm=False):
        """Convert ``audio`` into an embedding using the loaded model.

        Args:
            audio: Input passed to ``read_mfcc`` together with the sample rate.
            pcm: Currently unused; kept for interface compatibility.

        Returns:
            The output of ``self.model.m.predict`` for a batch holding the
            single sampled MFCC input.
        """
        mfcc = read_mfcc(audio, self.SAMPLE_RATE)
        frames = sample_from_mfcc(mfcc, self.NUM_FRAMES)
        # The model expects a batch, so add a leading batch axis.
        return self.model.m.predict(np.expand_dims(frames, axis=0))