---
license: mit
---

# Pyannote and WeSpeaker models converted for speaker diarization and identification with OpenVINO

## Load an audio file

```python
import librosa
import librosa.display
import matplotlib.pyplot as plt
import IPython.display as ipd
import torch

sample_file = "tutorials_assets_sample.wav"
audio, sr = librosa.load(sample_file)
waveform = torch.from_numpy(audio[0:160000]).unsqueeze(0).unsqueeze(0)

plt.figure(figsize=(14, 5))
librosa.display.waveshow(audio, sr=sr)
ipd.Audio(sample_file)
```

## Loading the pyannote segmentation model

```python
import openvino as ov

core = ov.Core()
model = core.read_model("pyannote-segmentation.xml")
compiled_model = core.compile_model(model, "NPU")  # use "CPU" or "GPU" if NPU is not available
input_name = compiled_model.input(0)
output_name = compiled_model.output(0)
results = compiled_model({input_name: waveform})
output = results[output_name]
output.sum(axis=1)
```

## Loading the embedding model

```python
import openvino as ov

core = ov.Core()
embedding_openvino_model = core.read_model("pyannote-wespeaker.xml")
embedding_openvino_model.reshape((1, 100, 80))
compiled_model = core.compile_model(embedding_openvino_model, "NPU")  # use "CPU" or "GPU" if NPU is not available
input_name = compiled_model.input(0)
output_name = compiled_model.output(0)
results = compiled_model({input_name: torch.zeros((1, 100, 80))})
output = results[output_name]
output.sum(axis=1)
```