|
|
--- |
|
|
license: mit |
|
|
--- |
|
|
|
|
|
Pyannote and WeSpeaker models converted to OpenVINO for speaker diarization and speaker identification.
|
|
|
|
|
|
|
|
Loading the audio file
|
|
|
|
|
```python |
|
|
import IPython.display as ipd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import torch  # needed for torch.from_numpy below

# The 160000-sample window used below corresponds to 10 s only at 16 kHz, the
# rate pyannote segmentation models operate on. Without an explicit `sr`,
# librosa.load resamples to its 22,050 Hz default, so request 16 kHz directly.
SAMPLE_RATE = 16000
CHUNK_SECONDS = 10

sample_file = "tutorials_assets_sample.wav"
audio, sr = librosa.load(sample_file, sr=SAMPLE_RATE)

# Take the first 10 s (160000 samples) and add two leading singleton axes,
# giving a tensor of shape (1, 1, num_samples).
# NOTE(review): a clip shorter than 10 s yields a shorter tensor - confirm the
# converted model accepts that, or pad the audio first.
waveform = torch.from_numpy(audio[: CHUNK_SECONDS * SAMPLE_RATE]).unsqueeze(0).unsqueeze(0)

# Plot the full waveform for a quick visual check.
plt.figure(figsize=(14, 5))
librosa.display.waveshow(audio, sr=sr)

# Last expression of the cell: renders an audio player in a notebook.
ipd.Audio(sample_file)
|
|
``` |
|
|
|
|
|
Loading the pyannote model |
|
|
```python |
|
|
import openvino as ov  # `ov` was used without being imported

# Read the converted segmentation model and compile it for the target device.
core = ov.Core()
model = core.read_model("pyannote-segmentation.xml")
# Targets the NPU; change the device string to "CPU" on machines without one.
compiled_model = core.compile_model(model, "NPU")

# Despite their names, these are the model's first input/output ports; they
# are used as keys for feeding the input and reading the result.
input_name = compiled_model.input(0)
output_name = compiled_model.output(0)

# `waveform` is the torch tensor built in the audio-loading snippet. Convert it
# to a NumPy array, which OpenVINO accepts as inference input.
results = compiled_model({input_name: waveform.numpy()})
output = results[output_name]

# Last expression of the cell: shows the output summed along axis 1.
output.sum(axis=1)
|
|
``` |
|
|
|
|
|
|
|
|
Loading the embedding model |
|
|
```python |
|
|
import openvino as ov
import torch

# Read the converted WeSpeaker embedding model.
core = ov.Core()
embedding_openvino_model = core.read_model("pyannote-wespeaker.xml")

# Pin the input to a static (1, 100, 80) shape before compiling.
# NOTE(review): presumably (batch, frames, features) - confirm against the
# feature extraction used upstream.
embedding_openvino_model.reshape((1, 100, 80))

# Targets the NPU; change the device string to "CPU" on machines without one.
compiled_model = core.compile_model(embedding_openvino_model, "NPU")

# First input/output ports, used as keys for feeding and reading tensors.
input_name = compiled_model.input(0)
output_name = compiled_model.output(0)

# Smoke-test the compiled model with an all-zero input of the reshaped size,
# converted to a NumPy array for OpenVINO.
dummy_features = torch.zeros((1, 100, 80)).numpy()
results = compiled_model({input_name: dummy_features})
output = results[output_name]

# Last expression of the cell: shows the output summed along axis 1.
output.sum(axis=1)
|
|
``` |