|
|
--- |
|
|
library_name: nemo |
|
|
license: cc-by-4.0 |
|
|
tags: |
|
|
- pytorch |
|
|
- NeMo |
|
|
--- |
|
|
A speaker verification model trained on Japanese speech data. |
|
|
|
|
|
# Install |
|
|
```bash |
|
|
pip install "nemo_toolkit[all]" |
|
|
``` |
|
|
|
|
|
# Inference |
|
|
|
|
|
```python |
|
|
import nemo.collections.asr as nemo_asr |
|
|
speaker_model = nemo_asr.models.EncDecSpeakerLabelModel.from_pretrained("Respair/RyuseiNet") |
|
|
emb = speaker_model.get_embedding("audio.wav") # speaker embedding |
|
|
# or check whether two recordings come from the same speaker |
|
|
speaker_model.verify_speakers("audio_1.wav","audio_2.wav") |
|
|
``` |
|
|
|
|
|
# Architecture |
|
|
|
|
|
NVIDIA's TitaNet-Large |
|
|
|
|
|
# Data |
|
|
|
|
|
Approximately 800–1,000 hours of training data. |
|
|
|
|
|
# Compute |
|
|
NVIDIA GH200 (Grace Hopper Superchip) |