Voice Activity Detection
LiteRT
LiteRT
multilingual
speaker-diarization
pyannote
diarization
on-device
soniqo
speech-cloud
speech-core
Instructions to use soniqo/Pyannote-Segmentation-LiteRT with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- LiteRT
How to use soniqo/Pyannote-Segmentation-LiteRT with LiteRT:
# No code snippets available yet for this library.
# To use this model, check the repository files and the library's documentation.
# Want to help? PRs adding snippets are welcome at:
# https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
| { | |
| "model": "pyannote-segmentation-3.0", | |
| "format": "tflite", | |
| "mode": "streaming", | |
| "sample_rate": 16000, | |
| "chunk_duration": 1.0, | |
| "full_window_duration": 10.0, | |
| "full_window_step": 5.0, | |
| "num_chunks_per_window": 10, | |
| "num_powerset_classes": 7, | |
| "max_local_speakers": 3, | |
| "frames_per_chunk": 56, | |
| "frames_per_window": 560, | |
| "lstm_state_shape": [ | |
| 2, | |
| 8, | |
| 1, | |
| 128 | |
| ], | |
| "inputs": { | |
| "audio": { | |
| "shape": [ | |
| 1, | |
| 1, | |
| 16000 | |
| ], | |
| "dtype": "float32" | |
| }, | |
| "lstm_state": { | |
| "shape": [ | |
| 2, | |
| 8, | |
| 1, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "note": "Pass zeros on the first chunk. For every later chunk, pass the lstm_state_out returned by the previous call." | |
| } | |
| }, | |
| "outputs": { | |
| "posteriors": { | |
| "shape": [ | |
| 1, | |
| 56, | |
| 7 | |
| ], | |
| "dtype": "float32" | |
| }, | |
| "lstm_state_out": { | |
| "shape": [ | |
| 2, | |
| 8, | |
| 1, | |
| 128 | |
| ], | |
| "dtype": "float32", | |
| "note": "Feed back as lstm_state for the next chunk." | |
| } | |
| }, | |
| "usage": "Run 10 consecutive 1-second chunks with state carried between calls to reconstruct a full 10-second segmentation window. Initialize lstm_state to zeros for the first chunk." | |
| } |