soniqo
/

Pyannote-Segmentation-LiteRT

Voice Activity Detection

speaker-diarization

Model card | Files and versions

Pyannote-Segmentation-LiteRT / config.json

aufklarer's picture

Initial LiteRT upload

97d0228 verified about 1 month ago

history blame contribute delete

1.22 kB

	{
	"model": "pyannote-segmentation-3.0",
	"format": "tflite",
	"mode": "streaming",
	"sample_rate": 16000,
	"chunk_duration": 1.0,
	"full_window_duration": 10.0,
	"full_window_step": 5.0,
	"num_chunks_per_window": 10,
	"num_powerset_classes": 7,
	"max_local_speakers": 3,
	"frames_per_chunk": 56,
	"frames_per_window": 560,
	"lstm_state_shape": [
	2,
	8,
	1,
	128
	],
	"inputs": {
	"audio": {
	"shape": [
	1,
	1,
	16000
	],
	"dtype": "float32"
	},
	"lstm_state": {
	"shape": [
	2,
	8,
	1,
	128
	],
	"dtype": "float32",
	"note": "Pass zeros on the first chunk. On every later chunk, pass the lstm_state_out returned by the previous call."
	}
	},
	"outputs": {
	"posteriors": {
	"shape": [
	1,
	56,
	7
	],
	"dtype": "float32"
	},
	"lstm_state_out": {
	"shape": [
	2,
	8,
	1,
	128
	],
	"dtype": "float32",
	"note": "Feed back as lstm_state for the next chunk."
	}
	},
	"usage": "Run 10 consecutive 1-second chunks with state carried between calls to reconstruct a full 10-second segmentation window. Initialize lstm_state to zeros for the first chunk."
	}