{ "model_type": "pyannote-segmentation", "architecture": "sincnet-bilstm-classifier", "framework": "mlx", "original_model": "pyannote/segmentation-3.0", "conversion_date": "2026-01-16", "parameters": 1473515, "model_size_mb": 5.6, "input": { "type": "audio", "sample_rate": 16000, "channels": 1, "format": "waveform", "dtype": "float32" }, "output": { "type": "logits", "num_classes": 7, "frame_duration_ms": 17, "activation": "log_softmax" }, "architecture_details": { "sincnet": { "num_filters": 80, "kernel_size": 251, "num_layers": 3 }, "lstm": { "num_layers": 4, "hidden_size": 128, "bidirectional": true, "output_size": 256 }, "classifier": { "hidden_dim": 128, "num_classes": 7 } }, "validation": { "pytorch_correlation": 0.886, "sincnet_correlation": 0.9999999999, "lstm_correlation": 0.999, "component_validation": "perfect", "status": "production_ready" }, "performance": { "platform": "apple_silicon", "backend": "metal", "memory_model": "unified", "gpu_accelerated": true }, "license": "MIT", "tags": [ "speaker-diarization", "audio", "mlx", "apple-silicon", "pyannote", "sincnet", "lstm", "speaker-segmentation" ] }