{
  "model_type": "speaker_encoder",
  "architecture": "LSTM",
  "input_dim": 40,
  "hidden_dim": 256,
  "num_layers": 3,
  "output_dim": 256,
  "dropout": 0.1,
  "sample_rate": 16000,
  "window_size": 0.04,
  "window_stride": 0.01,
  "n_mels": 40,
  "embedding_size": 256,
  "prenet_dims": [256, 256],
  "lstm_dims": 256,
  "num_lstm_layers": 3,
  "speaker_embedding_size": 256,
  "use_cuda": true,
  "model_name": "speaker_encoder",
  "version": "1.0",
  "authors": ["Arjit"],
  "description": "Speaker encoder model for voice conversion tasks"
}