{
"model_type": "speaker_encoder",
"architecture": "LSTM",
"input_dim": 40,
"hidden_dim": 256,
"num_layers": 3,
"output_dim": 256,
"dropout": 0.1,
"sample_rate": 16000,
"window_size": 0.04,
"window_stride": 0.01,
"n_mels": 40,
"embedding_size": 256,
"prenet_dims": [256, 256],
"lstm_dims": 256,
"num_lstm_layers": 3,
"speaker_embedding_size": 256,
"use_cuda": true,
"model_name": "speaker_encoder",
"version": "1.0",
"authors": ["Arjit"],
"description": "Speaker encoder model for voice conversion tasks"
}