File size: 1,713 Bytes
{
"model": {
"type": "nemotron_speech",
"vocab_size": 1025,
"num_mels": 128,
"fft_size": 512,
"hop_length": 160,
"win_length": 400,
"preemph": 0.97,
"log_eps": 5.96046448e-08,
"subsampling_factor": 8,
"left_context": 70,
"conv_context": 8,
"pre_encode_cache_size": 9,
"sample_rate": 16000,
"chunk_samples": 8960,
"blank_id": 1024,
"max_symbols_per_step": 10,
"encoder": {
"filename": "encoder.onnx",
"hidden_size": 1024,
"num_hidden_layers": 24,
"inputs": {
"audio_features": "audio_signal",
"input_lengths": "length",
"cache_last_channel": "cache_last_channel",
"cache_last_time": "cache_last_time",
"cache_last_channel_len": "cache_last_channel_len"
},
"outputs": {
"encoder_outputs": "outputs",
"output_lengths": "encoded_lengths",
"cache_last_channel_next": "cache_last_channel_next",
"cache_last_time_next": "cache_last_time_next",
"cache_last_channel_len_next": "cache_last_channel_len_next"
}
},
"decoder": {
"filename": "decoder.onnx",
"hidden_size": 640,
"num_hidden_layers": 2,
"inputs": {
"targets": "targets",
"lstm_hidden_state": "h_in",
"lstm_cell_state": "c_in"
},
"outputs": {
"outputs": "decoder_output",
"lstm_hidden_state": "h_out",
"lstm_cell_state": "c_out"
}
},
"joiner": {
"filename": "joint.onnx",
"inputs": {
"encoder_outputs": "encoder_output",
"decoder_outputs": "decoder_output"
},
"outputs": {
"logits": "joint_output"
}
}
}
}