nemotron-cpu-int4 / genai_config.json
TechWithRay's picture
Upload folder using huggingface_hub
abfb0a4 verified
{
"model": {
"type": "nemotron_speech",
"vocab_size": 1025,
"num_mels": 128,
"fft_size": 512,
"hop_length": 160,
"win_length": 400,
"preemph": 0.97,
"log_eps": 5.96046448e-08,
"subsampling_factor": 8,
"left_context": 70,
"conv_context": 8,
"pre_encode_cache_size": 9,
"sample_rate": 16000,
"chunk_samples": 8960,
"blank_id": 1024,
"max_symbols_per_step": 10,
"encoder": {
"filename": "encoder.onnx",
"hidden_size": 1024,
"num_hidden_layers": 24,
"inputs": {
"audio_features": "audio_signal",
"input_lengths": "length",
"cache_last_channel": "cache_last_channel",
"cache_last_time": "cache_last_time",
"cache_last_channel_len": "cache_last_channel_len"
},
"outputs": {
"encoder_outputs": "outputs",
"output_lengths": "encoded_lengths",
"cache_last_channel_next": "cache_last_channel_next",
"cache_last_time_next": "cache_last_time_next",
"cache_last_channel_len_next": "cache_last_channel_len_next"
}
},
"decoder": {
"filename": "decoder.onnx",
"hidden_size": 640,
"num_hidden_layers": 2,
"inputs": {
"targets": "targets",
"lstm_hidden_state": "h_in",
"lstm_cell_state": "c_in"
},
"outputs": {
"outputs": "decoder_output",
"lstm_hidden_state": "h_out",
"lstm_cell_state": "c_out"
}
},
"joiner": {
"filename": "joint.onnx",
"inputs": {
"encoder_outputs": "encoder_output",
"decoder_outputs": "decoder_output"
},
"outputs": {
"logits": "joint_output"
}
}
}
}