{
  "numMelBins": 128,
  "sampleRate": 16000,
  "nFFT": 512,
  "hopLength": 160,
  "winLength": 400,
  "preEmphasis": 0.97,
  "encoderHidden": 1024,
  "encoderLayers": 24,
  "subsamplingFactor": 8,
  "decoderHidden": 640,
  "decoderLayers": 2,
  "vocabSize": 8192,
  "blankTokenId": 8192,
  "numDurationBins": 5,
  "durationBins": [
    0,
    1,
    2,
    3,
    4
  ]
}