{
  "encoder_dim": 320,
  "decoder_dim": 320,
  "depth": 6,
  "nheads": 8,
  "head_dim": 40,
  "vocab_size": 32768,
  "bos_id": 1,
  "eos_id": 2,
  "frame_len": 80,
  "total_lookahead": 16,
  "d_model_frontend": 320,
  "c1": 640,
  "c2": 320,
  "frontend_state_shapes": {
    "sample_buffer": [
      1,
      79
    ],
    "sample_len": [
      1
    ],
    "conv1_buffer": [
      1,
      320,
      4
    ],
    "conv2_buffer": [
      1,
      640,
      4
    ],
    "frame_count": [
      1
    ]
  }
}