{ "feature_dim": 198, "d_model": 256, "num_heads": 8, "num_layers": 6, "ffn_dim": 1024, "dropout": 0.1, "vocab_size": 8000, "max_seq_len": 5000, "time_mask_prob": 0.5, "time_mask_max_frames": 20, "frame_drop_prob": 0.05, "use_grad_checkpoint": true }