{
  "activation_dropout": 0.1,
  "architectures": [
    "EmformerForRNNT"
  ],
  "attention_dropout": 0.1,
  "blank_token_id": 4096,
  "final_dropout": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "input_dim": 80,
  "intermediate_size": 2048,
  "joiner_activation": "relu",
  "layer_norm_eps": 1e-05,
  "left_context_length": 30,
  "lstm_dropout": 0.3,
  "lstm_hidden_dim": 512,
  "lstm_layer_norm": 0.001,
  "lstm_layer_norm_epsilon": 0.001,
  "model_type": "emformer",
  "num_attention_heads": 8,
  "num_hidden_layers": 20,
  "num_lstm_layers": 3,
  "output_dim": 1024,
  "pad_token_id": 1,
  "right_context_length": 4,
  "segment_length": 16,
  "symbol_embedding_dim": 512,
  "time_reduction_input_dim": 128,
  "time_reduction_stride": 4,
  "torch_dtype": "float32",
  "transformers_version": "4.20.0.dev0",
  "vocab_size": 4097
}