{ "architectures": [ "SharedSpaceEncoderForMaskedLM" ], "attention_backend": "sdpa", "attention_bias": false, "attention_dropout_prob": 0.1, "classifier_dropout": null, "dtype": "float32", "ffn_decompose": false, "ffn_rank": null, "head_dim": 32, "hidden_dropout_prob": 0.1, "hidden_size": 256, "initializer_range": 0.02, "intermediate_size": 1024, "kv_latent_dim": 32, "layer_norm_eps": 1e-12, "max_position_embeddings": 128, "model_type": "shared_subspace_encoder", "num_attention_heads": 8, "num_dense_layers": 0, "num_hidden_layers": 6, "o_latent_dim": 64, "output_subspace": false, "pad_token_id": 0, "q_latent_dim": 64, "rms_norm_eps": 1e-06, "rope_dims": 16, "rope_scaling": null, "rope_theta": 10000.0, "transformers_version": "4.56.1", "vocab_rank": 128, "vocab_size": 30522, "vocab_subspace": false }