{ "alpha": 1.0, "attention_dropout": 0.0, "auto_map": { "AutoConfig": "configuration.ShramConfig", "AutoModelForCausalLM": "huggingface.ShramForCausalLM" }, "beta": 32.0, "embedding_width": 512, "head_dim": 16, "inference_sequence_length": 1024, "local_rope_theta": 10000.0, "mlp_width": 1366, "model_type": "shram", "mosrah_rope_theta": 10000.0, "num_decoder_layers": 12, "num_mosrah_heads": 16, "num_selected_heads": 16, "num_sliding_window_heads": 16, "rms_norm_eps": 1e-05, "rope_mode": "main_sequence", "tie_word_embeddings": false, "training_sequence_length": 1024, "transformers_version": "5.12.1", "use_cache": true, "use_residual_gate": true, "vocab_size": 50277, "window_size": 128 }