{
  "alpha": 1.0,
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoConfig": "configuration.ShramConfig",
    "AutoModelForCausalLM": "huggingface.ShramForCausalLM"
  },
  "beta": 32.0,
  "head_dim": 16,
  "hidden_size": 512,
  "inference_sequence_length": 1024,
  "intermediate_size": 1366,
  "local_rope_theta": 10000.0,
  "model_type": "shram",
  "mosrah_rope_theta": 10000.0,
  "num_hidden_layers": 12,
  "num_mosrah_heads": 16,
  "num_selected_heads": 16,
  "num_sliding_window_heads": 16,
  "rms_norm_eps": 1e-05,
  "rope_mode": "main_sequence",
  "tie_word_embeddings": false,
  "training_sequence_length": 1024,
  "transformers_version": "5.8.0",
  "use_cache": true,
  "vocab_size": 50277,
  "window_size": 128
}