{
  "architectures": [
    "LoopLMForCausalLM"
  ],
  "context_length": 1024,
  "d_ff": 2752,
  "d_model": 1024,
  "dtype": "float32",
  "lb_loss_factor": 0.01,
  "lz_loss_factor": 0.001,
  "max_length": 1024,
  "model_type": "loop-lm",
  "model_variant": "base",
  "num_active": 2,
  "num_experts": 8,
  "num_heads": 16,
  "num_layers": 16,
  "num_layers_in_stack": 8,
  "num_stacks": 2,
  "rope_theta": 10000.0,
  "transformers_version": "5.3.0",
  "vocab_size": 50257,
  "weight_tying": false,
  "width_ratio": 8.0,
  "auto_map": {
    "AutoConfig": "modeling_loop_lm.LoopLMConfig",
    "AutoModelForCausalLM": "modeling_loop_lm.LoopLMForCausalLM"
  }
}