{ "model_type": "gtransformer", "architectures": ["GTransformerForCausalLM"], "vocab_size": 65536, "hidden_size": 8192, "intermediate_size": 22016, "num_hidden_layers": 48, "num_attention_heads": 64, "hidden_act": "swiglu", "max_position_embeddings": 65536, "initializer_range": 0.02, "layer_norm_epsilon": 1e-5, "attention_dropout": 0.05, "hidden_dropout_prob": 0.05, "rotary_emb_base": 10000, "use_flash_attention": true, "use_low_rank_ffn": true, "use_entropy_gate": true, "fp8_precision": true, "dvfs_enabled": true, "informational_constant_kI": 2.612e-20, "energy_per_token_target_J": 0.07, "training_objective": "cross_entropy + lambda*(I_total - I_useful)", "optimizer": "adamw_torch", "gradient_checkpointing": true, "torch_dtype": "float16", "transformers_version": "4.44.2", "tokenizer_class": "GTransformerTokenizer", "bos_token_id": 1, "eos_token_id": 2, "pad_token_id": 0, "tie_word_embeddings": false, "use_cache": true, "model_revision": "main", "git_informational_mode": { "enable": true, "delta_I_gate": 0.75, "thermal_threshold_T": 2.3, "energy_scaling_mode": "adaptive" } }