{ "vocab_size": 256, "hidden_dim": 64, "patch_encoder_dim": 32, "n_layers": 3, "n_heads": 4, "n_experts": 4, "top_k_experts": 2, "expert_hidden_mult": 2, "patch_encoder_layers": 1, "max_seq_length": 512, "max_patches": 64, "learning_rate": 0.001, "weight_decay": 0.01, "beta1": 0.9, "beta2": 0.98, "epsilon": 1e-08, "batch_size": 256, "micro_batch_size": 4, "gradient_clip": 0.5, "load_balance_coefficient": 0.01, "dropout": 0.0, "use_prenorm": true, "entropy_threshold": 0.4, "min_patch_size": 4, "max_patch_size": 32 }