{ "dim": 5120, "n_layers": 40, "head_dim": 128, "hidden_dim": 16384, "n_heads": 32, "n_kv_heads": 8, "rope_theta": 1000000000.0, "norm_eps": 1e-05, "vocab_size": 131072, "tied_embeddings": false, "max_position_embeddings": 262144, "llama_4_scaling": { "original_max_position_embeddings": 16384, "beta": 0.1 }, "q_lora_rank": null, "qk_rope_head_dim": null, "qk_nope_head_dim": null, "kv_lora_rank": null, "v_head_dim": null, "yarn": { "original_max_position_embeddings": 16384, "factor": 16, "apply_scale": false, "beta": 32, "alpha": 1 }, "vision_encoder": { "image_token_id": 10, "image_break_token_id": 12, "image_end_token_id": 13, "intermediate_size": 4096, "num_hidden_layers": 24, "num_attention_heads": 16, "mm_projector_id": "patch_merge", "spatial_merge_size": 2, "hidden_size": 1024, "num_channels": 3, "image_size": 1540, "max_image_size": 1540, "patch_size": 14, "rope_theta": 10000.0, "add_pre_mm_projector_layer_norm": true, "adapter_bias": false }, "quantization_config": { "config_groups": { "group_0": { "targets": [ "Linear" ], "weights": { "num_bits": 8, "type": "float", "symmetric": true, "group_size": null, "strategy": "channel", "block_structure": null, "dynamic": false, "actorder": null, "observer": "mse", "observer_kwargs": {} }, "input_activations": { "num_bits": 8, "type": "float", "symmetric": true, "group_size": null, "strategy": "token", "block_structure": null, "dynamic": true, "actorder": null, "observer": null, "observer_kwargs": {} }, "output_activations": null, "format": "float-quantized" } }, "quant_method": "compressed-tensors", "kv_cache_scheme": null, "format": "float-quantized", "quantization_status": "compressed", "global_compression_ratio": null, "ignore": [ "model.vision_tower.transformer.layers.0.feed_forward.gate_proj", "model.vision_tower.transformer.layers.0.feed_forward.up_proj", "model.vision_tower.transformer.layers.0.feed_forward.down_proj", "model.vision_tower.transformer.layers.0.attention.k_proj", "model.vision_tower.transformer.layers.0.attention.v_proj", "model.vision_tower.transformer.layers.0.attention.q_proj", "model.vision_tower.transformer.layers.0.attention.o_proj", "model.vision_tower.transformer.layers.1.feed_forward.gate_proj", "model.vision_tower.transformer.layers.1.feed_forward.up_proj", "model.vision_tower.transformer.layers.1.feed_forward.down_proj", "model.vision_tower.transformer.layers.1.attention.k_proj", "model.vision_tower.transformer.layers.1.attention.v_proj", "model.vision_tower.transformer.layers.1.attention.q_proj", "model.vision_tower.transformer.layers.1.attention.o_proj", "model.vision_tower.transformer.layers.2.feed_forward.gate_proj", "model.vision_tower.transformer.layers.2.feed_forward.up_proj", "model.vision_tower.transformer.layers.2.feed_forward.down_proj", "model.vision_tower.transformer.layers.2.attention.k_proj", "model.vision_tower.transformer.layers.2.attention.v_proj", "model.vision_tower.transformer.layers.2.attention.q_proj", "model.vision_tower.transformer.layers.2.attention.o_proj", "model.vision_tower.transformer.layers.3.feed_forward.gate_proj", "model.vision_tower.transformer.layers.3.feed_forward.up_proj", "model.vision_tower.transformer.layers.3.feed_forward.down_proj", "model.vision_tower.transformer.layers.3.attention.k_proj", "model.vision_tower.transformer.layers.3.attention.v_proj", "model.vision_tower.transformer.layers.3.attention.q_proj", "model.vision_tower.transformer.layers.3.attention.o_proj", "model.vision_tower.transformer.layers.4.feed_forward.gate_proj", 
"model.vision_tower.transformer.layers.4.feed_forward.up_proj", "model.vision_tower.transformer.layers.4.feed_forward.down_proj", "model.vision_tower.transformer.layers.4.attention.k_proj", "model.vision_tower.transformer.layers.4.attention.v_proj", "model.vision_tower.transformer.layers.4.attention.q_proj", "model.vision_tower.transformer.layers.4.attention.o_proj", "model.vision_tower.transformer.layers.5.feed_forward.gate_proj", "model.vision_tower.transformer.layers.5.feed_forward.up_proj", "model.vision_tower.transformer.layers.5.feed_forward.down_proj", "model.vision_tower.transformer.layers.5.attention.k_proj", "model.vision_tower.transformer.layers.5.attention.v_proj", "model.vision_tower.transformer.layers.5.attention.q_proj", "model.vision_tower.transformer.layers.5.attention.o_proj", "model.vision_tower.transformer.layers.6.feed_forward.gate_proj", "model.vision_tower.transformer.layers.6.feed_forward.up_proj", "model.vision_tower.transformer.layers.6.feed_forward.down_proj", "model.vision_tower.transformer.layers.6.attention.k_proj", "model.vision_tower.transformer.layers.6.attention.v_proj", "model.vision_tower.transformer.layers.6.attention.q_proj", "model.vision_tower.transformer.layers.6.attention.o_proj", "model.vision_tower.transformer.layers.7.feed_forward.gate_proj", "model.vision_tower.transformer.layers.7.feed_forward.up_proj", "model.vision_tower.transformer.layers.7.feed_forward.down_proj", "model.vision_tower.transformer.layers.7.attention.k_proj", "model.vision_tower.transformer.layers.7.attention.v_proj", "model.vision_tower.transformer.layers.7.attention.q_proj", "model.vision_tower.transformer.layers.7.attention.o_proj", "model.vision_tower.transformer.layers.8.feed_forward.gate_proj", "model.vision_tower.transformer.layers.8.feed_forward.up_proj", "model.vision_tower.transformer.layers.8.feed_forward.down_proj", "model.vision_tower.transformer.layers.8.attention.k_proj", "model.vision_tower.transformer.layers.8.attention.v_proj", "model.vision_tower.transformer.layers.8.attention.q_proj", "model.vision_tower.transformer.layers.8.attention.o_proj", "model.vision_tower.transformer.layers.9.feed_forward.gate_proj", "model.vision_tower.transformer.layers.9.feed_forward.up_proj", "model.vision_tower.transformer.layers.9.feed_forward.down_proj", "model.vision_tower.transformer.layers.9.attention.k_proj", "model.vision_tower.transformer.layers.9.attention.v_proj", "model.vision_tower.transformer.layers.9.attention.q_proj", "model.vision_tower.transformer.layers.9.attention.o_proj", "model.vision_tower.transformer.layers.10.feed_forward.gate_proj", "model.vision_tower.transformer.layers.10.feed_forward.up_proj", "model.vision_tower.transformer.layers.10.feed_forward.down_proj", "model.vision_tower.transformer.layers.10.attention.k_proj", "model.vision_tower.transformer.layers.10.attention.v_proj", "model.vision_tower.transformer.layers.10.attention.q_proj", "model.vision_tower.transformer.layers.10.attention.o_proj", "model.vision_tower.transformer.layers.11.feed_forward.gate_proj", "model.vision_tower.transformer.layers.11.feed_forward.up_proj", "model.vision_tower.transformer.layers.11.feed_forward.down_proj", "model.vision_tower.transformer.layers.11.attention.k_proj", "model.vision_tower.transformer.layers.11.attention.v_proj", "model.vision_tower.transformer.layers.11.attention.q_proj", "model.vision_tower.transformer.layers.11.attention.o_proj", "model.vision_tower.transformer.layers.12.feed_forward.gate_proj", 
"model.vision_tower.transformer.layers.12.feed_forward.up_proj", "model.vision_tower.transformer.layers.12.feed_forward.down_proj", "model.vision_tower.transformer.layers.12.attention.k_proj", "model.vision_tower.transformer.layers.12.attention.v_proj", "model.vision_tower.transformer.layers.12.attention.q_proj", "model.vision_tower.transformer.layers.12.attention.o_proj", "model.vision_tower.transformer.layers.13.feed_forward.gate_proj", "model.vision_tower.transformer.layers.13.feed_forward.up_proj", "model.vision_tower.transformer.layers.13.feed_forward.down_proj", "model.vision_tower.transformer.layers.13.attention.k_proj", "model.vision_tower.transformer.layers.13.attention.v_proj", "model.vision_tower.transformer.layers.13.attention.q_proj", "model.vision_tower.transformer.layers.13.attention.o_proj", "model.vision_tower.transformer.layers.14.feed_forward.gate_proj", "model.vision_tower.transformer.layers.14.feed_forward.up_proj", "model.vision_tower.transformer.layers.14.feed_forward.down_proj", "model.vision_tower.transformer.layers.14.attention.k_proj", "model.vision_tower.transformer.layers.14.attention.v_proj", "model.vision_tower.transformer.layers.14.attention.q_proj", "model.vision_tower.transformer.layers.14.attention.o_proj", "model.vision_tower.transformer.layers.15.feed_forward.gate_proj", "model.vision_tower.transformer.layers.15.feed_forward.up_proj", "model.vision_tower.transformer.layers.15.feed_forward.down_proj", "model.vision_tower.transformer.layers.15.attention.k_proj", "model.vision_tower.transformer.layers.15.attention.v_proj", "model.vision_tower.transformer.layers.15.attention.q_proj", "model.vision_tower.transformer.layers.15.attention.o_proj", "model.vision_tower.transformer.layers.16.feed_forward.gate_proj", "model.vision_tower.transformer.layers.16.feed_forward.up_proj", "model.vision_tower.transformer.layers.16.feed_forward.down_proj", "model.vision_tower.transformer.layers.16.attention.k_proj", "model.vision_tower.transformer.layers.16.attention.v_proj", "model.vision_tower.transformer.layers.16.attention.q_proj", "model.vision_tower.transformer.layers.16.attention.o_proj", "model.vision_tower.transformer.layers.17.feed_forward.gate_proj", "model.vision_tower.transformer.layers.17.feed_forward.up_proj", "model.vision_tower.transformer.layers.17.feed_forward.down_proj", "model.vision_tower.transformer.layers.17.attention.k_proj", "model.vision_tower.transformer.layers.17.attention.v_proj", "model.vision_tower.transformer.layers.17.attention.q_proj", "model.vision_tower.transformer.layers.17.attention.o_proj", "model.vision_tower.transformer.layers.18.feed_forward.gate_proj", "model.vision_tower.transformer.layers.18.feed_forward.up_proj", "model.vision_tower.transformer.layers.18.feed_forward.down_proj", "model.vision_tower.transformer.layers.18.attention.k_proj", "model.vision_tower.transformer.layers.18.attention.v_proj", "model.vision_tower.transformer.layers.18.attention.q_proj", "model.vision_tower.transformer.layers.18.attention.o_proj", "model.vision_tower.transformer.layers.19.feed_forward.gate_proj", "model.vision_tower.transformer.layers.19.feed_forward.up_proj", "model.vision_tower.transformer.layers.19.feed_forward.down_proj", "model.vision_tower.transformer.layers.19.attention.k_proj", "model.vision_tower.transformer.layers.19.attention.v_proj", "model.vision_tower.transformer.layers.19.attention.q_proj", "model.vision_tower.transformer.layers.19.attention.o_proj", "model.vision_tower.transformer.layers.20.feed_forward.gate_proj", 
"model.vision_tower.transformer.layers.20.feed_forward.up_proj", "model.vision_tower.transformer.layers.20.feed_forward.down_proj", "model.vision_tower.transformer.layers.20.attention.k_proj", "model.vision_tower.transformer.layers.20.attention.v_proj", "model.vision_tower.transformer.layers.20.attention.q_proj", "model.vision_tower.transformer.layers.20.attention.o_proj", "model.vision_tower.transformer.layers.21.feed_forward.gate_proj", "model.vision_tower.transformer.layers.21.feed_forward.up_proj", "model.vision_tower.transformer.layers.21.feed_forward.down_proj", "model.vision_tower.transformer.layers.21.attention.k_proj", "model.vision_tower.transformer.layers.21.attention.v_proj", "model.vision_tower.transformer.layers.21.attention.q_proj", "model.vision_tower.transformer.layers.21.attention.o_proj", "model.vision_tower.transformer.layers.22.feed_forward.gate_proj", "model.vision_tower.transformer.layers.22.feed_forward.up_proj", "model.vision_tower.transformer.layers.22.feed_forward.down_proj", "model.vision_tower.transformer.layers.22.attention.k_proj", "model.vision_tower.transformer.layers.22.attention.v_proj", "model.vision_tower.transformer.layers.22.attention.q_proj", "model.vision_tower.transformer.layers.22.attention.o_proj", "model.vision_tower.transformer.layers.23.feed_forward.gate_proj", "model.vision_tower.transformer.layers.23.feed_forward.up_proj", "model.vision_tower.transformer.layers.23.feed_forward.down_proj", "model.vision_tower.transformer.layers.23.attention.k_proj", "model.vision_tower.transformer.layers.23.attention.v_proj", "model.vision_tower.transformer.layers.23.attention.q_proj", "model.vision_tower.transformer.layers.23.attention.o_proj", "model.multi_modal_projector.patch_merger.merging_layer", "model.multi_modal_projector.linear_1", "model.multi_modal_projector.linear_2", "lm_head" ], "sparsity_config": {} } }