{ "model_name": "HuggingFaceTB/SmolVLM2-2.2B-Instruct", "model_config": { "transformers_version": "5.5.3", "architectures": [ "SmolVLMForConditionalGeneration" ], "output_hidden_states": false, "return_dict": true, "dtype": "float16", "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "problem_type": null, "use_cache": false, "image_token_id": 49190, "tie_word_embeddings": false, "vision_config": { "architectures": null, "output_hidden_states": false, "return_dict": true, "dtype": "float16", "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "problem_type": null, "hidden_size": 1152, "intermediate_size": 4304, "num_hidden_layers": 27, "num_attention_heads": 16, "num_channels": 3, "image_size": 384, "patch_size": 14, "hidden_act": "gelu_pytorch_tanh", "layer_norm_eps": 1e-06, "attention_dropout": 0.0, "initializer_range": 0.02, "_name_or_path": "", "max_image_size": { "longest_edge": 384 }, "model_type": "smolvlm_vision", "size": { "longest_edge": 1920 }, "tie_word_embeddings": false, "use_base_siglip": false, "output_attentions": false }, "text_config": { "architectures": [ "VLlama3ForCausalLM" ], "output_hidden_states": false, "return_dict": true, "dtype": "float16", "chunk_size_feed_forward": 0, "is_encoder_decoder": false, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "problem_type": null, "vocab_size": 49280, "hidden_size": 2048, "intermediate_size": 8192, "num_hidden_layers": 24, "num_attention_heads": 32, "num_key_value_heads": 32, "hidden_act": "silu", "max_position_embeddings": 8192, "initializer_range": 0.02, "rms_norm_eps": 1e-05, "use_cache": true, "pad_token_id": 2, "bos_token_id": 1, "eos_token_id": 2, "pretraining_tp": 1, "tie_word_embeddings": false, "rope_parameters": { "rope_theta": 130000, "rope_type": "default" }, "attention_bias": false, "attention_dropout": 0.0, "mlp_bias": false, "head_dim": 64, "_name_or_path": "None", "_flash_attn_2_enabled": true, "model_type": "llama", "neftune_noise_alpha": 0.0, "perceiver_config": { "_attn_implementation_autoset": false, "_name_or_path": "", "add_cross_attention": false, "architectures": null, "attention_dropout": 0.0, "bad_words_ids": null, "begin_suppress_tokens": null, "bos_token_id": null, "chunk_size_feed_forward": 0, "cross_attention_hidden_size": null, "decoder_start_token_id": null, "diversity_penalty": 0.0, "do_sample": false, "early_stopping": false, "encoder_no_repeat_ngram_size": 0, "eos_token_id": null, "exponential_decay_length_penalty": null, "finetuning_task": null, "forced_bos_token_id": null, "forced_eos_token_id": null, "hidden_act": "silu", "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "is_decoder": false, "is_encoder_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "length_penalty": 1.0, "max_length": 20, "min_length": 0, "model_type": "vllama3", "no_repeat_ngram_size": 0, "num_beam_groups": 1, "num_beams": 1, "num_key_value_heads": 1, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_scores": false, "pad_token_id": null, "prefix": null, "problem_type": null, "pruned_heads": {}, "qk_layer_norms_perceiver": false, "remove_invalid_values": false, "repetition_penalty": 1.0, "resampler_depth": 6, "resampler_head_dim": 96, "resampler_n_heads": 16, "resampler_n_latents": 64, "return_dict": true, "return_dict_in_generate": false, "sep_token_id": null, "suppress_tokens": null, "task_specific_params": null, "temperature": 1.0, "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, "top_k": 50, "top_p": 1.0, "torch_dtype": null, "torchscript": false, "transformers_version": "4.46.0", "typical_p": 1.0, "use_bfloat16": false }, "pixel_shuffle_factor": 3, "qk_layer_norms": false, "transformers.js_config": { "kv_cache_dtype": { "fp16": "float16", "q4f16": "float16" } }, "use_resampler": false, "output_attentions": false }, "scale_factor": 3, "pad_token_id": 128002, "_name_or_path": "HuggingFaceTB/SmolVLM2-2.2B-Instruct", "model_type": "smolvlm", "use_reentrant_checkpointing": false, "vocab_size": 49280, "output_attentions": false }, "quant_config": { "components": [ "text_backbone", "multimodal_connector" ], "scheme": "tritplane3", "group_size": 32, "n_iter": 10, "salient_fraction": 0.0, "rescue_fraction": 0.0, "n_planes": 3, "allow_all_linear": false, "target_module_names": [ "Wqkv", "att_proj", "attn.proj", "attn.qkv", "c_attn", "c_fc", "c_proj", "dense", "dense_4h_to_h", "dense_h_to_4h", "down_proj", "fc1", "fc2", "ff_proj", "gate_proj", "gate_up_proj", "k", "k_proj", "linear", "o", "o_proj", "out_proj", "per_layer_input_gate", "per_layer_projection", "proj", "q", "q_proj", "qkv", "qkv_proj", "query_key_value", "up_proj", "v", "v_proj", "w1", "w2", "w3", "wi", "wi_0", "wi_1", "wo" ], "max_length": 160, "calibration_batch_size": 2, "calibration_prompts": null, "vlm_use_demo_image": true }, "plan": {}, "layer_info": { "model.connector.modality_projection.proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 10368 ], "dtype": "torch.float16", "num_elements": 21233664, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 23887872, "effective_bits": 9.0 }, "model.text_model.layers.0.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.0.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.0.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.0.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.0.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.0.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.0.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.1.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.1.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.1.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.1.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.1.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.1.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.1.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.2.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.2.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.2.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.2.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.2.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.2.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.2.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.3.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.3.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.3.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.3.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.3.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.3.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.3.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.4.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.4.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.4.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.4.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.4.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.4.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.4.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.5.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.5.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.5.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.5.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.5.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.5.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.5.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.6.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.6.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.6.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.6.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.6.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.6.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.6.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.7.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.7.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.7.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.7.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.7.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.7.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.7.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.8.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.8.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.8.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.8.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.8.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.8.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.8.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.9.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.9.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.9.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.9.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.9.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.9.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.9.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.10.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.10.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.10.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.10.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.10.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.10.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.10.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.11.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.11.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.11.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.11.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.11.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.11.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.11.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.12.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.12.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.12.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.12.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.12.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.12.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.12.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.13.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.13.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.13.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.13.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.13.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.13.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.13.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.14.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.14.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.14.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.14.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.14.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.14.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.14.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.15.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.15.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.15.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.15.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.15.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.15.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.15.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.16.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.16.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.16.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.16.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.16.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.16.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.16.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.17.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.17.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.17.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.17.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.17.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.17.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.17.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.18.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.18.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.18.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.18.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.18.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.18.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.18.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.19.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.19.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.19.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.19.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.19.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.19.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.19.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.20.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.20.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.20.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.20.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.20.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.20.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.20.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.21.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.21.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.21.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.21.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.21.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.21.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.21.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.22.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.22.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.22.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.22.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.22.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.22.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.22.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.23.self_attn.q_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.23.self_attn.k_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.23.self_attn.v_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.23.self_attn.o_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 2048 ], "dtype": "torch.float16", "num_elements": 4194304, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 4718592, "effective_bits": 9.0 }, "model.text_model.layers.23.mlp.gate_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.23.mlp.up_proj": { "scheme": "tritplane_small_v1", "shape": [ 8192, 2048 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 }, "model.text_model.layers.23.mlp.down_proj": { "scheme": "tritplane_small_v1", "shape": [ 2048, 8192 ], "dtype": "torch.float16", "num_elements": 16777216, "n_planes": 3, "group_sizes": [ 32, 32, 32 ], "rescued_rows": 0, "stored_bytes": 18874368, "effective_bits": 9.0 } }, "stats": { "model.connector.modality_projection.proj": { "mse": 3.4296051580895437e-06, "rmse": 0.0018519193173811713, "relative_error": 0.11355021084933176, "max_error": 0.05395698547363281, "sparsity": 0.39136114716529846, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.0.self_attn.q_proj": { "mse": 0.0006577471503987908, "rmse": 0.025646581651338856, "relative_error": 0.16423382595057714, "max_error": 2.508922576904297, "sparsity": 0.3742976983388265, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.0.self_attn.k_proj": { "mse": 0.0006063183536753058, "rmse": 0.024623532518209198, "relative_error": 0.1626188869171995, "max_error": 2.065166473388672, "sparsity": 0.37669801712036133, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.0.self_attn.v_proj": { "mse": 5.711389530915767e-05, "rmse": 0.007557373572158364, "relative_error": 0.20627513709425288, "max_error": 0.27435970306396484, "sparsity": 0.37764477729797363, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.0.self_attn.o_proj": { "mse": 1.2771037290804088e-05, "rmse": 0.003573658810071841, "relative_error": 0.12916616624639116, "max_error": 0.5240488052368164, "sparsity": 0.3812715212504069, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.0.mlp.gate_proj": { "mse": 0.00010874148574657738, "rmse": 0.010427918572110994, "relative_error": 0.11862943092036356, "max_error": 0.5052490234375, "sparsity": 0.3853779236475627, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.0.mlp.up_proj": { "mse": 0.00010343005124013871, "rmse": 0.010170056599652665, "relative_error": 0.1189827787892244, "max_error": 0.33917236328125, "sparsity": 0.385589599609375, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.0.mlp.down_proj": { "mse": 0.0001387051015626639, "rmse": 0.011777313002661681, "relative_error": 0.13502964609806262, "max_error": 0.6370925903320312, "sparsity": 0.38009379307429, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.1.self_attn.q_proj": { "mse": 0.00026919360971078277, "rmse": 0.016407120701414455, "relative_error": 0.13839612971922743, "max_error": 0.726893424987793, "sparsity": 0.3849904537200928, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.1.self_attn.k_proj": { "mse": 0.0002586275804787874, "rmse": 0.016081902265552648, "relative_error": 0.13440342116408724, "max_error": 0.5205841064453125, "sparsity": 0.3860335350036621, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.1.self_attn.v_proj": { "mse": 8.325559610966593e-05, "rmse": 0.009124450455214601, "relative_error": 0.14485086837839517, "max_error": 0.3021749258041382, "sparsity": 0.38692625363667804, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.1.self_attn.o_proj": { "mse": 7.805755012668669e-05, "rmse": 0.008835018399906516, "relative_error": 0.14746445782389664, "max_error": 1.331390380859375, "sparsity": 0.3764190673828125, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.1.mlp.gate_proj": { "mse": 0.00010081167420139536, "rmse": 0.0100405016907222, "relative_error": 0.11212122738948778, "max_error": 0.9822454452514648, "sparsity": 0.3886687954266866, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.1.mlp.up_proj": { "mse": 8.722111670067534e-05, "rmse": 0.00933922463059302, "relative_error": 0.11204773790921406, "max_error": 0.4900360107421875, "sparsity": 0.3890010515848796, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.1.mlp.down_proj": { "mse": 0.000125523962196894, "rmse": 0.011203747685345917, "relative_error": 0.13279524768918374, "max_error": 0.6065788269042969, "sparsity": 0.38065383831659955, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.2.self_attn.q_proj": { "mse": 0.00016419387247879058, "rmse": 0.012813815687717323, "relative_error": 0.1146054007851603, "max_error": 0.40304821729660034, "sparsity": 0.39175184567769367, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.2.self_attn.k_proj": { "mse": 0.00016570747538935393, "rmse": 0.012872741564614506, "relative_error": 0.11461909224693181, "max_error": 0.3516998291015625, "sparsity": 0.3909467856089274, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.2.self_attn.v_proj": { "mse": 6.916217535035685e-05, "rmse": 0.00831637994264072, "relative_error": 0.1159744597091741, "max_error": 0.25598716735839844, "sparsity": 0.3940271536509196, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.2.self_attn.o_proj": { "mse": 8.277458982774988e-05, "rmse": 0.00909805417810588, "relative_error": 0.1345205354400204, "max_error": 0.5423431396484375, "sparsity": 0.3798822561899821, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.2.mlp.gate_proj": { "mse": 9.692814637674019e-05, "rmse": 0.009845209310966435, "relative_error": 0.11052952542386552, "max_error": 0.6398468017578125, "sparsity": 0.3880802392959595, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.2.mlp.up_proj": { "mse": 8.710896509001032e-05, "rmse": 0.009333218367209154, "relative_error": 0.11117947991341522, "max_error": 0.40167236328125, "sparsity": 0.3885002136230469, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.2.mlp.down_proj": { "mse": 0.00012658716877922416, "rmse": 0.011251096336767548, "relative_error": 0.1325751347848122, "max_error": 0.8208770751953125, "sparsity": 0.3796415527661641, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.3.self_attn.q_proj": { "mse": 0.0001530556910438463, "rmse": 0.012371567849058029, "relative_error": 0.11541190388398646, "max_error": 0.4719809889793396, "sparsity": 0.39139819145202637, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.3.self_attn.k_proj": { "mse": 0.00015896638797130436, "rmse": 0.012608187338840756, "relative_error": 0.11515529751944896, "max_error": 0.39187145233154297, "sparsity": 0.39035431543986004, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.3.self_attn.v_proj": { "mse": 7.291566726053134e-05, "rmse": 0.008539067118867923, "relative_error": 0.11644146996502198, "max_error": 0.2207043170928955, "sparsity": 0.3927210172017415, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.3.self_attn.o_proj": { "mse": 7.748796633677557e-05, "rmse": 0.008802724938152706, "relative_error": 0.1279519505773518, "max_error": 0.43854331970214844, "sparsity": 0.38050039609273273, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.3.mlp.gate_proj": { "mse": 9.516256977804005e-05, "rmse": 0.009755130433676427, "relative_error": 0.11032415499084043, "max_error": 0.597137451171875, "sparsity": 0.38887778917948407, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.3.mlp.up_proj": { "mse": 9.12658724701032e-05, "rmse": 0.009553317354202319, "relative_error": 0.11089436404123741, "max_error": 0.34808349609375, "sparsity": 0.389626423517863, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.3.mlp.down_proj": { "mse": 0.00013019863399676979, "rmse": 0.011410461603141644, "relative_error": 0.13189586812749926, "max_error": 0.5650882720947266, "sparsity": 0.3811826705932617, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.4.self_attn.q_proj": { "mse": 0.00015413184883072972, "rmse": 0.012414984850201378, "relative_error": 0.11780370464069141, "max_error": 0.37637901306152344, "sparsity": 0.39243324597676593, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.4.self_attn.k_proj": { "mse": 0.00015679103671573102, "rmse": 0.012521622766867361, "relative_error": 0.11770231723548095, "max_error": 0.3959388732910156, "sparsity": 0.3918443520863851, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.4.self_attn.v_proj": { "mse": 7.459029438905418e-05, "rmse": 0.008636567280410325, "relative_error": 0.11947416432783187, "max_error": 0.23118281364440918, "sparsity": 0.3938344319661458, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.4.self_attn.o_proj": { "mse": 7.937617920106277e-05, "rmse": 0.008909331018716432, "relative_error": 0.13073696797025244, "max_error": 0.6722698211669922, "sparsity": 0.3798239231109619, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.4.mlp.gate_proj": { "mse": 9.190893615595996e-05, "rmse": 0.009586914840341493, "relative_error": 0.10849688521505156, "max_error": 0.5092010498046875, "sparsity": 0.3894091447194417, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.4.mlp.up_proj": { "mse": 9.020772995427251e-05, "rmse": 0.009497775000192018, "relative_error": 0.10945214365573647, "max_error": 0.4986724853515625, "sparsity": 0.3900378147761027, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.4.mlp.down_proj": { "mse": 0.00012248774874024093, "rmse": 0.011067418341250182, "relative_error": 0.12746774428731608, "max_error": 0.5980072021484375, "sparsity": 0.38230576117833454, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.5.self_attn.q_proj": { "mse": 0.00015130281099118292, "rmse": 0.012300520760975242, "relative_error": 0.11469356438408594, "max_error": 0.3299667239189148, "sparsity": 0.3918568293253581, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.5.self_attn.k_proj": { "mse": 0.00015101679309736937, "rmse": 0.012288889009888948, "relative_error": 0.11402556294332304, "max_error": 0.3680969476699829, "sparsity": 0.3910319010416667, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.5.self_attn.v_proj": { "mse": 7.671088678762317e-05, "rmse": 0.008758475140549477, "relative_error": 0.11636823438420277, "max_error": 0.2230091094970703, "sparsity": 0.3936762809753418, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.5.self_attn.o_proj": { "mse": 9.098563896259293e-05, "rmse": 0.009538639261581964, "relative_error": 0.13238292097211185, "max_error": 0.416259765625, "sparsity": 0.3792380491892497, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.5.mlp.gate_proj": { "mse": 9.076636342797428e-05, "rmse": 0.009527138260147918, "relative_error": 0.10842027764411528, "max_error": 0.697662353515625, "sparsity": 0.388599971930186, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.5.mlp.up_proj": { "mse": 9.33339397306554e-05, "rmse": 0.009660949214785025, "relative_error": 0.1099180335797476, "max_error": 0.32053184509277344, "sparsity": 0.3889654080073039, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.5.mlp.down_proj": { "mse": 0.00011332923168083653, "rmse": 0.010645620305122503, "relative_error": 0.12130861115388147, "max_error": 0.5944061279296875, "sparsity": 0.3844008247057597, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.6.self_attn.q_proj": { "mse": 0.00014138600090518594, "rmse": 0.011890584548506685, "relative_error": 0.1137895158538832, "max_error": 0.31124114990234375, "sparsity": 0.39211424191792804, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.6.self_attn.k_proj": { "mse": 0.00014205885236151516, "rmse": 0.011918844422238053, "relative_error": 0.11346255810575416, "max_error": 0.3076510429382324, "sparsity": 0.39174707730611164, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.6.self_attn.v_proj": { "mse": 7.575999188702554e-05, "rmse": 0.00870402159274812, "relative_error": 0.1160057875043741, "max_error": 0.2431955337524414, "sparsity": 0.3937256336212158, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.6.self_attn.o_proj": { "mse": 8.068876923061907e-05, "rmse": 0.008982692760560114, "relative_error": 0.12594970736566127, "max_error": 0.5454425811767578, "sparsity": 0.3837650616963704, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.6.mlp.gate_proj": { "mse": 8.998379053082317e-05, "rmse": 0.009485978627997386, "relative_error": 0.10835457020640558, "max_error": 0.5523300170898438, "sparsity": 0.38953636089960736, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.6.mlp.up_proj": { "mse": 9.37409422476776e-05, "rmse": 0.009681990613901544, "relative_error": 0.10956738752297024, "max_error": 0.297149658203125, "sparsity": 0.3902047872543335, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.6.mlp.down_proj": { "mse": 0.00011795001773862168, "rmse": 0.010860479627466812, "relative_error": 0.12343403918579368, "max_error": 0.44796276092529297, "sparsity": 0.3843594789505005, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.7.self_attn.q_proj": { "mse": 0.00015688124403823167, "rmse": 0.012525224310894862, "relative_error": 0.12189713132098018, "max_error": 0.4827537536621094, "sparsity": 0.3912874062856038, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.7.self_attn.k_proj": { "mse": 0.00015296241326723248, "rmse": 0.01236779742990774, "relative_error": 0.1199153668104569, "max_error": 0.54345703125, "sparsity": 0.39050086339314777, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.7.self_attn.v_proj": { "mse": 8.948285540100187e-05, "rmse": 0.009459537800601141, "relative_error": 0.12402144103539234, "max_error": 0.25670433044433594, "sparsity": 0.3926668167114258, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.7.self_attn.o_proj": { "mse": 8.854311454342678e-05, "rmse": 0.009409735094221664, "relative_error": 0.13105534388713513, "max_error": 0.7096405029296875, "sparsity": 0.3785405158996582, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.7.mlp.gate_proj": { "mse": 8.576437539886683e-05, "rmse": 0.009260905754777273, "relative_error": 0.10707372869116029, "max_error": 0.48479461669921875, "sparsity": 0.3907337586085002, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.7.mlp.up_proj": { "mse": 9.252261952497065e-05, "rmse": 0.009618867892063527, "relative_error": 0.10801395651849023, "max_error": 0.375885009765625, "sparsity": 0.3915167252222697, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.7.mlp.down_proj": { "mse": 0.0001230652560479939, "rmse": 0.01109347808615467, "relative_error": 0.12529995595955354, "max_error": 0.7331366539001465, "sparsity": 0.3860664963722229, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.8.self_attn.q_proj": { "mse": 0.00012525853526312858, "rmse": 0.011191895963737717, "relative_error": 0.11461234808407195, "max_error": 0.35657501220703125, "sparsity": 0.39265839258829754, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.8.self_attn.k_proj": { "mse": 0.00011599854769883677, "rmse": 0.01077026219266907, "relative_error": 0.11264466468077658, "max_error": 0.35434722900390625, "sparsity": 0.39147520065307617, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.8.self_attn.v_proj": { "mse": 9.594231960363686e-05, "rmse": 0.009795015038458943, "relative_error": 0.11839808436837174, "max_error": 0.37999963760375977, "sparsity": 0.39471809069315594, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.8.self_attn.o_proj": { "mse": 0.0001107770367525518, "rmse": 0.010525067066415861, "relative_error": 0.13228799125690793, "max_error": 0.5034599304199219, "sparsity": 0.37880786259969074, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.8.mlp.gate_proj": { "mse": 8.402258390560746e-05, "rmse": 0.009166383360170328, "relative_error": 0.10532979753276703, "max_error": 0.52838134765625, "sparsity": 0.3913570245107015, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.8.mlp.up_proj": { "mse": 8.96376877790317e-05, "rmse": 0.009467718192839904, "relative_error": 0.1067531770222323, "max_error": 0.7634124755859375, "sparsity": 0.3922878901163737, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.8.mlp.down_proj": { "mse": 0.00010968392598442733, "rmse": 0.01047300940438933, "relative_error": 0.11969575410263461, "max_error": 0.3441123962402344, "sparsity": 0.3869070808092753, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.9.self_attn.q_proj": { "mse": 0.00013906371896155179, "rmse": 0.011792528098823923, "relative_error": 0.11619249670618485, "max_error": 0.5095748901367188, "sparsity": 0.3935159047444661, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.9.self_attn.k_proj": { "mse": 0.00013474057777784765, "rmse": 0.011607780915310542, "relative_error": 0.11522755122799154, "max_error": 0.6333770751953125, "sparsity": 0.39248037338256836, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.9.self_attn.v_proj": { "mse": 9.845116437645629e-05, "rmse": 0.009922256012442749, "relative_error": 0.11942510072251349, "max_error": 0.4576568603515625, "sparsity": 0.3951539993286133, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.9.self_attn.o_proj": { "mse": 0.00011052452464355156, "rmse": 0.0105130644744314, "relative_error": 0.13353254287792263, "max_error": 0.45235252380371094, "sparsity": 0.3810691038767497, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.9.mlp.gate_proj": { "mse": 8.366839028894901e-05, "rmse": 0.009147042707287913, "relative_error": 0.10679062559175231, "max_error": 0.4619140625, "sparsity": 0.39163299401601154, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.9.mlp.up_proj": { "mse": 9.668088023317978e-05, "rmse": 0.009832643603486286, "relative_error": 0.10821890520679862, "max_error": 0.38425350189208984, "sparsity": 0.39284054438273114, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.9.mlp.down_proj": { "mse": 0.00013003291678614914, "rmse": 0.011403197656190528, "relative_error": 0.12745194232982762, "max_error": 0.566009521484375, "sparsity": 0.3845730423927307, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.10.self_attn.q_proj": { "mse": 0.00012565116048790514, "rmse": 0.011209422843657257, "relative_error": 0.11854237750544573, "max_error": 0.4283638000488281, "sparsity": 0.3949253559112549, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.10.self_attn.k_proj": { "mse": 0.00012736594362650067, "rmse": 0.011285652113480225, "relative_error": 0.11790091080028516, "max_error": 0.4173877239227295, "sparsity": 0.3928183714548747, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.10.self_attn.v_proj": { "mse": 9.187028626911342e-05, "rmse": 0.00958489886587821, "relative_error": 0.12239239741322284, "max_error": 0.30411529541015625, "sparsity": 0.39554985364278156, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.10.self_attn.o_proj": { "mse": 0.00012515991693362594, "rmse": 0.011187489304291018, "relative_error": 0.15035625413716164, "max_error": 0.4181327819824219, "sparsity": 0.3772312005360921, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.10.mlp.gate_proj": { "mse": 8.393089228775352e-05, "rmse": 0.009161380479368462, "relative_error": 0.10709110477309372, "max_error": 0.6112289428710938, "sparsity": 0.39222437143325806, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.10.mlp.up_proj": { "mse": 9.599251643521711e-05, "rmse": 0.00979757706962375, "relative_error": 0.10849474513968223, "max_error": 0.5392261743545532, "sparsity": 0.39365750551223755, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.10.mlp.down_proj": { "mse": 0.00013664091238752007, "rmse": 0.011689350383469565, "relative_error": 0.13170919919705393, "max_error": 0.5762062072753906, "sparsity": 0.384915828704834, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.11.self_attn.q_proj": { "mse": 0.00013748332276009023, "rmse": 0.011725328258095389, "relative_error": 0.11656835122202536, "max_error": 1.0129890441894531, "sparsity": 0.39558879534403485, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.11.self_attn.k_proj": { "mse": 0.00012948595394846052, "rmse": 0.01137918951193188, "relative_error": 0.11476094516158379, "max_error": 0.6671142578125, "sparsity": 0.3946519692738851, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.11.self_attn.v_proj": { "mse": 8.885016723070294e-05, "rmse": 0.009426036666102193, "relative_error": 0.1168312712891464, "max_error": 0.3714599609375, "sparsity": 0.3978262742360433, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.11.self_attn.o_proj": { "mse": 0.00010273691441398114, "rmse": 0.010135921981447032, "relative_error": 0.13194819940886102, "max_error": 0.6239433288574219, "sparsity": 0.3784929911295573, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.11.mlp.gate_proj": { "mse": 8.606556366430596e-05, "rmse": 0.009277152777889666, "relative_error": 0.10805389747567434, "max_error": 0.801422119140625, "sparsity": 0.3931015928586324, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.11.mlp.up_proj": { "mse": 9.808417235035449e-05, "rmse": 0.00990374536982623, "relative_error": 0.10907260789107497, "max_error": 0.476348876953125, "sparsity": 0.3945031960805257, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.11.mlp.down_proj": { "mse": 0.00013493798905983567, "rmse": 0.011616281206127703, "relative_error": 0.13058919739846145, "max_error": 0.620574951171875, "sparsity": 0.38569573561350506, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.12.self_attn.q_proj": { "mse": 0.00011779519991250709, "rmse": 0.010853349709306665, "relative_error": 0.11593947411175125, "max_error": 0.8235530853271484, "sparsity": 0.39581966400146484, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.12.self_attn.k_proj": { "mse": 0.0001068732890416868, "rmse": 0.010337953813095066, "relative_error": 0.11340367751333466, "max_error": 0.39037322998046875, "sparsity": 0.394009272257487, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.12.self_attn.v_proj": { "mse": 9.546923683956265e-05, "rmse": 0.009770836035855001, "relative_error": 0.1183919527235388, "max_error": 0.3798694610595703, "sparsity": 0.3984665870666504, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.12.self_attn.o_proj": { "mse": 0.00010976817429764196, "rmse": 0.010477030795871603, "relative_error": 0.13323159144961502, "max_error": 0.4802589416503906, "sparsity": 0.37735788027445477, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.12.mlp.gate_proj": { "mse": 8.522283314960077e-05, "rmse": 0.009231621371655186, "relative_error": 0.10777059058583263, "max_error": 0.591461181640625, "sparsity": 0.39298417170842487, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.12.mlp.up_proj": { "mse": 9.667361155152321e-05, "rmse": 0.009832273976630392, "relative_error": 0.10836841175849397, "max_error": 0.5146751403808594, "sparsity": 0.39399949709574383, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.12.mlp.down_proj": { "mse": 0.00013385784404817969, "rmse": 0.011569695071529745, "relative_error": 0.13050353261534786, "max_error": 0.5663547515869141, "sparsity": 0.3858479857444763, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.13.self_attn.q_proj": { "mse": 0.00012699411308858544, "rmse": 0.011269166477099602, "relative_error": 0.1169069941607757, "max_error": 0.6077880859375, "sparsity": 0.3962131341298421, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.13.self_attn.k_proj": { "mse": 0.00011883420665981248, "rmse": 0.010901110340686056, "relative_error": 0.1141555243804878, "max_error": 0.38907623291015625, "sparsity": 0.39407960573832196, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.13.self_attn.v_proj": { "mse": 9.517266880720854e-05, "rmse": 0.009755648046501501, "relative_error": 0.11651403462037346, "max_error": 0.312164306640625, "sparsity": 0.39721115430196124, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.13.self_attn.o_proj": { "mse": 0.00011390489817131311, "rmse": 0.01067262377165583, "relative_error": 0.1345652980895498, "max_error": 0.4114570617675781, "sparsity": 0.3775333563486735, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.13.mlp.gate_proj": { "mse": 8.87833593878895e-05, "rmse": 0.009422492206836231, "relative_error": 0.11013698642560844, "max_error": 1.0255584716796875, "sparsity": 0.39292343457539874, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.13.mlp.up_proj": { "mse": 0.00010229789768345654, "rmse": 0.010114242318802558, "relative_error": 0.1105362572094837, "max_error": 0.4052734375, "sparsity": 0.3939278523127238, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.13.mlp.down_proj": { "mse": 0.0001434191653970629, "rmse": 0.011975774104293338, "relative_error": 0.13397899523618045, "max_error": 0.7285346984863281, "sparsity": 0.3844974438349406, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.14.self_attn.q_proj": { "mse": 0.00012377048551570624, "rmse": 0.011125218447999402, "relative_error": 0.1145393756245998, "max_error": 0.37328338623046875, "sparsity": 0.3949705759684245, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.14.self_attn.k_proj": { "mse": 0.00012177479220554233, "rmse": 0.01103516163024096, "relative_error": 0.11414312930004775, "max_error": 0.39933013916015625, "sparsity": 0.3937130769093831, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.14.self_attn.v_proj": { "mse": 0.00010253102664137259, "rmse": 0.010125760546318118, "relative_error": 0.11949763612113624, "max_error": 0.45636940002441406, "sparsity": 0.3970218499501546, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.14.self_attn.o_proj": { "mse": 0.00011449969315435737, "rmse": 0.010700452941551464, "relative_error": 0.13257149022193387, "max_error": 1.1525955200195312, "sparsity": 0.37923351923624676, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.14.mlp.gate_proj": { "mse": 9.163775393972173e-05, "rmse": 0.009572761040563049, "relative_error": 0.11190023092408524, "max_error": 0.83123779296875, "sparsity": 0.3928397297859192, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.14.mlp.up_proj": { "mse": 0.00010870010737562552, "rmse": 0.01042593436463253, "relative_error": 0.11280875161412238, "max_error": 0.5446014404296875, "sparsity": 0.3936503529548645, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.14.mlp.down_proj": { "mse": 0.00016400107415392995, "rmse": 0.012806290413462047, "relative_error": 0.14206598069839568, "max_error": 0.7069358825683594, "sparsity": 0.3815520207087199, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.15.self_attn.q_proj": { "mse": 9.789341129362583e-05, "rmse": 0.009894109929327944, "relative_error": 0.11414555337074125, "max_error": 0.33676910400390625, "sparsity": 0.3949731985727946, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.15.self_attn.k_proj": { "mse": 8.986466855276376e-05, "rmse": 0.009479697703659319, "relative_error": 0.1125592821962688, "max_error": 0.3172111511230469, "sparsity": 0.39313093821207684, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.15.self_attn.v_proj": { "mse": 0.00011400174116715789, "rmse": 0.010677159789342758, "relative_error": 0.1173175552733063, "max_error": 0.3896446228027344, "sparsity": 0.3969427744547526, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.15.self_attn.o_proj": { "mse": 0.0001631157356314361, "rmse": 0.012771677087659087, "relative_error": 0.1473888285697514, "max_error": 1.012359619140625, "sparsity": 0.3775180180867513, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.15.mlp.gate_proj": { "mse": 9.187518298858777e-05, "rmse": 0.009585154301762062, "relative_error": 0.11252498530861223, "max_error": 0.5476951599121094, "sparsity": 0.39175093173980713, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.15.mlp.up_proj": { "mse": 0.00011229647498112172, "rmse": 0.010597003113197698, "relative_error": 0.11439855196424717, "max_error": 0.32222747802734375, "sparsity": 0.39260558287302655, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.15.mlp.down_proj": { "mse": 0.00017591984942555428, "rmse": 0.01326347802899203, "relative_error": 0.1462905444778315, "max_error": 0.5610237121582031, "sparsity": 0.37977610031763714, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.16.self_attn.q_proj": { "mse": 0.0001145480782724917, "rmse": 0.010702713593873831, "relative_error": 0.11975183055006922, "max_error": 0.4313373565673828, "sparsity": 0.39432207743326825, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.16.self_attn.k_proj": { "mse": 0.0001037448673741892, "rmse": 0.010185522439923697, "relative_error": 0.11635053860916868, "max_error": 0.4398765563964844, "sparsity": 0.392624298731486, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.16.self_attn.v_proj": { "mse": 0.00011915640061488375, "rmse": 0.010915878371202371, "relative_error": 0.1201440636287298, "max_error": 0.3507728576660156, "sparsity": 0.39522210756937665, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.16.self_attn.o_proj": { "mse": 0.00014365943206939846, "rmse": 0.01198580126939365, "relative_error": 0.13777784807752752, "max_error": 0.62738037109375, "sparsity": 0.37827134132385254, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.16.mlp.gate_proj": { "mse": 9.585679799783975e-05, "rmse": 0.009790648497308018, "relative_error": 0.11537675215561138, "max_error": 0.4750213623046875, "sparsity": 0.39047886927922565, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.16.mlp.up_proj": { "mse": 0.00011986482422798872, "rmse": 0.010948279509949895, "relative_error": 0.11754970772120121, "max_error": 0.3110523223876953, "sparsity": 0.39113156000773114, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.16.mlp.down_proj": { "mse": 0.00017797393957152963, "rmse": 0.013340687372528059, "relative_error": 0.14594216595449572, "max_error": 0.7946376800537109, "sparsity": 0.3795015613238017, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.17.self_attn.q_proj": { "mse": 0.00012077944120392203, "rmse": 0.010989970027435108, "relative_error": 0.12809202973827413, "max_error": 0.36618804931640625, "sparsity": 0.3912224769592285, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.17.self_attn.k_proj": { "mse": 0.00010802940232679248, "rmse": 0.010393719369253361, "relative_error": 0.12436472277530712, "max_error": 0.3511810302734375, "sparsity": 0.39031481742858887, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.17.self_attn.v_proj": { "mse": 0.00015826526214368641, "rmse": 0.01258035222653509, "relative_error": 0.12655687426742085, "max_error": 0.40193939208984375, "sparsity": 0.3940921624501546, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.17.self_attn.o_proj": { "mse": 0.00017517831292934716, "rmse": 0.013235494434638517, "relative_error": 0.13974743449718308, "max_error": 0.38881492614746094, "sparsity": 0.37921682993570965, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.17.mlp.gate_proj": { "mse": 9.643776138545945e-05, "rmse": 0.009820272979172191, "relative_error": 0.11469761926663953, "max_error": 0.49706268310546875, "sparsity": 0.38984374205271405, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.17.mlp.up_proj": { "mse": 0.00011767178511945531, "rmse": 0.010847662656971561, "relative_error": 0.1163379295193364, "max_error": 0.2964920997619629, "sparsity": 0.3904351790746053, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.17.mlp.down_proj": { "mse": 0.00019267346942797303, "rmse": 0.013880686922050113, "relative_error": 0.1512596588641169, "max_error": 0.8572845458984375, "sparsity": 0.3779032230377197, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.18.self_attn.q_proj": { "mse": 0.00010977545753121376, "rmse": 0.010477378371100939, "relative_error": 0.12804331106255126, "max_error": 0.3930988311767578, "sparsity": 0.3909265200297038, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.18.self_attn.k_proj": { "mse": 9.381980635225773e-05, "rmse": 0.009686062479266677, "relative_error": 0.12292216202745253, "max_error": 0.33354949951171875, "sparsity": 0.3890887101491292, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.18.self_attn.v_proj": { "mse": 0.0001693356316536665, "rmse": 0.013012902506883946, "relative_error": 0.12694288127788866, "max_error": 0.5638923645019531, "sparsity": 0.3930181662241618, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.18.self_attn.o_proj": { "mse": 0.0002035392972175032, "rmse": 0.01426671991795953, "relative_error": 0.14500096842541085, "max_error": 0.6965866088867188, "sparsity": 0.3788052399953206, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.18.mlp.gate_proj": { "mse": 9.147105447482318e-05, "rmse": 0.009564050108339207, "relative_error": 0.11277841748947105, "max_error": 0.40427398681640625, "sparsity": 0.38996777931849164, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.18.mlp.up_proj": { "mse": 0.00011604203609749675, "rmse": 0.010772280914341991, "relative_error": 0.11471436240503491, "max_error": 0.28387451171875, "sparsity": 0.3903919855753581, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.18.mlp.down_proj": { "mse": 0.00020416387997101992, "rmse": 0.01428859265186813, "relative_error": 0.15430695516149365, "max_error": 1.15777587890625, "sparsity": 0.3757058580716451, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.19.self_attn.q_proj": { "mse": 0.00010159470548387617, "rmse": 0.010079419898182443, "relative_error": 0.13041868423058764, "max_error": 0.30798912048339844, "sparsity": 0.39068396886189777, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.19.self_attn.k_proj": { "mse": 8.797407645033672e-05, "rmse": 0.0093794496880327, "relative_error": 0.12614134813623457, "max_error": 0.2617206573486328, "sparsity": 0.3890175024668376, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.19.self_attn.v_proj": { "mse": 0.00019485123630147427, "rmse": 0.013958912432617173, "relative_error": 0.12632223635733977, "max_error": 0.3944683074951172, "sparsity": 0.39249809583028156, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.19.self_attn.o_proj": { "mse": 0.0002515649248380214, "rmse": 0.01586079836698082, "relative_error": 0.15257642322392984, "max_error": 1.296630859375, "sparsity": 0.37897300720214844, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.19.mlp.gate_proj": { "mse": 8.597908890806139e-05, "rmse": 0.009272490976434617, "relative_error": 0.11023672718946166, "max_error": 0.343109130859375, "sparsity": 0.39063696066538495, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.19.mlp.up_proj": { "mse": 0.00011183493188582361, "rmse": 0.010575203633302936, "relative_error": 0.1119852751134503, "max_error": 0.2927464246749878, "sparsity": 0.39104316631952923, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.19.mlp.down_proj": { "mse": 0.0001817988813854754, "rmse": 0.013483281551071882, "relative_error": 0.14447528253129333, "max_error": 0.69659423828125, "sparsity": 0.37729281187057495, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.20.self_attn.q_proj": { "mse": 0.00010160417150473222, "rmse": 0.010079889458954014, "relative_error": 0.13637383986791093, "max_error": 0.2930039167404175, "sparsity": 0.3889306386311849, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.20.self_attn.k_proj": { "mse": 9.039611904881895e-05, "rmse": 0.009507687365959135, "relative_error": 0.13288590446574086, "max_error": 0.29769229888916016, "sparsity": 0.38741453488667804, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.20.self_attn.v_proj": { "mse": 0.0002304271620232612, "rmse": 0.015179827470141457, "relative_error": 0.12986079018176874, "max_error": 0.49585485458374023, "sparsity": 0.39007751146952313, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.20.self_attn.o_proj": { "mse": 0.00018717959756031632, "rmse": 0.013681359492401197, "relative_error": 0.12420474417407759, "max_error": 0.5940694808959961, "sparsity": 0.3826877276102702, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.20.mlp.gate_proj": { "mse": 7.991305756149814e-05, "rmse": 0.008939410358714837, "relative_error": 0.10672288257016303, "max_error": 0.37044525146484375, "sparsity": 0.391612708568573, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.20.mlp.up_proj": { "mse": 0.00010522006778046489, "rmse": 0.010257683353489953, "relative_error": 0.10832224226656331, "max_error": 0.25397777557373047, "sparsity": 0.39186179637908936, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.20.mlp.down_proj": { "mse": 0.00016892752319108695, "rmse": 0.012997212131495236, "relative_error": 0.1382850726778082, "max_error": 0.695037841796875, "sparsity": 0.37873925765355426, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.21.self_attn.q_proj": { "mse": 8.775975584285334e-05, "rmse": 0.009368017711493362, "relative_error": 0.13237826106082362, "max_error": 0.3493930697441101, "sparsity": 0.3894302050272624, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.21.self_attn.k_proj": { "mse": 7.635178917553276e-05, "rmse": 0.008737951085668353, "relative_error": 0.12911753812683308, "max_error": 0.31544029712677, "sparsity": 0.38779481252034503, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.21.self_attn.v_proj": { "mse": 0.00024096955894492567, "rmse": 0.015523194224930823, "relative_error": 0.12822875184818627, "max_error": 0.45868492126464844, "sparsity": 0.3911566734313965, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.21.self_attn.o_proj": { "mse": 0.00024130858946591616, "rmse": 0.015534110514152915, "relative_error": 0.13656648222552506, "max_error": 0.6318511962890625, "sparsity": 0.379139502843221, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.21.mlp.gate_proj": { "mse": 7.701908180024475e-05, "rmse": 0.008776051606516722, "relative_error": 0.10519762249970133, "max_error": 0.29144287109375, "sparsity": 0.3916556040445964, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.21.mlp.up_proj": { "mse": 0.00010363446926930919, "rmse": 0.010180101633545177, "relative_error": 0.1068552422607346, "max_error": 0.2808570861816406, "sparsity": 0.39187947909037274, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.21.mlp.down_proj": { "mse": 0.00017394559108652174, "rmse": 0.013188843432481931, "relative_error": 0.13934798047636146, "max_error": 0.806182861328125, "sparsity": 0.3770946264266968, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.22.self_attn.q_proj": { "mse": 6.889259384479374e-05, "rmse": 0.00830015625423966, "relative_error": 0.11974573199073539, "max_error": 0.37679290771484375, "sparsity": 0.39107751846313477, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.22.self_attn.k_proj": { "mse": 6.384578591678292e-05, "rmse": 0.00799035580664484, "relative_error": 0.11802824517589042, "max_error": 0.2800483703613281, "sparsity": 0.38954949378967285, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.22.self_attn.v_proj": { "mse": 0.0002198831207351759, "rmse": 0.014828456451538572, "relative_error": 0.12174802900805574, "max_error": 0.40467071533203125, "sparsity": 0.3936028480529785, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.22.self_attn.o_proj": { "mse": 0.00026977865491062403, "rmse": 0.016424940027611183, "relative_error": 0.14430395391322967, "max_error": 0.4811992645263672, "sparsity": 0.37709347407023114, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.22.mlp.gate_proj": { "mse": 8.570995851187035e-05, "rmse": 0.009257967299135937, "relative_error": 0.1092938200920434, "max_error": 0.8341293334960938, "sparsity": 0.3915645281473796, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.22.mlp.up_proj": { "mse": 0.00011422947864048183, "rmse": 0.010687819171397026, "relative_error": 0.11077979518624878, "max_error": 0.6844406127929688, "sparsity": 0.39192960659662884, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.22.mlp.down_proj": { "mse": 0.0001934868050739169, "rmse": 0.013909953453333944, "relative_error": 0.1468472238781438, "max_error": 0.861419677734375, "sparsity": 0.3759889403978984, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.23.self_attn.q_proj": { "mse": 4.9034068069886416e-05, "rmse": 0.00700243301073894, "relative_error": 0.11425448677610671, "max_error": 0.2026386260986328, "sparsity": 0.3900686899820964, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.23.self_attn.k_proj": { "mse": 4.728814383270219e-05, "rmse": 0.006876637538266954, "relative_error": 0.1134439127209608, "max_error": 0.22553253173828125, "sparsity": 0.3899570306142171, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.23.self_attn.v_proj": { "mse": 0.00021581994951702654, "rmse": 0.01469081173785256, "relative_error": 0.11788501133986323, "max_error": 0.37439680099487305, "sparsity": 0.39295633633931476, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.23.self_attn.o_proj": { "mse": 0.0002346659603063017, "rmse": 0.015318810668792199, "relative_error": 0.13143557617866453, "max_error": 1.225677490234375, "sparsity": 0.3775166670481364, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.23.mlp.gate_proj": { "mse": 0.00010114066390087828, "rmse": 0.010056871476800242, "relative_error": 0.11570676856337891, "max_error": 0.42010498046875, "sparsity": 0.3895459572474162, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.23.mlp.up_proj": { "mse": 0.0001254764065379277, "rmse": 0.011201625173961486, "relative_error": 0.11651321952774005, "max_error": 0.36852455139160156, "sparsity": 0.38976337512334186, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 }, "model.text_model.layers.23.mlp.down_proj": { "mse": 0.00048252849956043065, "rmse": 0.021966531350225294, "relative_error": 0.24617655752789447, "max_error": 1.2671375274658203, "sparsity": 0.3508457938830058, "effective_bits": 9.0, "sparse_nnz": 0, "n_planes": 3, "rescued_rows": 0 } }, "summary": { "method_name": "Broad-tritplane3", "model_family": "image_text_to_text", "selected_components": [ "text_backbone", "multimodal_connector" ], "quantized_modules": 169, "quantized_params": 1631846400, "quantized_fraction": 0.7263029115631221, "avg_relative_error": 0.12359459755394277, "avg_effective_bits": 9.0, "full_model_effective_bits": 10.915879619058146, "compression_ratio": 1.4657545299478603 }, "method_name": "Broad-tritplane3", "model_family": "image_text_to_text", "format_family": "tritplane_small", "format_version": "1.0", "total_packed_bytes": 1835827200, "total_fp16_bytes": 3263692800, "compression_ratio": 1.7777777777777777 }