AsadIsmail's picture
Upload folder using huggingface_hub
31244fc verified
{
"model_name": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
"model_config": {
"transformers_version": "5.5.3",
"architectures": [
"SmolVLMForConditionalGeneration"
],
"output_hidden_states": false,
"return_dict": true,
"dtype": "float16",
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"problem_type": null,
"use_cache": false,
"image_token_id": 49190,
"tie_word_embeddings": false,
"vision_config": {
"architectures": null,
"output_hidden_states": false,
"return_dict": true,
"dtype": "float16",
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"problem_type": null,
"hidden_size": 1152,
"intermediate_size": 4304,
"num_hidden_layers": 27,
"num_attention_heads": 16,
"num_channels": 3,
"image_size": 384,
"patch_size": 14,
"hidden_act": "gelu_pytorch_tanh",
"layer_norm_eps": 1e-06,
"attention_dropout": 0.0,
"initializer_range": 0.02,
"_name_or_path": "",
"max_image_size": {
"longest_edge": 384
},
"model_type": "smolvlm_vision",
"size": {
"longest_edge": 1920
},
"tie_word_embeddings": false,
"use_base_siglip": false,
"output_attentions": false
},
"text_config": {
"architectures": [
"VLlama3ForCausalLM"
],
"output_hidden_states": false,
"return_dict": true,
"dtype": "float16",
"chunk_size_feed_forward": 0,
"is_encoder_decoder": false,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"problem_type": null,
"vocab_size": 49280,
"hidden_size": 2048,
"intermediate_size": 8192,
"num_hidden_layers": 24,
"num_attention_heads": 32,
"num_key_value_heads": 32,
"hidden_act": "silu",
"max_position_embeddings": 8192,
"initializer_range": 0.02,
"rms_norm_eps": 1e-05,
"use_cache": true,
"pad_token_id": 2,
"bos_token_id": 1,
"eos_token_id": 2,
"pretraining_tp": 1,
"tie_word_embeddings": false,
"rope_parameters": {
"rope_theta": 130000,
"rope_type": "default"
},
"attention_bias": false,
"attention_dropout": 0.0,
"mlp_bias": false,
"head_dim": 64,
"_name_or_path": "None",
"_flash_attn_2_enabled": true,
"model_type": "llama",
"neftune_noise_alpha": 0.0,
"perceiver_config": {
"_attn_implementation_autoset": false,
"_name_or_path": "",
"add_cross_attention": false,
"architectures": null,
"attention_dropout": 0.0,
"bad_words_ids": null,
"begin_suppress_tokens": null,
"bos_token_id": null,
"chunk_size_feed_forward": 0,
"cross_attention_hidden_size": null,
"decoder_start_token_id": null,
"diversity_penalty": 0.0,
"do_sample": false,
"early_stopping": false,
"encoder_no_repeat_ngram_size": 0,
"eos_token_id": null,
"exponential_decay_length_penalty": null,
"finetuning_task": null,
"forced_bos_token_id": null,
"forced_eos_token_id": null,
"hidden_act": "silu",
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1"
},
"is_decoder": false,
"is_encoder_decoder": false,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1
},
"length_penalty": 1.0,
"max_length": 20,
"min_length": 0,
"model_type": "vllama3",
"no_repeat_ngram_size": 0,
"num_beam_groups": 1,
"num_beams": 1,
"num_key_value_heads": 1,
"num_return_sequences": 1,
"output_attentions": false,
"output_hidden_states": false,
"output_scores": false,
"pad_token_id": null,
"prefix": null,
"problem_type": null,
"pruned_heads": {},
"qk_layer_norms_perceiver": false,
"remove_invalid_values": false,
"repetition_penalty": 1.0,
"resampler_depth": 6,
"resampler_head_dim": 96,
"resampler_n_heads": 16,
"resampler_n_latents": 64,
"return_dict": true,
"return_dict_in_generate": false,
"sep_token_id": null,
"suppress_tokens": null,
"task_specific_params": null,
"temperature": 1.0,
"tf_legacy_loss": false,
"tie_encoder_decoder": false,
"tie_word_embeddings": true,
"tokenizer_class": null,
"top_k": 50,
"top_p": 1.0,
"torch_dtype": null,
"torchscript": false,
"transformers_version": "4.46.0",
"typical_p": 1.0,
"use_bfloat16": false
},
"pixel_shuffle_factor": 3,
"qk_layer_norms": false,
"transformers.js_config": {
"kv_cache_dtype": {
"fp16": "float16",
"q4f16": "float16"
}
},
"use_resampler": false,
"output_attentions": false
},
"scale_factor": 3,
"pad_token_id": 128002,
"_name_or_path": "HuggingFaceTB/SmolVLM2-2.2B-Instruct",
"model_type": "smolvlm",
"use_reentrant_checkpointing": false,
"vocab_size": 49280,
"output_attentions": false
},
"quant_config": {
"components": [
"text_backbone",
"multimodal_connector"
],
"scheme": "tritplane3",
"group_size": 32,
"n_iter": 10,
"salient_fraction": 0.0,
"rescue_fraction": 0.0,
"n_planes": 3,
"allow_all_linear": false,
"target_module_names": [
"Wqkv",
"att_proj",
"attn.proj",
"attn.qkv",
"c_attn",
"c_fc",
"c_proj",
"dense",
"dense_4h_to_h",
"dense_h_to_4h",
"down_proj",
"fc1",
"fc2",
"ff_proj",
"gate_proj",
"gate_up_proj",
"k",
"k_proj",
"linear",
"o",
"o_proj",
"out_proj",
"per_layer_input_gate",
"per_layer_projection",
"proj",
"q",
"q_proj",
"qkv",
"qkv_proj",
"query_key_value",
"up_proj",
"v",
"v_proj",
"w1",
"w2",
"w3",
"wi",
"wi_0",
"wi_1",
"wo"
],
"max_length": 160,
"calibration_batch_size": 2,
"calibration_prompts": null,
"vlm_use_demo_image": true
},
"plan": {},
"layer_info": {
"model.connector.modality_projection.proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
10368
],
"dtype": "torch.float16",
"num_elements": 21233664,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 23887872,
"effective_bits": 9.0
},
"model.text_model.layers.0.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.0.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.0.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.0.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.0.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.0.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.0.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.1.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.1.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.1.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.1.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.1.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.1.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.1.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.2.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.2.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.2.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.2.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.2.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.2.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.2.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.3.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.3.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.3.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.3.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.3.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.3.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.3.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.4.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.4.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.4.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.4.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.4.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.4.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.4.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.5.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.5.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.5.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.5.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.5.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.5.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.5.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.6.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.6.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.6.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.6.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.6.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.6.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.6.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.7.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.7.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.7.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.7.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.7.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.7.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.7.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.8.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.8.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.8.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.8.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.8.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.8.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.8.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.9.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.9.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.9.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.9.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.9.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.9.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.9.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.10.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.10.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.10.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.10.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.10.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.10.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.10.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.11.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.11.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.11.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.11.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.11.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.11.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.11.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.12.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.12.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.12.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.12.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.12.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.12.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.12.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.13.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.13.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.13.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.13.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.13.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.13.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.13.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.14.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.14.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.14.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.14.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.14.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.14.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.14.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.15.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.15.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.15.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.15.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.15.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.15.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.15.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.16.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.16.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.16.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.16.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.16.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.16.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.16.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.17.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.17.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.17.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.17.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.17.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.17.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.17.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.18.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.18.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.18.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.18.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.18.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.18.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.18.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.19.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.19.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.19.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.19.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.19.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.19.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.19.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.20.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.20.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.20.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.20.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.20.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.20.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.20.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.21.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.21.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.21.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.21.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.21.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.21.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.21.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.22.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.22.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.22.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.22.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.22.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.22.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.22.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.23.self_attn.q_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.23.self_attn.k_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.23.self_attn.v_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.23.self_attn.o_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
2048
],
"dtype": "torch.float16",
"num_elements": 4194304,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 4718592,
"effective_bits": 9.0
},
"model.text_model.layers.23.mlp.gate_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.23.mlp.up_proj": {
"scheme": "tritplane_small_v1",
"shape": [
8192,
2048
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
},
"model.text_model.layers.23.mlp.down_proj": {
"scheme": "tritplane_small_v1",
"shape": [
2048,
8192
],
"dtype": "torch.float16",
"num_elements": 16777216,
"n_planes": 3,
"group_sizes": [
32,
32,
32
],
"rescued_rows": 0,
"stored_bytes": 18874368,
"effective_bits": 9.0
}
},
"stats": {
"model.connector.modality_projection.proj": {
"mse": 3.4296051580895437e-06,
"rmse": 0.0018519193173811713,
"relative_error": 0.11355021084933176,
"max_error": 0.05395698547363281,
"sparsity": 0.39136114716529846,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.0.self_attn.q_proj": {
"mse": 0.0006577471503987908,
"rmse": 0.025646581651338856,
"relative_error": 0.16423382595057714,
"max_error": 2.508922576904297,
"sparsity": 0.3742976983388265,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.0.self_attn.k_proj": {
"mse": 0.0006063183536753058,
"rmse": 0.024623532518209198,
"relative_error": 0.1626188869171995,
"max_error": 2.065166473388672,
"sparsity": 0.37669801712036133,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.0.self_attn.v_proj": {
"mse": 5.711389530915767e-05,
"rmse": 0.007557373572158364,
"relative_error": 0.20627513709425288,
"max_error": 0.27435970306396484,
"sparsity": 0.37764477729797363,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.0.self_attn.o_proj": {
"mse": 1.2771037290804088e-05,
"rmse": 0.003573658810071841,
"relative_error": 0.12916616624639116,
"max_error": 0.5240488052368164,
"sparsity": 0.3812715212504069,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.0.mlp.gate_proj": {
"mse": 0.00010874148574657738,
"rmse": 0.010427918572110994,
"relative_error": 0.11862943092036356,
"max_error": 0.5052490234375,
"sparsity": 0.3853779236475627,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.0.mlp.up_proj": {
"mse": 0.00010343005124013871,
"rmse": 0.010170056599652665,
"relative_error": 0.1189827787892244,
"max_error": 0.33917236328125,
"sparsity": 0.385589599609375,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.0.mlp.down_proj": {
"mse": 0.0001387051015626639,
"rmse": 0.011777313002661681,
"relative_error": 0.13502964609806262,
"max_error": 0.6370925903320312,
"sparsity": 0.38009379307429,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.1.self_attn.q_proj": {
"mse": 0.00026919360971078277,
"rmse": 0.016407120701414455,
"relative_error": 0.13839612971922743,
"max_error": 0.726893424987793,
"sparsity": 0.3849904537200928,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.1.self_attn.k_proj": {
"mse": 0.0002586275804787874,
"rmse": 0.016081902265552648,
"relative_error": 0.13440342116408724,
"max_error": 0.5205841064453125,
"sparsity": 0.3860335350036621,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.1.self_attn.v_proj": {
"mse": 8.325559610966593e-05,
"rmse": 0.009124450455214601,
"relative_error": 0.14485086837839517,
"max_error": 0.3021749258041382,
"sparsity": 0.38692625363667804,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.1.self_attn.o_proj": {
"mse": 7.805755012668669e-05,
"rmse": 0.008835018399906516,
"relative_error": 0.14746445782389664,
"max_error": 1.331390380859375,
"sparsity": 0.3764190673828125,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.1.mlp.gate_proj": {
"mse": 0.00010081167420139536,
"rmse": 0.0100405016907222,
"relative_error": 0.11212122738948778,
"max_error": 0.9822454452514648,
"sparsity": 0.3886687954266866,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.1.mlp.up_proj": {
"mse": 8.722111670067534e-05,
"rmse": 0.00933922463059302,
"relative_error": 0.11204773790921406,
"max_error": 0.4900360107421875,
"sparsity": 0.3890010515848796,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.1.mlp.down_proj": {
"mse": 0.000125523962196894,
"rmse": 0.011203747685345917,
"relative_error": 0.13279524768918374,
"max_error": 0.6065788269042969,
"sparsity": 0.38065383831659955,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.2.self_attn.q_proj": {
"mse": 0.00016419387247879058,
"rmse": 0.012813815687717323,
"relative_error": 0.1146054007851603,
"max_error": 0.40304821729660034,
"sparsity": 0.39175184567769367,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.2.self_attn.k_proj": {
"mse": 0.00016570747538935393,
"rmse": 0.012872741564614506,
"relative_error": 0.11461909224693181,
"max_error": 0.3516998291015625,
"sparsity": 0.3909467856089274,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.2.self_attn.v_proj": {
"mse": 6.916217535035685e-05,
"rmse": 0.00831637994264072,
"relative_error": 0.1159744597091741,
"max_error": 0.25598716735839844,
"sparsity": 0.3940271536509196,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.2.self_attn.o_proj": {
"mse": 8.277458982774988e-05,
"rmse": 0.00909805417810588,
"relative_error": 0.1345205354400204,
"max_error": 0.5423431396484375,
"sparsity": 0.3798822561899821,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.2.mlp.gate_proj": {
"mse": 9.692814637674019e-05,
"rmse": 0.009845209310966435,
"relative_error": 0.11052952542386552,
"max_error": 0.6398468017578125,
"sparsity": 0.3880802392959595,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.2.mlp.up_proj": {
"mse": 8.710896509001032e-05,
"rmse": 0.009333218367209154,
"relative_error": 0.11117947991341522,
"max_error": 0.40167236328125,
"sparsity": 0.3885002136230469,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.2.mlp.down_proj": {
"mse": 0.00012658716877922416,
"rmse": 0.011251096336767548,
"relative_error": 0.1325751347848122,
"max_error": 0.8208770751953125,
"sparsity": 0.3796415527661641,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.3.self_attn.q_proj": {
"mse": 0.0001530556910438463,
"rmse": 0.012371567849058029,
"relative_error": 0.11541190388398646,
"max_error": 0.4719809889793396,
"sparsity": 0.39139819145202637,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.3.self_attn.k_proj": {
"mse": 0.00015896638797130436,
"rmse": 0.012608187338840756,
"relative_error": 0.11515529751944896,
"max_error": 0.39187145233154297,
"sparsity": 0.39035431543986004,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.3.self_attn.v_proj": {
"mse": 7.291566726053134e-05,
"rmse": 0.008539067118867923,
"relative_error": 0.11644146996502198,
"max_error": 0.2207043170928955,
"sparsity": 0.3927210172017415,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.3.self_attn.o_proj": {
"mse": 7.748796633677557e-05,
"rmse": 0.008802724938152706,
"relative_error": 0.1279519505773518,
"max_error": 0.43854331970214844,
"sparsity": 0.38050039609273273,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.3.mlp.gate_proj": {
"mse": 9.516256977804005e-05,
"rmse": 0.009755130433676427,
"relative_error": 0.11032415499084043,
"max_error": 0.597137451171875,
"sparsity": 0.38887778917948407,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.3.mlp.up_proj": {
"mse": 9.12658724701032e-05,
"rmse": 0.009553317354202319,
"relative_error": 0.11089436404123741,
"max_error": 0.34808349609375,
"sparsity": 0.389626423517863,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.3.mlp.down_proj": {
"mse": 0.00013019863399676979,
"rmse": 0.011410461603141644,
"relative_error": 0.13189586812749926,
"max_error": 0.5650882720947266,
"sparsity": 0.3811826705932617,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.4.self_attn.q_proj": {
"mse": 0.00015413184883072972,
"rmse": 0.012414984850201378,
"relative_error": 0.11780370464069141,
"max_error": 0.37637901306152344,
"sparsity": 0.39243324597676593,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.4.self_attn.k_proj": {
"mse": 0.00015679103671573102,
"rmse": 0.012521622766867361,
"relative_error": 0.11770231723548095,
"max_error": 0.3959388732910156,
"sparsity": 0.3918443520863851,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.4.self_attn.v_proj": {
"mse": 7.459029438905418e-05,
"rmse": 0.008636567280410325,
"relative_error": 0.11947416432783187,
"max_error": 0.23118281364440918,
"sparsity": 0.3938344319661458,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.4.self_attn.o_proj": {
"mse": 7.937617920106277e-05,
"rmse": 0.008909331018716432,
"relative_error": 0.13073696797025244,
"max_error": 0.6722698211669922,
"sparsity": 0.3798239231109619,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.4.mlp.gate_proj": {
"mse": 9.190893615595996e-05,
"rmse": 0.009586914840341493,
"relative_error": 0.10849688521505156,
"max_error": 0.5092010498046875,
"sparsity": 0.3894091447194417,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.4.mlp.up_proj": {
"mse": 9.020772995427251e-05,
"rmse": 0.009497775000192018,
"relative_error": 0.10945214365573647,
"max_error": 0.4986724853515625,
"sparsity": 0.3900378147761027,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.4.mlp.down_proj": {
"mse": 0.00012248774874024093,
"rmse": 0.011067418341250182,
"relative_error": 0.12746774428731608,
"max_error": 0.5980072021484375,
"sparsity": 0.38230576117833454,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.5.self_attn.q_proj": {
"mse": 0.00015130281099118292,
"rmse": 0.012300520760975242,
"relative_error": 0.11469356438408594,
"max_error": 0.3299667239189148,
"sparsity": 0.3918568293253581,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.5.self_attn.k_proj": {
"mse": 0.00015101679309736937,
"rmse": 0.012288889009888948,
"relative_error": 0.11402556294332304,
"max_error": 0.3680969476699829,
"sparsity": 0.3910319010416667,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.5.self_attn.v_proj": {
"mse": 7.671088678762317e-05,
"rmse": 0.008758475140549477,
"relative_error": 0.11636823438420277,
"max_error": 0.2230091094970703,
"sparsity": 0.3936762809753418,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.5.self_attn.o_proj": {
"mse": 9.098563896259293e-05,
"rmse": 0.009538639261581964,
"relative_error": 0.13238292097211185,
"max_error": 0.416259765625,
"sparsity": 0.3792380491892497,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.5.mlp.gate_proj": {
"mse": 9.076636342797428e-05,
"rmse": 0.009527138260147918,
"relative_error": 0.10842027764411528,
"max_error": 0.697662353515625,
"sparsity": 0.388599971930186,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.5.mlp.up_proj": {
"mse": 9.33339397306554e-05,
"rmse": 0.009660949214785025,
"relative_error": 0.1099180335797476,
"max_error": 0.32053184509277344,
"sparsity": 0.3889654080073039,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.5.mlp.down_proj": {
"mse": 0.00011332923168083653,
"rmse": 0.010645620305122503,
"relative_error": 0.12130861115388147,
"max_error": 0.5944061279296875,
"sparsity": 0.3844008247057597,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.6.self_attn.q_proj": {
"mse": 0.00014138600090518594,
"rmse": 0.011890584548506685,
"relative_error": 0.1137895158538832,
"max_error": 0.31124114990234375,
"sparsity": 0.39211424191792804,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.6.self_attn.k_proj": {
"mse": 0.00014205885236151516,
"rmse": 0.011918844422238053,
"relative_error": 0.11346255810575416,
"max_error": 0.3076510429382324,
"sparsity": 0.39174707730611164,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.6.self_attn.v_proj": {
"mse": 7.575999188702554e-05,
"rmse": 0.00870402159274812,
"relative_error": 0.1160057875043741,
"max_error": 0.2431955337524414,
"sparsity": 0.3937256336212158,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.6.self_attn.o_proj": {
"mse": 8.068876923061907e-05,
"rmse": 0.008982692760560114,
"relative_error": 0.12594970736566127,
"max_error": 0.5454425811767578,
"sparsity": 0.3837650616963704,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.6.mlp.gate_proj": {
"mse": 8.998379053082317e-05,
"rmse": 0.009485978627997386,
"relative_error": 0.10835457020640558,
"max_error": 0.5523300170898438,
"sparsity": 0.38953636089960736,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.6.mlp.up_proj": {
"mse": 9.37409422476776e-05,
"rmse": 0.009681990613901544,
"relative_error": 0.10956738752297024,
"max_error": 0.297149658203125,
"sparsity": 0.3902047872543335,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.6.mlp.down_proj": {
"mse": 0.00011795001773862168,
"rmse": 0.010860479627466812,
"relative_error": 0.12343403918579368,
"max_error": 0.44796276092529297,
"sparsity": 0.3843594789505005,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.7.self_attn.q_proj": {
"mse": 0.00015688124403823167,
"rmse": 0.012525224310894862,
"relative_error": 0.12189713132098018,
"max_error": 0.4827537536621094,
"sparsity": 0.3912874062856038,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.7.self_attn.k_proj": {
"mse": 0.00015296241326723248,
"rmse": 0.01236779742990774,
"relative_error": 0.1199153668104569,
"max_error": 0.54345703125,
"sparsity": 0.39050086339314777,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.7.self_attn.v_proj": {
"mse": 8.948285540100187e-05,
"rmse": 0.009459537800601141,
"relative_error": 0.12402144103539234,
"max_error": 0.25670433044433594,
"sparsity": 0.3926668167114258,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.7.self_attn.o_proj": {
"mse": 8.854311454342678e-05,
"rmse": 0.009409735094221664,
"relative_error": 0.13105534388713513,
"max_error": 0.7096405029296875,
"sparsity": 0.3785405158996582,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.7.mlp.gate_proj": {
"mse": 8.576437539886683e-05,
"rmse": 0.009260905754777273,
"relative_error": 0.10707372869116029,
"max_error": 0.48479461669921875,
"sparsity": 0.3907337586085002,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.7.mlp.up_proj": {
"mse": 9.252261952497065e-05,
"rmse": 0.009618867892063527,
"relative_error": 0.10801395651849023,
"max_error": 0.375885009765625,
"sparsity": 0.3915167252222697,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.7.mlp.down_proj": {
"mse": 0.0001230652560479939,
"rmse": 0.01109347808615467,
"relative_error": 0.12529995595955354,
"max_error": 0.7331366539001465,
"sparsity": 0.3860664963722229,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.8.self_attn.q_proj": {
"mse": 0.00012525853526312858,
"rmse": 0.011191895963737717,
"relative_error": 0.11461234808407195,
"max_error": 0.35657501220703125,
"sparsity": 0.39265839258829754,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.8.self_attn.k_proj": {
"mse": 0.00011599854769883677,
"rmse": 0.01077026219266907,
"relative_error": 0.11264466468077658,
"max_error": 0.35434722900390625,
"sparsity": 0.39147520065307617,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.8.self_attn.v_proj": {
"mse": 9.594231960363686e-05,
"rmse": 0.009795015038458943,
"relative_error": 0.11839808436837174,
"max_error": 0.37999963760375977,
"sparsity": 0.39471809069315594,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.8.self_attn.o_proj": {
"mse": 0.0001107770367525518,
"rmse": 0.010525067066415861,
"relative_error": 0.13228799125690793,
"max_error": 0.5034599304199219,
"sparsity": 0.37880786259969074,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.8.mlp.gate_proj": {
"mse": 8.402258390560746e-05,
"rmse": 0.009166383360170328,
"relative_error": 0.10532979753276703,
"max_error": 0.52838134765625,
"sparsity": 0.3913570245107015,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.8.mlp.up_proj": {
"mse": 8.96376877790317e-05,
"rmse": 0.009467718192839904,
"relative_error": 0.1067531770222323,
"max_error": 0.7634124755859375,
"sparsity": 0.3922878901163737,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.8.mlp.down_proj": {
"mse": 0.00010968392598442733,
"rmse": 0.01047300940438933,
"relative_error": 0.11969575410263461,
"max_error": 0.3441123962402344,
"sparsity": 0.3869070808092753,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.9.self_attn.q_proj": {
"mse": 0.00013906371896155179,
"rmse": 0.011792528098823923,
"relative_error": 0.11619249670618485,
"max_error": 0.5095748901367188,
"sparsity": 0.3935159047444661,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.9.self_attn.k_proj": {
"mse": 0.00013474057777784765,
"rmse": 0.011607780915310542,
"relative_error": 0.11522755122799154,
"max_error": 0.6333770751953125,
"sparsity": 0.39248037338256836,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.9.self_attn.v_proj": {
"mse": 9.845116437645629e-05,
"rmse": 0.009922256012442749,
"relative_error": 0.11942510072251349,
"max_error": 0.4576568603515625,
"sparsity": 0.3951539993286133,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.9.self_attn.o_proj": {
"mse": 0.00011052452464355156,
"rmse": 0.0105130644744314,
"relative_error": 0.13353254287792263,
"max_error": 0.45235252380371094,
"sparsity": 0.3810691038767497,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.9.mlp.gate_proj": {
"mse": 8.366839028894901e-05,
"rmse": 0.009147042707287913,
"relative_error": 0.10679062559175231,
"max_error": 0.4619140625,
"sparsity": 0.39163299401601154,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.9.mlp.up_proj": {
"mse": 9.668088023317978e-05,
"rmse": 0.009832643603486286,
"relative_error": 0.10821890520679862,
"max_error": 0.38425350189208984,
"sparsity": 0.39284054438273114,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.9.mlp.down_proj": {
"mse": 0.00013003291678614914,
"rmse": 0.011403197656190528,
"relative_error": 0.12745194232982762,
"max_error": 0.566009521484375,
"sparsity": 0.3845730423927307,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.10.self_attn.q_proj": {
"mse": 0.00012565116048790514,
"rmse": 0.011209422843657257,
"relative_error": 0.11854237750544573,
"max_error": 0.4283638000488281,
"sparsity": 0.3949253559112549,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.10.self_attn.k_proj": {
"mse": 0.00012736594362650067,
"rmse": 0.011285652113480225,
"relative_error": 0.11790091080028516,
"max_error": 0.4173877239227295,
"sparsity": 0.3928183714548747,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.10.self_attn.v_proj": {
"mse": 9.187028626911342e-05,
"rmse": 0.00958489886587821,
"relative_error": 0.12239239741322284,
"max_error": 0.30411529541015625,
"sparsity": 0.39554985364278156,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.10.self_attn.o_proj": {
"mse": 0.00012515991693362594,
"rmse": 0.011187489304291018,
"relative_error": 0.15035625413716164,
"max_error": 0.4181327819824219,
"sparsity": 0.3772312005360921,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.10.mlp.gate_proj": {
"mse": 8.393089228775352e-05,
"rmse": 0.009161380479368462,
"relative_error": 0.10709110477309372,
"max_error": 0.6112289428710938,
"sparsity": 0.39222437143325806,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.10.mlp.up_proj": {
"mse": 9.599251643521711e-05,
"rmse": 0.00979757706962375,
"relative_error": 0.10849474513968223,
"max_error": 0.5392261743545532,
"sparsity": 0.39365750551223755,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.10.mlp.down_proj": {
"mse": 0.00013664091238752007,
"rmse": 0.011689350383469565,
"relative_error": 0.13170919919705393,
"max_error": 0.5762062072753906,
"sparsity": 0.384915828704834,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.11.self_attn.q_proj": {
"mse": 0.00013748332276009023,
"rmse": 0.011725328258095389,
"relative_error": 0.11656835122202536,
"max_error": 1.0129890441894531,
"sparsity": 0.39558879534403485,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.11.self_attn.k_proj": {
"mse": 0.00012948595394846052,
"rmse": 0.01137918951193188,
"relative_error": 0.11476094516158379,
"max_error": 0.6671142578125,
"sparsity": 0.3946519692738851,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.11.self_attn.v_proj": {
"mse": 8.885016723070294e-05,
"rmse": 0.009426036666102193,
"relative_error": 0.1168312712891464,
"max_error": 0.3714599609375,
"sparsity": 0.3978262742360433,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.11.self_attn.o_proj": {
"mse": 0.00010273691441398114,
"rmse": 0.010135921981447032,
"relative_error": 0.13194819940886102,
"max_error": 0.6239433288574219,
"sparsity": 0.3784929911295573,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.11.mlp.gate_proj": {
"mse": 8.606556366430596e-05,
"rmse": 0.009277152777889666,
"relative_error": 0.10805389747567434,
"max_error": 0.801422119140625,
"sparsity": 0.3931015928586324,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.11.mlp.up_proj": {
"mse": 9.808417235035449e-05,
"rmse": 0.00990374536982623,
"relative_error": 0.10907260789107497,
"max_error": 0.476348876953125,
"sparsity": 0.3945031960805257,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.11.mlp.down_proj": {
"mse": 0.00013493798905983567,
"rmse": 0.011616281206127703,
"relative_error": 0.13058919739846145,
"max_error": 0.620574951171875,
"sparsity": 0.38569573561350506,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.12.self_attn.q_proj": {
"mse": 0.00011779519991250709,
"rmse": 0.010853349709306665,
"relative_error": 0.11593947411175125,
"max_error": 0.8235530853271484,
"sparsity": 0.39581966400146484,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.12.self_attn.k_proj": {
"mse": 0.0001068732890416868,
"rmse": 0.010337953813095066,
"relative_error": 0.11340367751333466,
"max_error": 0.39037322998046875,
"sparsity": 0.394009272257487,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.12.self_attn.v_proj": {
"mse": 9.546923683956265e-05,
"rmse": 0.009770836035855001,
"relative_error": 0.1183919527235388,
"max_error": 0.3798694610595703,
"sparsity": 0.3984665870666504,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.12.self_attn.o_proj": {
"mse": 0.00010976817429764196,
"rmse": 0.010477030795871603,
"relative_error": 0.13323159144961502,
"max_error": 0.4802589416503906,
"sparsity": 0.37735788027445477,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.12.mlp.gate_proj": {
"mse": 8.522283314960077e-05,
"rmse": 0.009231621371655186,
"relative_error": 0.10777059058583263,
"max_error": 0.591461181640625,
"sparsity": 0.39298417170842487,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.12.mlp.up_proj": {
"mse": 9.667361155152321e-05,
"rmse": 0.009832273976630392,
"relative_error": 0.10836841175849397,
"max_error": 0.5146751403808594,
"sparsity": 0.39399949709574383,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.12.mlp.down_proj": {
"mse": 0.00013385784404817969,
"rmse": 0.011569695071529745,
"relative_error": 0.13050353261534786,
"max_error": 0.5663547515869141,
"sparsity": 0.3858479857444763,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.13.self_attn.q_proj": {
"mse": 0.00012699411308858544,
"rmse": 0.011269166477099602,
"relative_error": 0.1169069941607757,
"max_error": 0.6077880859375,
"sparsity": 0.3962131341298421,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.13.self_attn.k_proj": {
"mse": 0.00011883420665981248,
"rmse": 0.010901110340686056,
"relative_error": 0.1141555243804878,
"max_error": 0.38907623291015625,
"sparsity": 0.39407960573832196,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.13.self_attn.v_proj": {
"mse": 9.517266880720854e-05,
"rmse": 0.009755648046501501,
"relative_error": 0.11651403462037346,
"max_error": 0.312164306640625,
"sparsity": 0.39721115430196124,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.13.self_attn.o_proj": {
"mse": 0.00011390489817131311,
"rmse": 0.01067262377165583,
"relative_error": 0.1345652980895498,
"max_error": 0.4114570617675781,
"sparsity": 0.3775333563486735,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.13.mlp.gate_proj": {
"mse": 8.87833593878895e-05,
"rmse": 0.009422492206836231,
"relative_error": 0.11013698642560844,
"max_error": 1.0255584716796875,
"sparsity": 0.39292343457539874,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.13.mlp.up_proj": {
"mse": 0.00010229789768345654,
"rmse": 0.010114242318802558,
"relative_error": 0.1105362572094837,
"max_error": 0.4052734375,
"sparsity": 0.3939278523127238,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.13.mlp.down_proj": {
"mse": 0.0001434191653970629,
"rmse": 0.011975774104293338,
"relative_error": 0.13397899523618045,
"max_error": 0.7285346984863281,
"sparsity": 0.3844974438349406,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.14.self_attn.q_proj": {
"mse": 0.00012377048551570624,
"rmse": 0.011125218447999402,
"relative_error": 0.1145393756245998,
"max_error": 0.37328338623046875,
"sparsity": 0.3949705759684245,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.14.self_attn.k_proj": {
"mse": 0.00012177479220554233,
"rmse": 0.01103516163024096,
"relative_error": 0.11414312930004775,
"max_error": 0.39933013916015625,
"sparsity": 0.3937130769093831,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.14.self_attn.v_proj": {
"mse": 0.00010253102664137259,
"rmse": 0.010125760546318118,
"relative_error": 0.11949763612113624,
"max_error": 0.45636940002441406,
"sparsity": 0.3970218499501546,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.14.self_attn.o_proj": {
"mse": 0.00011449969315435737,
"rmse": 0.010700452941551464,
"relative_error": 0.13257149022193387,
"max_error": 1.1525955200195312,
"sparsity": 0.37923351923624676,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.14.mlp.gate_proj": {
"mse": 9.163775393972173e-05,
"rmse": 0.009572761040563049,
"relative_error": 0.11190023092408524,
"max_error": 0.83123779296875,
"sparsity": 0.3928397297859192,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.14.mlp.up_proj": {
"mse": 0.00010870010737562552,
"rmse": 0.01042593436463253,
"relative_error": 0.11280875161412238,
"max_error": 0.5446014404296875,
"sparsity": 0.3936503529548645,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.14.mlp.down_proj": {
"mse": 0.00016400107415392995,
"rmse": 0.012806290413462047,
"relative_error": 0.14206598069839568,
"max_error": 0.7069358825683594,
"sparsity": 0.3815520207087199,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.15.self_attn.q_proj": {
"mse": 9.789341129362583e-05,
"rmse": 0.009894109929327944,
"relative_error": 0.11414555337074125,
"max_error": 0.33676910400390625,
"sparsity": 0.3949731985727946,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.15.self_attn.k_proj": {
"mse": 8.986466855276376e-05,
"rmse": 0.009479697703659319,
"relative_error": 0.1125592821962688,
"max_error": 0.3172111511230469,
"sparsity": 0.39313093821207684,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.15.self_attn.v_proj": {
"mse": 0.00011400174116715789,
"rmse": 0.010677159789342758,
"relative_error": 0.1173175552733063,
"max_error": 0.3896446228027344,
"sparsity": 0.3969427744547526,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.15.self_attn.o_proj": {
"mse": 0.0001631157356314361,
"rmse": 0.012771677087659087,
"relative_error": 0.1473888285697514,
"max_error": 1.012359619140625,
"sparsity": 0.3775180180867513,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.15.mlp.gate_proj": {
"mse": 9.187518298858777e-05,
"rmse": 0.009585154301762062,
"relative_error": 0.11252498530861223,
"max_error": 0.5476951599121094,
"sparsity": 0.39175093173980713,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.15.mlp.up_proj": {
"mse": 0.00011229647498112172,
"rmse": 0.010597003113197698,
"relative_error": 0.11439855196424717,
"max_error": 0.32222747802734375,
"sparsity": 0.39260558287302655,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.15.mlp.down_proj": {
"mse": 0.00017591984942555428,
"rmse": 0.01326347802899203,
"relative_error": 0.1462905444778315,
"max_error": 0.5610237121582031,
"sparsity": 0.37977610031763714,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.16.self_attn.q_proj": {
"mse": 0.0001145480782724917,
"rmse": 0.010702713593873831,
"relative_error": 0.11975183055006922,
"max_error": 0.4313373565673828,
"sparsity": 0.39432207743326825,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.16.self_attn.k_proj": {
"mse": 0.0001037448673741892,
"rmse": 0.010185522439923697,
"relative_error": 0.11635053860916868,
"max_error": 0.4398765563964844,
"sparsity": 0.392624298731486,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.16.self_attn.v_proj": {
"mse": 0.00011915640061488375,
"rmse": 0.010915878371202371,
"relative_error": 0.1201440636287298,
"max_error": 0.3507728576660156,
"sparsity": 0.39522210756937665,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.16.self_attn.o_proj": {
"mse": 0.00014365943206939846,
"rmse": 0.01198580126939365,
"relative_error": 0.13777784807752752,
"max_error": 0.62738037109375,
"sparsity": 0.37827134132385254,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.16.mlp.gate_proj": {
"mse": 9.585679799783975e-05,
"rmse": 0.009790648497308018,
"relative_error": 0.11537675215561138,
"max_error": 0.4750213623046875,
"sparsity": 0.39047886927922565,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.16.mlp.up_proj": {
"mse": 0.00011986482422798872,
"rmse": 0.010948279509949895,
"relative_error": 0.11754970772120121,
"max_error": 0.3110523223876953,
"sparsity": 0.39113156000773114,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.16.mlp.down_proj": {
"mse": 0.00017797393957152963,
"rmse": 0.013340687372528059,
"relative_error": 0.14594216595449572,
"max_error": 0.7946376800537109,
"sparsity": 0.3795015613238017,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.17.self_attn.q_proj": {
"mse": 0.00012077944120392203,
"rmse": 0.010989970027435108,
"relative_error": 0.12809202973827413,
"max_error": 0.36618804931640625,
"sparsity": 0.3912224769592285,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.17.self_attn.k_proj": {
"mse": 0.00010802940232679248,
"rmse": 0.010393719369253361,
"relative_error": 0.12436472277530712,
"max_error": 0.3511810302734375,
"sparsity": 0.39031481742858887,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.17.self_attn.v_proj": {
"mse": 0.00015826526214368641,
"rmse": 0.01258035222653509,
"relative_error": 0.12655687426742085,
"max_error": 0.40193939208984375,
"sparsity": 0.3940921624501546,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.17.self_attn.o_proj": {
"mse": 0.00017517831292934716,
"rmse": 0.013235494434638517,
"relative_error": 0.13974743449718308,
"max_error": 0.38881492614746094,
"sparsity": 0.37921682993570965,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.17.mlp.gate_proj": {
"mse": 9.643776138545945e-05,
"rmse": 0.009820272979172191,
"relative_error": 0.11469761926663953,
"max_error": 0.49706268310546875,
"sparsity": 0.38984374205271405,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.17.mlp.up_proj": {
"mse": 0.00011767178511945531,
"rmse": 0.010847662656971561,
"relative_error": 0.1163379295193364,
"max_error": 0.2964920997619629,
"sparsity": 0.3904351790746053,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.17.mlp.down_proj": {
"mse": 0.00019267346942797303,
"rmse": 0.013880686922050113,
"relative_error": 0.1512596588641169,
"max_error": 0.8572845458984375,
"sparsity": 0.3779032230377197,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.18.self_attn.q_proj": {
"mse": 0.00010977545753121376,
"rmse": 0.010477378371100939,
"relative_error": 0.12804331106255126,
"max_error": 0.3930988311767578,
"sparsity": 0.3909265200297038,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.18.self_attn.k_proj": {
"mse": 9.381980635225773e-05,
"rmse": 0.009686062479266677,
"relative_error": 0.12292216202745253,
"max_error": 0.33354949951171875,
"sparsity": 0.3890887101491292,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.18.self_attn.v_proj": {
"mse": 0.0001693356316536665,
"rmse": 0.013012902506883946,
"relative_error": 0.12694288127788866,
"max_error": 0.5638923645019531,
"sparsity": 0.3930181662241618,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.18.self_attn.o_proj": {
"mse": 0.0002035392972175032,
"rmse": 0.01426671991795953,
"relative_error": 0.14500096842541085,
"max_error": 0.6965866088867188,
"sparsity": 0.3788052399953206,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.18.mlp.gate_proj": {
"mse": 9.147105447482318e-05,
"rmse": 0.009564050108339207,
"relative_error": 0.11277841748947105,
"max_error": 0.40427398681640625,
"sparsity": 0.38996777931849164,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.18.mlp.up_proj": {
"mse": 0.00011604203609749675,
"rmse": 0.010772280914341991,
"relative_error": 0.11471436240503491,
"max_error": 0.28387451171875,
"sparsity": 0.3903919855753581,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.18.mlp.down_proj": {
"mse": 0.00020416387997101992,
"rmse": 0.01428859265186813,
"relative_error": 0.15430695516149365,
"max_error": 1.15777587890625,
"sparsity": 0.3757058580716451,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.19.self_attn.q_proj": {
"mse": 0.00010159470548387617,
"rmse": 0.010079419898182443,
"relative_error": 0.13041868423058764,
"max_error": 0.30798912048339844,
"sparsity": 0.39068396886189777,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.19.self_attn.k_proj": {
"mse": 8.797407645033672e-05,
"rmse": 0.0093794496880327,
"relative_error": 0.12614134813623457,
"max_error": 0.2617206573486328,
"sparsity": 0.3890175024668376,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.19.self_attn.v_proj": {
"mse": 0.00019485123630147427,
"rmse": 0.013958912432617173,
"relative_error": 0.12632223635733977,
"max_error": 0.3944683074951172,
"sparsity": 0.39249809583028156,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.19.self_attn.o_proj": {
"mse": 0.0002515649248380214,
"rmse": 0.01586079836698082,
"relative_error": 0.15257642322392984,
"max_error": 1.296630859375,
"sparsity": 0.37897300720214844,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.19.mlp.gate_proj": {
"mse": 8.597908890806139e-05,
"rmse": 0.009272490976434617,
"relative_error": 0.11023672718946166,
"max_error": 0.343109130859375,
"sparsity": 0.39063696066538495,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.19.mlp.up_proj": {
"mse": 0.00011183493188582361,
"rmse": 0.010575203633302936,
"relative_error": 0.1119852751134503,
"max_error": 0.2927464246749878,
"sparsity": 0.39104316631952923,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.19.mlp.down_proj": {
"mse": 0.0001817988813854754,
"rmse": 0.013483281551071882,
"relative_error": 0.14447528253129333,
"max_error": 0.69659423828125,
"sparsity": 0.37729281187057495,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.20.self_attn.q_proj": {
"mse": 0.00010160417150473222,
"rmse": 0.010079889458954014,
"relative_error": 0.13637383986791093,
"max_error": 0.2930039167404175,
"sparsity": 0.3889306386311849,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.20.self_attn.k_proj": {
"mse": 9.039611904881895e-05,
"rmse": 0.009507687365959135,
"relative_error": 0.13288590446574086,
"max_error": 0.29769229888916016,
"sparsity": 0.38741453488667804,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.20.self_attn.v_proj": {
"mse": 0.0002304271620232612,
"rmse": 0.015179827470141457,
"relative_error": 0.12986079018176874,
"max_error": 0.49585485458374023,
"sparsity": 0.39007751146952313,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.20.self_attn.o_proj": {
"mse": 0.00018717959756031632,
"rmse": 0.013681359492401197,
"relative_error": 0.12420474417407759,
"max_error": 0.5940694808959961,
"sparsity": 0.3826877276102702,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.20.mlp.gate_proj": {
"mse": 7.991305756149814e-05,
"rmse": 0.008939410358714837,
"relative_error": 0.10672288257016303,
"max_error": 0.37044525146484375,
"sparsity": 0.391612708568573,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.20.mlp.up_proj": {
"mse": 0.00010522006778046489,
"rmse": 0.010257683353489953,
"relative_error": 0.10832224226656331,
"max_error": 0.25397777557373047,
"sparsity": 0.39186179637908936,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.20.mlp.down_proj": {
"mse": 0.00016892752319108695,
"rmse": 0.012997212131495236,
"relative_error": 0.1382850726778082,
"max_error": 0.695037841796875,
"sparsity": 0.37873925765355426,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.21.self_attn.q_proj": {
"mse": 8.775975584285334e-05,
"rmse": 0.009368017711493362,
"relative_error": 0.13237826106082362,
"max_error": 0.3493930697441101,
"sparsity": 0.3894302050272624,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.21.self_attn.k_proj": {
"mse": 7.635178917553276e-05,
"rmse": 0.008737951085668353,
"relative_error": 0.12911753812683308,
"max_error": 0.31544029712677,
"sparsity": 0.38779481252034503,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.21.self_attn.v_proj": {
"mse": 0.00024096955894492567,
"rmse": 0.015523194224930823,
"relative_error": 0.12822875184818627,
"max_error": 0.45868492126464844,
"sparsity": 0.3911566734313965,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.21.self_attn.o_proj": {
"mse": 0.00024130858946591616,
"rmse": 0.015534110514152915,
"relative_error": 0.13656648222552506,
"max_error": 0.6318511962890625,
"sparsity": 0.379139502843221,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.21.mlp.gate_proj": {
"mse": 7.701908180024475e-05,
"rmse": 0.008776051606516722,
"relative_error": 0.10519762249970133,
"max_error": 0.29144287109375,
"sparsity": 0.3916556040445964,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.21.mlp.up_proj": {
"mse": 0.00010363446926930919,
"rmse": 0.010180101633545177,
"relative_error": 0.1068552422607346,
"max_error": 0.2808570861816406,
"sparsity": 0.39187947909037274,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.21.mlp.down_proj": {
"mse": 0.00017394559108652174,
"rmse": 0.013188843432481931,
"relative_error": 0.13934798047636146,
"max_error": 0.806182861328125,
"sparsity": 0.3770946264266968,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.22.self_attn.q_proj": {
"mse": 6.889259384479374e-05,
"rmse": 0.00830015625423966,
"relative_error": 0.11974573199073539,
"max_error": 0.37679290771484375,
"sparsity": 0.39107751846313477,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.22.self_attn.k_proj": {
"mse": 6.384578591678292e-05,
"rmse": 0.00799035580664484,
"relative_error": 0.11802824517589042,
"max_error": 0.2800483703613281,
"sparsity": 0.38954949378967285,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.22.self_attn.v_proj": {
"mse": 0.0002198831207351759,
"rmse": 0.014828456451538572,
"relative_error": 0.12174802900805574,
"max_error": 0.40467071533203125,
"sparsity": 0.3936028480529785,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.22.self_attn.o_proj": {
"mse": 0.00026977865491062403,
"rmse": 0.016424940027611183,
"relative_error": 0.14430395391322967,
"max_error": 0.4811992645263672,
"sparsity": 0.37709347407023114,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.22.mlp.gate_proj": {
"mse": 8.570995851187035e-05,
"rmse": 0.009257967299135937,
"relative_error": 0.1092938200920434,
"max_error": 0.8341293334960938,
"sparsity": 0.3915645281473796,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.22.mlp.up_proj": {
"mse": 0.00011422947864048183,
"rmse": 0.010687819171397026,
"relative_error": 0.11077979518624878,
"max_error": 0.6844406127929688,
"sparsity": 0.39192960659662884,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.22.mlp.down_proj": {
"mse": 0.0001934868050739169,
"rmse": 0.013909953453333944,
"relative_error": 0.1468472238781438,
"max_error": 0.861419677734375,
"sparsity": 0.3759889403978984,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.23.self_attn.q_proj": {
"mse": 4.9034068069886416e-05,
"rmse": 0.00700243301073894,
"relative_error": 0.11425448677610671,
"max_error": 0.2026386260986328,
"sparsity": 0.3900686899820964,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.23.self_attn.k_proj": {
"mse": 4.728814383270219e-05,
"rmse": 0.006876637538266954,
"relative_error": 0.1134439127209608,
"max_error": 0.22553253173828125,
"sparsity": 0.3899570306142171,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.23.self_attn.v_proj": {
"mse": 0.00021581994951702654,
"rmse": 0.01469081173785256,
"relative_error": 0.11788501133986323,
"max_error": 0.37439680099487305,
"sparsity": 0.39295633633931476,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.23.self_attn.o_proj": {
"mse": 0.0002346659603063017,
"rmse": 0.015318810668792199,
"relative_error": 0.13143557617866453,
"max_error": 1.225677490234375,
"sparsity": 0.3775166670481364,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.23.mlp.gate_proj": {
"mse": 0.00010114066390087828,
"rmse": 0.010056871476800242,
"relative_error": 0.11570676856337891,
"max_error": 0.42010498046875,
"sparsity": 0.3895459572474162,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.23.mlp.up_proj": {
"mse": 0.0001254764065379277,
"rmse": 0.011201625173961486,
"relative_error": 0.11651321952774005,
"max_error": 0.36852455139160156,
"sparsity": 0.38976337512334186,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
},
"model.text_model.layers.23.mlp.down_proj": {
"mse": 0.00048252849956043065,
"rmse": 0.021966531350225294,
"relative_error": 0.24617655752789447,
"max_error": 1.2671375274658203,
"sparsity": 0.3508457938830058,
"effective_bits": 9.0,
"sparse_nnz": 0,
"n_planes": 3,
"rescued_rows": 0
}
},
"summary": {
"method_name": "Broad-tritplane3",
"model_family": "image_text_to_text",
"selected_components": [
"text_backbone",
"multimodal_connector"
],
"quantized_modules": 169,
"quantized_params": 1631846400,
"quantized_fraction": 0.7263029115631221,
"avg_relative_error": 0.12359459755394277,
"avg_effective_bits": 9.0,
"full_model_effective_bits": 10.915879619058146,
"compression_ratio": 1.4657545299478603
},
"method_name": "Broad-tritplane3",
"model_family": "image_text_to_text",
"format_family": "tritplane_small",
"format_version": "1.0",
"total_packed_bytes": 1835827200,
"total_fp16_bytes": 3263692800,
"compression_ratio": 1.7777777777777777
}