| { |
| "model_name": "HuggingFaceTB/SmolVLM2-2.2B-Instruct", |
| "model_config": { |
| "transformers_version": "5.5.3", |
| "architectures": [ |
| "SmolVLMForConditionalGeneration" |
| ], |
| "output_hidden_states": false, |
| "return_dict": true, |
| "dtype": "float16", |
| "chunk_size_feed_forward": 0, |
| "is_encoder_decoder": false, |
| "id2label": { |
| "0": "LABEL_0", |
| "1": "LABEL_1" |
| }, |
| "label2id": { |
| "LABEL_0": 0, |
| "LABEL_1": 1 |
| }, |
| "problem_type": null, |
| "use_cache": false, |
| "image_token_id": 49190, |
| "tie_word_embeddings": false, |
| "vision_config": { |
| "architectures": null, |
| "output_hidden_states": false, |
| "return_dict": true, |
| "dtype": "float16", |
| "chunk_size_feed_forward": 0, |
| "is_encoder_decoder": false, |
| "id2label": { |
| "0": "LABEL_0", |
| "1": "LABEL_1" |
| }, |
| "label2id": { |
| "LABEL_0": 0, |
| "LABEL_1": 1 |
| }, |
| "problem_type": null, |
| "hidden_size": 1152, |
| "intermediate_size": 4304, |
| "num_hidden_layers": 27, |
| "num_attention_heads": 16, |
| "num_channels": 3, |
| "image_size": 384, |
| "patch_size": 14, |
| "hidden_act": "gelu_pytorch_tanh", |
| "layer_norm_eps": 1e-06, |
| "attention_dropout": 0.0, |
| "initializer_range": 0.02, |
| "_name_or_path": "", |
| "max_image_size": { |
| "longest_edge": 384 |
| }, |
| "model_type": "smolvlm_vision", |
| "size": { |
| "longest_edge": 1920 |
| }, |
| "tie_word_embeddings": false, |
| "use_base_siglip": false, |
| "output_attentions": false |
| }, |
| "text_config": { |
| "architectures": [ |
| "VLlama3ForCausalLM" |
| ], |
| "output_hidden_states": false, |
| "return_dict": true, |
| "dtype": "float16", |
| "chunk_size_feed_forward": 0, |
| "is_encoder_decoder": false, |
| "id2label": { |
| "0": "LABEL_0", |
| "1": "LABEL_1" |
| }, |
| "label2id": { |
| "LABEL_0": 0, |
| "LABEL_1": 1 |
| }, |
| "problem_type": null, |
| "vocab_size": 49280, |
| "hidden_size": 2048, |
| "intermediate_size": 8192, |
| "num_hidden_layers": 24, |
| "num_attention_heads": 32, |
| "num_key_value_heads": 32, |
| "hidden_act": "silu", |
| "max_position_embeddings": 8192, |
| "initializer_range": 0.02, |
| "rms_norm_eps": 1e-05, |
| "use_cache": true, |
| "pad_token_id": 2, |
| "bos_token_id": 1, |
| "eos_token_id": 2, |
| "pretraining_tp": 1, |
| "tie_word_embeddings": false, |
| "rope_parameters": { |
| "rope_theta": 130000, |
| "rope_type": "default" |
| }, |
| "attention_bias": false, |
| "attention_dropout": 0.0, |
| "mlp_bias": false, |
| "head_dim": 64, |
| "_name_or_path": "None", |
| "_flash_attn_2_enabled": true, |
| "model_type": "llama", |
| "neftune_noise_alpha": 0.0, |
| "perceiver_config": { |
| "_attn_implementation_autoset": false, |
| "_name_or_path": "", |
| "add_cross_attention": false, |
| "architectures": null, |
| "attention_dropout": 0.0, |
| "bad_words_ids": null, |
| "begin_suppress_tokens": null, |
| "bos_token_id": null, |
| "chunk_size_feed_forward": 0, |
| "cross_attention_hidden_size": null, |
| "decoder_start_token_id": null, |
| "diversity_penalty": 0.0, |
| "do_sample": false, |
| "early_stopping": false, |
| "encoder_no_repeat_ngram_size": 0, |
| "eos_token_id": null, |
| "exponential_decay_length_penalty": null, |
| "finetuning_task": null, |
| "forced_bos_token_id": null, |
| "forced_eos_token_id": null, |
| "hidden_act": "silu", |
| "id2label": { |
| "0": "LABEL_0", |
| "1": "LABEL_1" |
| }, |
| "is_decoder": false, |
| "is_encoder_decoder": false, |
| "label2id": { |
| "LABEL_0": 0, |
| "LABEL_1": 1 |
| }, |
| "length_penalty": 1.0, |
| "max_length": 20, |
| "min_length": 0, |
| "model_type": "vllama3", |
| "no_repeat_ngram_size": 0, |
| "num_beam_groups": 1, |
| "num_beams": 1, |
| "num_key_value_heads": 1, |
| "num_return_sequences": 1, |
| "output_attentions": false, |
| "output_hidden_states": false, |
| "output_scores": false, |
| "pad_token_id": null, |
| "prefix": null, |
| "problem_type": null, |
| "pruned_heads": {}, |
| "qk_layer_norms_perceiver": false, |
| "remove_invalid_values": false, |
| "repetition_penalty": 1.0, |
| "resampler_depth": 6, |
| "resampler_head_dim": 96, |
| "resampler_n_heads": 16, |
| "resampler_n_latents": 64, |
| "return_dict": true, |
| "return_dict_in_generate": false, |
| "sep_token_id": null, |
| "suppress_tokens": null, |
| "task_specific_params": null, |
| "temperature": 1.0, |
| "tf_legacy_loss": false, |
| "tie_encoder_decoder": false, |
| "tie_word_embeddings": true, |
| "tokenizer_class": null, |
| "top_k": 50, |
| "top_p": 1.0, |
| "torch_dtype": null, |
| "torchscript": false, |
| "transformers_version": "4.46.0", |
| "typical_p": 1.0, |
| "use_bfloat16": false |
| }, |
| "pixel_shuffle_factor": 3, |
| "qk_layer_norms": false, |
| "transformers.js_config": { |
| "kv_cache_dtype": { |
| "fp16": "float16", |
| "q4f16": "float16" |
| } |
| }, |
| "use_resampler": false, |
| "output_attentions": false |
| }, |
| "scale_factor": 3, |
| "pad_token_id": 128002, |
| "_name_or_path": "HuggingFaceTB/SmolVLM2-2.2B-Instruct", |
| "model_type": "smolvlm", |
| "use_reentrant_checkpointing": false, |
| "vocab_size": 49280, |
| "output_attentions": false |
| }, |
| "quant_config": { |
| "components": [ |
| "text_backbone", |
| "multimodal_connector" |
| ], |
| "scheme": "tritplane3", |
| "group_size": 32, |
| "n_iter": 10, |
| "salient_fraction": 0.0, |
| "rescue_fraction": 0.0, |
| "n_planes": 3, |
| "allow_all_linear": false, |
| "target_module_names": [ |
| "Wqkv", |
| "att_proj", |
| "attn.proj", |
| "attn.qkv", |
| "c_attn", |
| "c_fc", |
| "c_proj", |
| "dense", |
| "dense_4h_to_h", |
| "dense_h_to_4h", |
| "down_proj", |
| "fc1", |
| "fc2", |
| "ff_proj", |
| "gate_proj", |
| "gate_up_proj", |
| "k", |
| "k_proj", |
| "linear", |
| "o", |
| "o_proj", |
| "out_proj", |
| "per_layer_input_gate", |
| "per_layer_projection", |
| "proj", |
| "q", |
| "q_proj", |
| "qkv", |
| "qkv_proj", |
| "query_key_value", |
| "up_proj", |
| "v", |
| "v_proj", |
| "w1", |
| "w2", |
| "w3", |
| "wi", |
| "wi_0", |
| "wi_1", |
| "wo" |
| ], |
| "max_length": 160, |
| "calibration_batch_size": 2, |
| "calibration_prompts": null, |
| "vlm_use_demo_image": true |
| }, |
| "plan": {}, |
| "layer_info": { |
| "model.connector.modality_projection.proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 10368 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 21233664, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 23887872, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.0.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.0.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.0.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.0.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.0.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.0.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.0.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.1.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.1.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.1.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.1.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.1.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.1.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.1.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.2.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.2.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.2.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.2.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.2.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.2.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.2.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.3.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.3.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.3.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.3.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.3.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.3.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.3.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.4.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.4.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.4.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.4.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.4.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.4.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.4.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.5.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.5.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.5.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.5.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.5.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.5.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.5.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.6.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.6.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.6.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.6.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.6.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.6.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.6.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.7.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.7.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.7.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.7.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.7.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.7.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.7.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.8.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.8.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.8.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.8.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.8.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.8.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.8.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.9.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.9.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.9.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.9.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.9.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.9.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.9.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.10.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.10.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.10.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.10.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.10.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.10.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.10.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.11.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.11.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.11.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.11.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.11.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.11.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.11.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.12.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.12.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.12.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.12.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.12.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.12.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.12.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.13.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.13.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.13.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.13.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.13.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.13.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.13.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.14.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.14.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.14.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.14.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.14.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.14.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.14.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.15.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.15.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.15.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.15.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.15.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.15.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.15.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.16.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.16.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.16.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.16.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.16.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.16.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.16.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.17.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.17.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.17.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.17.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.17.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.17.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.17.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.18.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.18.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.18.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.18.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.18.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.18.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.18.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.19.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.19.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.19.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.19.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.19.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.19.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.19.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.20.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.20.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.20.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.20.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.20.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.20.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.20.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.21.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.21.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.21.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.21.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.21.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.21.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.21.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.22.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.22.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.22.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.22.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.22.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.22.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.22.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.23.self_attn.q_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.23.self_attn.k_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.23.self_attn.v_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.23.self_attn.o_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 4194304, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 4718592, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.23.mlp.gate_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.23.mlp.up_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 8192, |
| 2048 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| }, |
| "model.text_model.layers.23.mlp.down_proj": { |
| "scheme": "tritplane_small_v1", |
| "shape": [ |
| 2048, |
| 8192 |
| ], |
| "dtype": "torch.float16", |
| "num_elements": 16777216, |
| "n_planes": 3, |
| "group_sizes": [ |
| 32, |
| 32, |
| 32 |
| ], |
| "rescued_rows": 0, |
| "stored_bytes": 18874368, |
| "effective_bits": 9.0 |
| } |
| }, |
| "stats": { |
| "model.connector.modality_projection.proj": { |
| "mse": 3.4296051580895437e-06, |
| "rmse": 0.0018519193173811713, |
| "relative_error": 0.11355021084933176, |
| "max_error": 0.05395698547363281, |
| "sparsity": 0.39136114716529846, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.0.self_attn.q_proj": { |
| "mse": 0.0006577471503987908, |
| "rmse": 0.025646581651338856, |
| "relative_error": 0.16423382595057714, |
| "max_error": 2.508922576904297, |
| "sparsity": 0.3742976983388265, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.0.self_attn.k_proj": { |
| "mse": 0.0006063183536753058, |
| "rmse": 0.024623532518209198, |
| "relative_error": 0.1626188869171995, |
| "max_error": 2.065166473388672, |
| "sparsity": 0.37669801712036133, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.0.self_attn.v_proj": { |
| "mse": 5.711389530915767e-05, |
| "rmse": 0.007557373572158364, |
| "relative_error": 0.20627513709425288, |
| "max_error": 0.27435970306396484, |
| "sparsity": 0.37764477729797363, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.0.self_attn.o_proj": { |
| "mse": 1.2771037290804088e-05, |
| "rmse": 0.003573658810071841, |
| "relative_error": 0.12916616624639116, |
| "max_error": 0.5240488052368164, |
| "sparsity": 0.3812715212504069, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.0.mlp.gate_proj": { |
| "mse": 0.00010874148574657738, |
| "rmse": 0.010427918572110994, |
| "relative_error": 0.11862943092036356, |
| "max_error": 0.5052490234375, |
| "sparsity": 0.3853779236475627, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.0.mlp.up_proj": { |
| "mse": 0.00010343005124013871, |
| "rmse": 0.010170056599652665, |
| "relative_error": 0.1189827787892244, |
| "max_error": 0.33917236328125, |
| "sparsity": 0.385589599609375, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.0.mlp.down_proj": { |
| "mse": 0.0001387051015626639, |
| "rmse": 0.011777313002661681, |
| "relative_error": 0.13502964609806262, |
| "max_error": 0.6370925903320312, |
| "sparsity": 0.38009379307429, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.1.self_attn.q_proj": { |
| "mse": 0.00026919360971078277, |
| "rmse": 0.016407120701414455, |
| "relative_error": 0.13839612971922743, |
| "max_error": 0.726893424987793, |
| "sparsity": 0.3849904537200928, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.1.self_attn.k_proj": { |
| "mse": 0.0002586275804787874, |
| "rmse": 0.016081902265552648, |
| "relative_error": 0.13440342116408724, |
| "max_error": 0.5205841064453125, |
| "sparsity": 0.3860335350036621, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.1.self_attn.v_proj": { |
| "mse": 8.325559610966593e-05, |
| "rmse": 0.009124450455214601, |
| "relative_error": 0.14485086837839517, |
| "max_error": 0.3021749258041382, |
| "sparsity": 0.38692625363667804, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.1.self_attn.o_proj": { |
| "mse": 7.805755012668669e-05, |
| "rmse": 0.008835018399906516, |
| "relative_error": 0.14746445782389664, |
| "max_error": 1.331390380859375, |
| "sparsity": 0.3764190673828125, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.1.mlp.gate_proj": { |
| "mse": 0.00010081167420139536, |
| "rmse": 0.0100405016907222, |
| "relative_error": 0.11212122738948778, |
| "max_error": 0.9822454452514648, |
| "sparsity": 0.3886687954266866, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.1.mlp.up_proj": { |
| "mse": 8.722111670067534e-05, |
| "rmse": 0.00933922463059302, |
| "relative_error": 0.11204773790921406, |
| "max_error": 0.4900360107421875, |
| "sparsity": 0.3890010515848796, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.1.mlp.down_proj": { |
| "mse": 0.000125523962196894, |
| "rmse": 0.011203747685345917, |
| "relative_error": 0.13279524768918374, |
| "max_error": 0.6065788269042969, |
| "sparsity": 0.38065383831659955, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.2.self_attn.q_proj": { |
| "mse": 0.00016419387247879058, |
| "rmse": 0.012813815687717323, |
| "relative_error": 0.1146054007851603, |
| "max_error": 0.40304821729660034, |
| "sparsity": 0.39175184567769367, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.2.self_attn.k_proj": { |
| "mse": 0.00016570747538935393, |
| "rmse": 0.012872741564614506, |
| "relative_error": 0.11461909224693181, |
| "max_error": 0.3516998291015625, |
| "sparsity": 0.3909467856089274, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.2.self_attn.v_proj": { |
| "mse": 6.916217535035685e-05, |
| "rmse": 0.00831637994264072, |
| "relative_error": 0.1159744597091741, |
| "max_error": 0.25598716735839844, |
| "sparsity": 0.3940271536509196, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.2.self_attn.o_proj": { |
| "mse": 8.277458982774988e-05, |
| "rmse": 0.00909805417810588, |
| "relative_error": 0.1345205354400204, |
| "max_error": 0.5423431396484375, |
| "sparsity": 0.3798822561899821, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.2.mlp.gate_proj": { |
| "mse": 9.692814637674019e-05, |
| "rmse": 0.009845209310966435, |
| "relative_error": 0.11052952542386552, |
| "max_error": 0.6398468017578125, |
| "sparsity": 0.3880802392959595, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.2.mlp.up_proj": { |
| "mse": 8.710896509001032e-05, |
| "rmse": 0.009333218367209154, |
| "relative_error": 0.11117947991341522, |
| "max_error": 0.40167236328125, |
| "sparsity": 0.3885002136230469, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.2.mlp.down_proj": { |
| "mse": 0.00012658716877922416, |
| "rmse": 0.011251096336767548, |
| "relative_error": 0.1325751347848122, |
| "max_error": 0.8208770751953125, |
| "sparsity": 0.3796415527661641, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.3.self_attn.q_proj": { |
| "mse": 0.0001530556910438463, |
| "rmse": 0.012371567849058029, |
| "relative_error": 0.11541190388398646, |
| "max_error": 0.4719809889793396, |
| "sparsity": 0.39139819145202637, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.3.self_attn.k_proj": { |
| "mse": 0.00015896638797130436, |
| "rmse": 0.012608187338840756, |
| "relative_error": 0.11515529751944896, |
| "max_error": 0.39187145233154297, |
| "sparsity": 0.39035431543986004, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.3.self_attn.v_proj": { |
| "mse": 7.291566726053134e-05, |
| "rmse": 0.008539067118867923, |
| "relative_error": 0.11644146996502198, |
| "max_error": 0.2207043170928955, |
| "sparsity": 0.3927210172017415, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.3.self_attn.o_proj": { |
| "mse": 7.748796633677557e-05, |
| "rmse": 0.008802724938152706, |
| "relative_error": 0.1279519505773518, |
| "max_error": 0.43854331970214844, |
| "sparsity": 0.38050039609273273, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.3.mlp.gate_proj": { |
| "mse": 9.516256977804005e-05, |
| "rmse": 0.009755130433676427, |
| "relative_error": 0.11032415499084043, |
| "max_error": 0.597137451171875, |
| "sparsity": 0.38887778917948407, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.3.mlp.up_proj": { |
| "mse": 9.12658724701032e-05, |
| "rmse": 0.009553317354202319, |
| "relative_error": 0.11089436404123741, |
| "max_error": 0.34808349609375, |
| "sparsity": 0.389626423517863, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.3.mlp.down_proj": { |
| "mse": 0.00013019863399676979, |
| "rmse": 0.011410461603141644, |
| "relative_error": 0.13189586812749926, |
| "max_error": 0.5650882720947266, |
| "sparsity": 0.3811826705932617, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.4.self_attn.q_proj": { |
| "mse": 0.00015413184883072972, |
| "rmse": 0.012414984850201378, |
| "relative_error": 0.11780370464069141, |
| "max_error": 0.37637901306152344, |
| "sparsity": 0.39243324597676593, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.4.self_attn.k_proj": { |
| "mse": 0.00015679103671573102, |
| "rmse": 0.012521622766867361, |
| "relative_error": 0.11770231723548095, |
| "max_error": 0.3959388732910156, |
| "sparsity": 0.3918443520863851, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.4.self_attn.v_proj": { |
| "mse": 7.459029438905418e-05, |
| "rmse": 0.008636567280410325, |
| "relative_error": 0.11947416432783187, |
| "max_error": 0.23118281364440918, |
| "sparsity": 0.3938344319661458, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.4.self_attn.o_proj": { |
| "mse": 7.937617920106277e-05, |
| "rmse": 0.008909331018716432, |
| "relative_error": 0.13073696797025244, |
| "max_error": 0.6722698211669922, |
| "sparsity": 0.3798239231109619, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.4.mlp.gate_proj": { |
| "mse": 9.190893615595996e-05, |
| "rmse": 0.009586914840341493, |
| "relative_error": 0.10849688521505156, |
| "max_error": 0.5092010498046875, |
| "sparsity": 0.3894091447194417, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.4.mlp.up_proj": { |
| "mse": 9.020772995427251e-05, |
| "rmse": 0.009497775000192018, |
| "relative_error": 0.10945214365573647, |
| "max_error": 0.4986724853515625, |
| "sparsity": 0.3900378147761027, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.4.mlp.down_proj": { |
| "mse": 0.00012248774874024093, |
| "rmse": 0.011067418341250182, |
| "relative_error": 0.12746774428731608, |
| "max_error": 0.5980072021484375, |
| "sparsity": 0.38230576117833454, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.5.self_attn.q_proj": { |
| "mse": 0.00015130281099118292, |
| "rmse": 0.012300520760975242, |
| "relative_error": 0.11469356438408594, |
| "max_error": 0.3299667239189148, |
| "sparsity": 0.3918568293253581, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.5.self_attn.k_proj": { |
| "mse": 0.00015101679309736937, |
| "rmse": 0.012288889009888948, |
| "relative_error": 0.11402556294332304, |
| "max_error": 0.3680969476699829, |
| "sparsity": 0.3910319010416667, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.5.self_attn.v_proj": { |
| "mse": 7.671088678762317e-05, |
| "rmse": 0.008758475140549477, |
| "relative_error": 0.11636823438420277, |
| "max_error": 0.2230091094970703, |
| "sparsity": 0.3936762809753418, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.5.self_attn.o_proj": { |
| "mse": 9.098563896259293e-05, |
| "rmse": 0.009538639261581964, |
| "relative_error": 0.13238292097211185, |
| "max_error": 0.416259765625, |
| "sparsity": 0.3792380491892497, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.5.mlp.gate_proj": { |
| "mse": 9.076636342797428e-05, |
| "rmse": 0.009527138260147918, |
| "relative_error": 0.10842027764411528, |
| "max_error": 0.697662353515625, |
| "sparsity": 0.388599971930186, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.5.mlp.up_proj": { |
| "mse": 9.33339397306554e-05, |
| "rmse": 0.009660949214785025, |
| "relative_error": 0.1099180335797476, |
| "max_error": 0.32053184509277344, |
| "sparsity": 0.3889654080073039, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.5.mlp.down_proj": { |
| "mse": 0.00011332923168083653, |
| "rmse": 0.010645620305122503, |
| "relative_error": 0.12130861115388147, |
| "max_error": 0.5944061279296875, |
| "sparsity": 0.3844008247057597, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.6.self_attn.q_proj": { |
| "mse": 0.00014138600090518594, |
| "rmse": 0.011890584548506685, |
| "relative_error": 0.1137895158538832, |
| "max_error": 0.31124114990234375, |
| "sparsity": 0.39211424191792804, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.6.self_attn.k_proj": { |
| "mse": 0.00014205885236151516, |
| "rmse": 0.011918844422238053, |
| "relative_error": 0.11346255810575416, |
| "max_error": 0.3076510429382324, |
| "sparsity": 0.39174707730611164, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.6.self_attn.v_proj": { |
| "mse": 7.575999188702554e-05, |
| "rmse": 0.00870402159274812, |
| "relative_error": 0.1160057875043741, |
| "max_error": 0.2431955337524414, |
| "sparsity": 0.3937256336212158, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.6.self_attn.o_proj": { |
| "mse": 8.068876923061907e-05, |
| "rmse": 0.008982692760560114, |
| "relative_error": 0.12594970736566127, |
| "max_error": 0.5454425811767578, |
| "sparsity": 0.3837650616963704, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.6.mlp.gate_proj": { |
| "mse": 8.998379053082317e-05, |
| "rmse": 0.009485978627997386, |
| "relative_error": 0.10835457020640558, |
| "max_error": 0.5523300170898438, |
| "sparsity": 0.38953636089960736, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.6.mlp.up_proj": { |
| "mse": 9.37409422476776e-05, |
| "rmse": 0.009681990613901544, |
| "relative_error": 0.10956738752297024, |
| "max_error": 0.297149658203125, |
| "sparsity": 0.3902047872543335, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.6.mlp.down_proj": { |
| "mse": 0.00011795001773862168, |
| "rmse": 0.010860479627466812, |
| "relative_error": 0.12343403918579368, |
| "max_error": 0.44796276092529297, |
| "sparsity": 0.3843594789505005, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.7.self_attn.q_proj": { |
| "mse": 0.00015688124403823167, |
| "rmse": 0.012525224310894862, |
| "relative_error": 0.12189713132098018, |
| "max_error": 0.4827537536621094, |
| "sparsity": 0.3912874062856038, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.7.self_attn.k_proj": { |
| "mse": 0.00015296241326723248, |
| "rmse": 0.01236779742990774, |
| "relative_error": 0.1199153668104569, |
| "max_error": 0.54345703125, |
| "sparsity": 0.39050086339314777, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.7.self_attn.v_proj": { |
| "mse": 8.948285540100187e-05, |
| "rmse": 0.009459537800601141, |
| "relative_error": 0.12402144103539234, |
| "max_error": 0.25670433044433594, |
| "sparsity": 0.3926668167114258, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.7.self_attn.o_proj": { |
| "mse": 8.854311454342678e-05, |
| "rmse": 0.009409735094221664, |
| "relative_error": 0.13105534388713513, |
| "max_error": 0.7096405029296875, |
| "sparsity": 0.3785405158996582, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.7.mlp.gate_proj": { |
| "mse": 8.576437539886683e-05, |
| "rmse": 0.009260905754777273, |
| "relative_error": 0.10707372869116029, |
| "max_error": 0.48479461669921875, |
| "sparsity": 0.3907337586085002, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.7.mlp.up_proj": { |
| "mse": 9.252261952497065e-05, |
| "rmse": 0.009618867892063527, |
| "relative_error": 0.10801395651849023, |
| "max_error": 0.375885009765625, |
| "sparsity": 0.3915167252222697, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.7.mlp.down_proj": { |
| "mse": 0.0001230652560479939, |
| "rmse": 0.01109347808615467, |
| "relative_error": 0.12529995595955354, |
| "max_error": 0.7331366539001465, |
| "sparsity": 0.3860664963722229, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.8.self_attn.q_proj": { |
| "mse": 0.00012525853526312858, |
| "rmse": 0.011191895963737717, |
| "relative_error": 0.11461234808407195, |
| "max_error": 0.35657501220703125, |
| "sparsity": 0.39265839258829754, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.8.self_attn.k_proj": { |
| "mse": 0.00011599854769883677, |
| "rmse": 0.01077026219266907, |
| "relative_error": 0.11264466468077658, |
| "max_error": 0.35434722900390625, |
| "sparsity": 0.39147520065307617, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.8.self_attn.v_proj": { |
| "mse": 9.594231960363686e-05, |
| "rmse": 0.009795015038458943, |
| "relative_error": 0.11839808436837174, |
| "max_error": 0.37999963760375977, |
| "sparsity": 0.39471809069315594, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.8.self_attn.o_proj": { |
| "mse": 0.0001107770367525518, |
| "rmse": 0.010525067066415861, |
| "relative_error": 0.13228799125690793, |
| "max_error": 0.5034599304199219, |
| "sparsity": 0.37880786259969074, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.8.mlp.gate_proj": { |
| "mse": 8.402258390560746e-05, |
| "rmse": 0.009166383360170328, |
| "relative_error": 0.10532979753276703, |
| "max_error": 0.52838134765625, |
| "sparsity": 0.3913570245107015, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.8.mlp.up_proj": { |
| "mse": 8.96376877790317e-05, |
| "rmse": 0.009467718192839904, |
| "relative_error": 0.1067531770222323, |
| "max_error": 0.7634124755859375, |
| "sparsity": 0.3922878901163737, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.8.mlp.down_proj": { |
| "mse": 0.00010968392598442733, |
| "rmse": 0.01047300940438933, |
| "relative_error": 0.11969575410263461, |
| "max_error": 0.3441123962402344, |
| "sparsity": 0.3869070808092753, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.9.self_attn.q_proj": { |
| "mse": 0.00013906371896155179, |
| "rmse": 0.011792528098823923, |
| "relative_error": 0.11619249670618485, |
| "max_error": 0.5095748901367188, |
| "sparsity": 0.3935159047444661, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.9.self_attn.k_proj": { |
| "mse": 0.00013474057777784765, |
| "rmse": 0.011607780915310542, |
| "relative_error": 0.11522755122799154, |
| "max_error": 0.6333770751953125, |
| "sparsity": 0.39248037338256836, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.9.self_attn.v_proj": { |
| "mse": 9.845116437645629e-05, |
| "rmse": 0.009922256012442749, |
| "relative_error": 0.11942510072251349, |
| "max_error": 0.4576568603515625, |
| "sparsity": 0.3951539993286133, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.9.self_attn.o_proj": { |
| "mse": 0.00011052452464355156, |
| "rmse": 0.0105130644744314, |
| "relative_error": 0.13353254287792263, |
| "max_error": 0.45235252380371094, |
| "sparsity": 0.3810691038767497, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.9.mlp.gate_proj": { |
| "mse": 8.366839028894901e-05, |
| "rmse": 0.009147042707287913, |
| "relative_error": 0.10679062559175231, |
| "max_error": 0.4619140625, |
| "sparsity": 0.39163299401601154, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.9.mlp.up_proj": { |
| "mse": 9.668088023317978e-05, |
| "rmse": 0.009832643603486286, |
| "relative_error": 0.10821890520679862, |
| "max_error": 0.38425350189208984, |
| "sparsity": 0.39284054438273114, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.9.mlp.down_proj": { |
| "mse": 0.00013003291678614914, |
| "rmse": 0.011403197656190528, |
| "relative_error": 0.12745194232982762, |
| "max_error": 0.566009521484375, |
| "sparsity": 0.3845730423927307, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.10.self_attn.q_proj": { |
| "mse": 0.00012565116048790514, |
| "rmse": 0.011209422843657257, |
| "relative_error": 0.11854237750544573, |
| "max_error": 0.4283638000488281, |
| "sparsity": 0.3949253559112549, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.10.self_attn.k_proj": { |
| "mse": 0.00012736594362650067, |
| "rmse": 0.011285652113480225, |
| "relative_error": 0.11790091080028516, |
| "max_error": 0.4173877239227295, |
| "sparsity": 0.3928183714548747, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.10.self_attn.v_proj": { |
| "mse": 9.187028626911342e-05, |
| "rmse": 0.00958489886587821, |
| "relative_error": 0.12239239741322284, |
| "max_error": 0.30411529541015625, |
| "sparsity": 0.39554985364278156, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.10.self_attn.o_proj": { |
| "mse": 0.00012515991693362594, |
| "rmse": 0.011187489304291018, |
| "relative_error": 0.15035625413716164, |
| "max_error": 0.4181327819824219, |
| "sparsity": 0.3772312005360921, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.10.mlp.gate_proj": { |
| "mse": 8.393089228775352e-05, |
| "rmse": 0.009161380479368462, |
| "relative_error": 0.10709110477309372, |
| "max_error": 0.6112289428710938, |
| "sparsity": 0.39222437143325806, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.10.mlp.up_proj": { |
| "mse": 9.599251643521711e-05, |
| "rmse": 0.00979757706962375, |
| "relative_error": 0.10849474513968223, |
| "max_error": 0.5392261743545532, |
| "sparsity": 0.39365750551223755, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.10.mlp.down_proj": { |
| "mse": 0.00013664091238752007, |
| "rmse": 0.011689350383469565, |
| "relative_error": 0.13170919919705393, |
| "max_error": 0.5762062072753906, |
| "sparsity": 0.384915828704834, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.11.self_attn.q_proj": { |
| "mse": 0.00013748332276009023, |
| "rmse": 0.011725328258095389, |
| "relative_error": 0.11656835122202536, |
| "max_error": 1.0129890441894531, |
| "sparsity": 0.39558879534403485, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.11.self_attn.k_proj": { |
| "mse": 0.00012948595394846052, |
| "rmse": 0.01137918951193188, |
| "relative_error": 0.11476094516158379, |
| "max_error": 0.6671142578125, |
| "sparsity": 0.3946519692738851, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.11.self_attn.v_proj": { |
| "mse": 8.885016723070294e-05, |
| "rmse": 0.009426036666102193, |
| "relative_error": 0.1168312712891464, |
| "max_error": 0.3714599609375, |
| "sparsity": 0.3978262742360433, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.11.self_attn.o_proj": { |
| "mse": 0.00010273691441398114, |
| "rmse": 0.010135921981447032, |
| "relative_error": 0.13194819940886102, |
| "max_error": 0.6239433288574219, |
| "sparsity": 0.3784929911295573, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.11.mlp.gate_proj": { |
| "mse": 8.606556366430596e-05, |
| "rmse": 0.009277152777889666, |
| "relative_error": 0.10805389747567434, |
| "max_error": 0.801422119140625, |
| "sparsity": 0.3931015928586324, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.11.mlp.up_proj": { |
| "mse": 9.808417235035449e-05, |
| "rmse": 0.00990374536982623, |
| "relative_error": 0.10907260789107497, |
| "max_error": 0.476348876953125, |
| "sparsity": 0.3945031960805257, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.11.mlp.down_proj": { |
| "mse": 0.00013493798905983567, |
| "rmse": 0.011616281206127703, |
| "relative_error": 0.13058919739846145, |
| "max_error": 0.620574951171875, |
| "sparsity": 0.38569573561350506, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.12.self_attn.q_proj": { |
| "mse": 0.00011779519991250709, |
| "rmse": 0.010853349709306665, |
| "relative_error": 0.11593947411175125, |
| "max_error": 0.8235530853271484, |
| "sparsity": 0.39581966400146484, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.12.self_attn.k_proj": { |
| "mse": 0.0001068732890416868, |
| "rmse": 0.010337953813095066, |
| "relative_error": 0.11340367751333466, |
| "max_error": 0.39037322998046875, |
| "sparsity": 0.394009272257487, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.12.self_attn.v_proj": { |
| "mse": 9.546923683956265e-05, |
| "rmse": 0.009770836035855001, |
| "relative_error": 0.1183919527235388, |
| "max_error": 0.3798694610595703, |
| "sparsity": 0.3984665870666504, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.12.self_attn.o_proj": { |
| "mse": 0.00010976817429764196, |
| "rmse": 0.010477030795871603, |
| "relative_error": 0.13323159144961502, |
| "max_error": 0.4802589416503906, |
| "sparsity": 0.37735788027445477, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.12.mlp.gate_proj": { |
| "mse": 8.522283314960077e-05, |
| "rmse": 0.009231621371655186, |
| "relative_error": 0.10777059058583263, |
| "max_error": 0.591461181640625, |
| "sparsity": 0.39298417170842487, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.12.mlp.up_proj": { |
| "mse": 9.667361155152321e-05, |
| "rmse": 0.009832273976630392, |
| "relative_error": 0.10836841175849397, |
| "max_error": 0.5146751403808594, |
| "sparsity": 0.39399949709574383, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.12.mlp.down_proj": { |
| "mse": 0.00013385784404817969, |
| "rmse": 0.011569695071529745, |
| "relative_error": 0.13050353261534786, |
| "max_error": 0.5663547515869141, |
| "sparsity": 0.3858479857444763, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.13.self_attn.q_proj": { |
| "mse": 0.00012699411308858544, |
| "rmse": 0.011269166477099602, |
| "relative_error": 0.1169069941607757, |
| "max_error": 0.6077880859375, |
| "sparsity": 0.3962131341298421, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.13.self_attn.k_proj": { |
| "mse": 0.00011883420665981248, |
| "rmse": 0.010901110340686056, |
| "relative_error": 0.1141555243804878, |
| "max_error": 0.38907623291015625, |
| "sparsity": 0.39407960573832196, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.13.self_attn.v_proj": { |
| "mse": 9.517266880720854e-05, |
| "rmse": 0.009755648046501501, |
| "relative_error": 0.11651403462037346, |
| "max_error": 0.312164306640625, |
| "sparsity": 0.39721115430196124, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.13.self_attn.o_proj": { |
| "mse": 0.00011390489817131311, |
| "rmse": 0.01067262377165583, |
| "relative_error": 0.1345652980895498, |
| "max_error": 0.4114570617675781, |
| "sparsity": 0.3775333563486735, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.13.mlp.gate_proj": { |
| "mse": 8.87833593878895e-05, |
| "rmse": 0.009422492206836231, |
| "relative_error": 0.11013698642560844, |
| "max_error": 1.0255584716796875, |
| "sparsity": 0.39292343457539874, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.13.mlp.up_proj": { |
| "mse": 0.00010229789768345654, |
| "rmse": 0.010114242318802558, |
| "relative_error": 0.1105362572094837, |
| "max_error": 0.4052734375, |
| "sparsity": 0.3939278523127238, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.13.mlp.down_proj": { |
| "mse": 0.0001434191653970629, |
| "rmse": 0.011975774104293338, |
| "relative_error": 0.13397899523618045, |
| "max_error": 0.7285346984863281, |
| "sparsity": 0.3844974438349406, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.14.self_attn.q_proj": { |
| "mse": 0.00012377048551570624, |
| "rmse": 0.011125218447999402, |
| "relative_error": 0.1145393756245998, |
| "max_error": 0.37328338623046875, |
| "sparsity": 0.3949705759684245, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.14.self_attn.k_proj": { |
| "mse": 0.00012177479220554233, |
| "rmse": 0.01103516163024096, |
| "relative_error": 0.11414312930004775, |
| "max_error": 0.39933013916015625, |
| "sparsity": 0.3937130769093831, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.14.self_attn.v_proj": { |
| "mse": 0.00010253102664137259, |
| "rmse": 0.010125760546318118, |
| "relative_error": 0.11949763612113624, |
| "max_error": 0.45636940002441406, |
| "sparsity": 0.3970218499501546, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.14.self_attn.o_proj": { |
| "mse": 0.00011449969315435737, |
| "rmse": 0.010700452941551464, |
| "relative_error": 0.13257149022193387, |
| "max_error": 1.1525955200195312, |
| "sparsity": 0.37923351923624676, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.14.mlp.gate_proj": { |
| "mse": 9.163775393972173e-05, |
| "rmse": 0.009572761040563049, |
| "relative_error": 0.11190023092408524, |
| "max_error": 0.83123779296875, |
| "sparsity": 0.3928397297859192, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.14.mlp.up_proj": { |
| "mse": 0.00010870010737562552, |
| "rmse": 0.01042593436463253, |
| "relative_error": 0.11280875161412238, |
| "max_error": 0.5446014404296875, |
| "sparsity": 0.3936503529548645, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.14.mlp.down_proj": { |
| "mse": 0.00016400107415392995, |
| "rmse": 0.012806290413462047, |
| "relative_error": 0.14206598069839568, |
| "max_error": 0.7069358825683594, |
| "sparsity": 0.3815520207087199, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.15.self_attn.q_proj": { |
| "mse": 9.789341129362583e-05, |
| "rmse": 0.009894109929327944, |
| "relative_error": 0.11414555337074125, |
| "max_error": 0.33676910400390625, |
| "sparsity": 0.3949731985727946, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.15.self_attn.k_proj": { |
| "mse": 8.986466855276376e-05, |
| "rmse": 0.009479697703659319, |
| "relative_error": 0.1125592821962688, |
| "max_error": 0.3172111511230469, |
| "sparsity": 0.39313093821207684, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.15.self_attn.v_proj": { |
| "mse": 0.00011400174116715789, |
| "rmse": 0.010677159789342758, |
| "relative_error": 0.1173175552733063, |
| "max_error": 0.3896446228027344, |
| "sparsity": 0.3969427744547526, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.15.self_attn.o_proj": { |
| "mse": 0.0001631157356314361, |
| "rmse": 0.012771677087659087, |
| "relative_error": 0.1473888285697514, |
| "max_error": 1.012359619140625, |
| "sparsity": 0.3775180180867513, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.15.mlp.gate_proj": { |
| "mse": 9.187518298858777e-05, |
| "rmse": 0.009585154301762062, |
| "relative_error": 0.11252498530861223, |
| "max_error": 0.5476951599121094, |
| "sparsity": 0.39175093173980713, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.15.mlp.up_proj": { |
| "mse": 0.00011229647498112172, |
| "rmse": 0.010597003113197698, |
| "relative_error": 0.11439855196424717, |
| "max_error": 0.32222747802734375, |
| "sparsity": 0.39260558287302655, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.15.mlp.down_proj": { |
| "mse": 0.00017591984942555428, |
| "rmse": 0.01326347802899203, |
| "relative_error": 0.1462905444778315, |
| "max_error": 0.5610237121582031, |
| "sparsity": 0.37977610031763714, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.16.self_attn.q_proj": { |
| "mse": 0.0001145480782724917, |
| "rmse": 0.010702713593873831, |
| "relative_error": 0.11975183055006922, |
| "max_error": 0.4313373565673828, |
| "sparsity": 0.39432207743326825, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.16.self_attn.k_proj": { |
| "mse": 0.0001037448673741892, |
| "rmse": 0.010185522439923697, |
| "relative_error": 0.11635053860916868, |
| "max_error": 0.4398765563964844, |
| "sparsity": 0.392624298731486, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.16.self_attn.v_proj": { |
| "mse": 0.00011915640061488375, |
| "rmse": 0.010915878371202371, |
| "relative_error": 0.1201440636287298, |
| "max_error": 0.3507728576660156, |
| "sparsity": 0.39522210756937665, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.16.self_attn.o_proj": { |
| "mse": 0.00014365943206939846, |
| "rmse": 0.01198580126939365, |
| "relative_error": 0.13777784807752752, |
| "max_error": 0.62738037109375, |
| "sparsity": 0.37827134132385254, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.16.mlp.gate_proj": { |
| "mse": 9.585679799783975e-05, |
| "rmse": 0.009790648497308018, |
| "relative_error": 0.11537675215561138, |
| "max_error": 0.4750213623046875, |
| "sparsity": 0.39047886927922565, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.16.mlp.up_proj": { |
| "mse": 0.00011986482422798872, |
| "rmse": 0.010948279509949895, |
| "relative_error": 0.11754970772120121, |
| "max_error": 0.3110523223876953, |
| "sparsity": 0.39113156000773114, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.16.mlp.down_proj": { |
| "mse": 0.00017797393957152963, |
| "rmse": 0.013340687372528059, |
| "relative_error": 0.14594216595449572, |
| "max_error": 0.7946376800537109, |
| "sparsity": 0.3795015613238017, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.17.self_attn.q_proj": { |
| "mse": 0.00012077944120392203, |
| "rmse": 0.010989970027435108, |
| "relative_error": 0.12809202973827413, |
| "max_error": 0.36618804931640625, |
| "sparsity": 0.3912224769592285, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.17.self_attn.k_proj": { |
| "mse": 0.00010802940232679248, |
| "rmse": 0.010393719369253361, |
| "relative_error": 0.12436472277530712, |
| "max_error": 0.3511810302734375, |
| "sparsity": 0.39031481742858887, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.17.self_attn.v_proj": { |
| "mse": 0.00015826526214368641, |
| "rmse": 0.01258035222653509, |
| "relative_error": 0.12655687426742085, |
| "max_error": 0.40193939208984375, |
| "sparsity": 0.3940921624501546, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.17.self_attn.o_proj": { |
| "mse": 0.00017517831292934716, |
| "rmse": 0.013235494434638517, |
| "relative_error": 0.13974743449718308, |
| "max_error": 0.38881492614746094, |
| "sparsity": 0.37921682993570965, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.17.mlp.gate_proj": { |
| "mse": 9.643776138545945e-05, |
| "rmse": 0.009820272979172191, |
| "relative_error": 0.11469761926663953, |
| "max_error": 0.49706268310546875, |
| "sparsity": 0.38984374205271405, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.17.mlp.up_proj": { |
| "mse": 0.00011767178511945531, |
| "rmse": 0.010847662656971561, |
| "relative_error": 0.1163379295193364, |
| "max_error": 0.2964920997619629, |
| "sparsity": 0.3904351790746053, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.17.mlp.down_proj": { |
| "mse": 0.00019267346942797303, |
| "rmse": 0.013880686922050113, |
| "relative_error": 0.1512596588641169, |
| "max_error": 0.8572845458984375, |
| "sparsity": 0.3779032230377197, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.18.self_attn.q_proj": { |
| "mse": 0.00010977545753121376, |
| "rmse": 0.010477378371100939, |
| "relative_error": 0.12804331106255126, |
| "max_error": 0.3930988311767578, |
| "sparsity": 0.3909265200297038, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.18.self_attn.k_proj": { |
| "mse": 9.381980635225773e-05, |
| "rmse": 0.009686062479266677, |
| "relative_error": 0.12292216202745253, |
| "max_error": 0.33354949951171875, |
| "sparsity": 0.3890887101491292, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.18.self_attn.v_proj": { |
| "mse": 0.0001693356316536665, |
| "rmse": 0.013012902506883946, |
| "relative_error": 0.12694288127788866, |
| "max_error": 0.5638923645019531, |
| "sparsity": 0.3930181662241618, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.18.self_attn.o_proj": { |
| "mse": 0.0002035392972175032, |
| "rmse": 0.01426671991795953, |
| "relative_error": 0.14500096842541085, |
| "max_error": 0.6965866088867188, |
| "sparsity": 0.3788052399953206, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.18.mlp.gate_proj": { |
| "mse": 9.147105447482318e-05, |
| "rmse": 0.009564050108339207, |
| "relative_error": 0.11277841748947105, |
| "max_error": 0.40427398681640625, |
| "sparsity": 0.38996777931849164, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.18.mlp.up_proj": { |
| "mse": 0.00011604203609749675, |
| "rmse": 0.010772280914341991, |
| "relative_error": 0.11471436240503491, |
| "max_error": 0.28387451171875, |
| "sparsity": 0.3903919855753581, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.18.mlp.down_proj": { |
| "mse": 0.00020416387997101992, |
| "rmse": 0.01428859265186813, |
| "relative_error": 0.15430695516149365, |
| "max_error": 1.15777587890625, |
| "sparsity": 0.3757058580716451, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.19.self_attn.q_proj": { |
| "mse": 0.00010159470548387617, |
| "rmse": 0.010079419898182443, |
| "relative_error": 0.13041868423058764, |
| "max_error": 0.30798912048339844, |
| "sparsity": 0.39068396886189777, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.19.self_attn.k_proj": { |
| "mse": 8.797407645033672e-05, |
| "rmse": 0.0093794496880327, |
| "relative_error": 0.12614134813623457, |
| "max_error": 0.2617206573486328, |
| "sparsity": 0.3890175024668376, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.19.self_attn.v_proj": { |
| "mse": 0.00019485123630147427, |
| "rmse": 0.013958912432617173, |
| "relative_error": 0.12632223635733977, |
| "max_error": 0.3944683074951172, |
| "sparsity": 0.39249809583028156, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.19.self_attn.o_proj": { |
| "mse": 0.0002515649248380214, |
| "rmse": 0.01586079836698082, |
| "relative_error": 0.15257642322392984, |
| "max_error": 1.296630859375, |
| "sparsity": 0.37897300720214844, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.19.mlp.gate_proj": { |
| "mse": 8.597908890806139e-05, |
| "rmse": 0.009272490976434617, |
| "relative_error": 0.11023672718946166, |
| "max_error": 0.343109130859375, |
| "sparsity": 0.39063696066538495, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.19.mlp.up_proj": { |
| "mse": 0.00011183493188582361, |
| "rmse": 0.010575203633302936, |
| "relative_error": 0.1119852751134503, |
| "max_error": 0.2927464246749878, |
| "sparsity": 0.39104316631952923, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.19.mlp.down_proj": { |
| "mse": 0.0001817988813854754, |
| "rmse": 0.013483281551071882, |
| "relative_error": 0.14447528253129333, |
| "max_error": 0.69659423828125, |
| "sparsity": 0.37729281187057495, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.20.self_attn.q_proj": { |
| "mse": 0.00010160417150473222, |
| "rmse": 0.010079889458954014, |
| "relative_error": 0.13637383986791093, |
| "max_error": 0.2930039167404175, |
| "sparsity": 0.3889306386311849, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.20.self_attn.k_proj": { |
| "mse": 9.039611904881895e-05, |
| "rmse": 0.009507687365959135, |
| "relative_error": 0.13288590446574086, |
| "max_error": 0.29769229888916016, |
| "sparsity": 0.38741453488667804, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.20.self_attn.v_proj": { |
| "mse": 0.0002304271620232612, |
| "rmse": 0.015179827470141457, |
| "relative_error": 0.12986079018176874, |
| "max_error": 0.49585485458374023, |
| "sparsity": 0.39007751146952313, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.20.self_attn.o_proj": { |
| "mse": 0.00018717959756031632, |
| "rmse": 0.013681359492401197, |
| "relative_error": 0.12420474417407759, |
| "max_error": 0.5940694808959961, |
| "sparsity": 0.3826877276102702, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.20.mlp.gate_proj": { |
| "mse": 7.991305756149814e-05, |
| "rmse": 0.008939410358714837, |
| "relative_error": 0.10672288257016303, |
| "max_error": 0.37044525146484375, |
| "sparsity": 0.391612708568573, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.20.mlp.up_proj": { |
| "mse": 0.00010522006778046489, |
| "rmse": 0.010257683353489953, |
| "relative_error": 0.10832224226656331, |
| "max_error": 0.25397777557373047, |
| "sparsity": 0.39186179637908936, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.20.mlp.down_proj": { |
| "mse": 0.00016892752319108695, |
| "rmse": 0.012997212131495236, |
| "relative_error": 0.1382850726778082, |
| "max_error": 0.695037841796875, |
| "sparsity": 0.37873925765355426, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.21.self_attn.q_proj": { |
| "mse": 8.775975584285334e-05, |
| "rmse": 0.009368017711493362, |
| "relative_error": 0.13237826106082362, |
| "max_error": 0.3493930697441101, |
| "sparsity": 0.3894302050272624, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.21.self_attn.k_proj": { |
| "mse": 7.635178917553276e-05, |
| "rmse": 0.008737951085668353, |
| "relative_error": 0.12911753812683308, |
| "max_error": 0.31544029712677, |
| "sparsity": 0.38779481252034503, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.21.self_attn.v_proj": { |
| "mse": 0.00024096955894492567, |
| "rmse": 0.015523194224930823, |
| "relative_error": 0.12822875184818627, |
| "max_error": 0.45868492126464844, |
| "sparsity": 0.3911566734313965, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.21.self_attn.o_proj": { |
| "mse": 0.00024130858946591616, |
| "rmse": 0.015534110514152915, |
| "relative_error": 0.13656648222552506, |
| "max_error": 0.6318511962890625, |
| "sparsity": 0.379139502843221, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.21.mlp.gate_proj": { |
| "mse": 7.701908180024475e-05, |
| "rmse": 0.008776051606516722, |
| "relative_error": 0.10519762249970133, |
| "max_error": 0.29144287109375, |
| "sparsity": 0.3916556040445964, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.21.mlp.up_proj": { |
| "mse": 0.00010363446926930919, |
| "rmse": 0.010180101633545177, |
| "relative_error": 0.1068552422607346, |
| "max_error": 0.2808570861816406, |
| "sparsity": 0.39187947909037274, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.21.mlp.down_proj": { |
| "mse": 0.00017394559108652174, |
| "rmse": 0.013188843432481931, |
| "relative_error": 0.13934798047636146, |
| "max_error": 0.806182861328125, |
| "sparsity": 0.3770946264266968, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.22.self_attn.q_proj": { |
| "mse": 6.889259384479374e-05, |
| "rmse": 0.00830015625423966, |
| "relative_error": 0.11974573199073539, |
| "max_error": 0.37679290771484375, |
| "sparsity": 0.39107751846313477, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.22.self_attn.k_proj": { |
| "mse": 6.384578591678292e-05, |
| "rmse": 0.00799035580664484, |
| "relative_error": 0.11802824517589042, |
| "max_error": 0.2800483703613281, |
| "sparsity": 0.38954949378967285, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.22.self_attn.v_proj": { |
| "mse": 0.0002198831207351759, |
| "rmse": 0.014828456451538572, |
| "relative_error": 0.12174802900805574, |
| "max_error": 0.40467071533203125, |
| "sparsity": 0.3936028480529785, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.22.self_attn.o_proj": { |
| "mse": 0.00026977865491062403, |
| "rmse": 0.016424940027611183, |
| "relative_error": 0.14430395391322967, |
| "max_error": 0.4811992645263672, |
| "sparsity": 0.37709347407023114, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.22.mlp.gate_proj": { |
| "mse": 8.570995851187035e-05, |
| "rmse": 0.009257967299135937, |
| "relative_error": 0.1092938200920434, |
| "max_error": 0.8341293334960938, |
| "sparsity": 0.3915645281473796, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.22.mlp.up_proj": { |
| "mse": 0.00011422947864048183, |
| "rmse": 0.010687819171397026, |
| "relative_error": 0.11077979518624878, |
| "max_error": 0.6844406127929688, |
| "sparsity": 0.39192960659662884, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.22.mlp.down_proj": { |
| "mse": 0.0001934868050739169, |
| "rmse": 0.013909953453333944, |
| "relative_error": 0.1468472238781438, |
| "max_error": 0.861419677734375, |
| "sparsity": 0.3759889403978984, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.23.self_attn.q_proj": { |
| "mse": 4.9034068069886416e-05, |
| "rmse": 0.00700243301073894, |
| "relative_error": 0.11425448677610671, |
| "max_error": 0.2026386260986328, |
| "sparsity": 0.3900686899820964, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.23.self_attn.k_proj": { |
| "mse": 4.728814383270219e-05, |
| "rmse": 0.006876637538266954, |
| "relative_error": 0.1134439127209608, |
| "max_error": 0.22553253173828125, |
| "sparsity": 0.3899570306142171, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.23.self_attn.v_proj": { |
| "mse": 0.00021581994951702654, |
| "rmse": 0.01469081173785256, |
| "relative_error": 0.11788501133986323, |
| "max_error": 0.37439680099487305, |
| "sparsity": 0.39295633633931476, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.23.self_attn.o_proj": { |
| "mse": 0.0002346659603063017, |
| "rmse": 0.015318810668792199, |
| "relative_error": 0.13143557617866453, |
| "max_error": 1.225677490234375, |
| "sparsity": 0.3775166670481364, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.23.mlp.gate_proj": { |
| "mse": 0.00010114066390087828, |
| "rmse": 0.010056871476800242, |
| "relative_error": 0.11570676856337891, |
| "max_error": 0.42010498046875, |
| "sparsity": 0.3895459572474162, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.23.mlp.up_proj": { |
| "mse": 0.0001254764065379277, |
| "rmse": 0.011201625173961486, |
| "relative_error": 0.11651321952774005, |
| "max_error": 0.36852455139160156, |
| "sparsity": 0.38976337512334186, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| }, |
| "model.text_model.layers.23.mlp.down_proj": { |
| "mse": 0.00048252849956043065, |
| "rmse": 0.021966531350225294, |
| "relative_error": 0.24617655752789447, |
| "max_error": 1.2671375274658203, |
| "sparsity": 0.3508457938830058, |
| "effective_bits": 9.0, |
| "sparse_nnz": 0, |
| "n_planes": 3, |
| "rescued_rows": 0 |
| } |
| }, |
| "summary": { |
| "method_name": "Broad-tritplane3", |
| "model_family": "image_text_to_text", |
| "selected_components": [ |
| "text_backbone", |
| "multimodal_connector" |
| ], |
| "quantized_modules": 169, |
| "quantized_params": 1631846400, |
| "quantized_fraction": 0.7263029115631221, |
| "avg_relative_error": 0.12359459755394277, |
| "avg_effective_bits": 9.0, |
| "full_model_effective_bits": 10.915879619058146, |
| "compression_ratio": 1.4657545299478603 |
| }, |
| "method_name": "Broad-tritplane3", |
| "model_family": "image_text_to_text", |
| "format_family": "tritplane_small", |
| "format_version": "1.0", |
| "total_packed_bytes": 1835827200, |
| "total_fp16_bytes": 3263692800, |
| "compression_ratio": 1.7777777777777777 |
| } |