Automatic Speech Recognition
Transformers
ternary-quant
quantization
ternary
audio
speech-to-text
whisper
Instructions to use AsadIsmail/whisper-medium-ternary with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use AsadIsmail/whisper-medium-ternary with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition", model="AsadIsmail/whisper-medium-ternary")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("AsadIsmail/whisper-medium-ternary", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "model_name": "openai/whisper-medium", | |
| "model_config": { | |
| "transformers_version": "5.5.3", | |
| "architectures": [ | |
| "WhisperForConditionalGeneration" | |
| ], | |
| "output_hidden_states": false, | |
| "return_dict": true, | |
| "dtype": "float32", | |
| "chunk_size_feed_forward": 0, | |
| "is_encoder_decoder": true, | |
| "id2label": { | |
| "0": "LABEL_0", | |
| "1": "LABEL_1" | |
| }, | |
| "label2id": { | |
| "LABEL_0": 0, | |
| "LABEL_1": 1 | |
| }, | |
| "problem_type": null, | |
| "vocab_size": 51865, | |
| "num_mel_bins": 80, | |
| "encoder_layers": 24, | |
| "encoder_attention_heads": 16, | |
| "decoder_layers": 24, | |
| "decoder_attention_heads": 16, | |
| "decoder_ffn_dim": 4096, | |
| "encoder_ffn_dim": 4096, | |
| "encoder_layerdrop": 0.0, | |
| "decoder_layerdrop": 0.0, | |
| "decoder_start_token_id": 50258, | |
| "use_cache": true, | |
| "activation_function": "gelu", | |
| "d_model": 1024, | |
| "dropout": 0.0, | |
| "attention_dropout": 0.0, | |
| "activation_dropout": 0.0, | |
| "init_std": 0.02, | |
| "scale_embedding": false, | |
| "max_source_positions": 1500, | |
| "max_target_positions": 448, | |
| "pad_token_id": 50257, | |
| "bos_token_id": 50257, | |
| "eos_token_id": 50257, | |
| "suppress_tokens": [ | |
| 1, | |
| 2, | |
| 7, | |
| 8, | |
| 9, | |
| 10, | |
| 14, | |
| 25, | |
| 26, | |
| 27, | |
| 28, | |
| 29, | |
| 31, | |
| 58, | |
| 59, | |
| 60, | |
| 61, | |
| 62, | |
| 63, | |
| 90, | |
| 91, | |
| 92, | |
| 93, | |
| 359, | |
| 503, | |
| 522, | |
| 542, | |
| 873, | |
| 893, | |
| 902, | |
| 918, | |
| 922, | |
| 931, | |
| 1350, | |
| 1853, | |
| 1982, | |
| 2460, | |
| 2627, | |
| 3246, | |
| 3253, | |
| 3268, | |
| 3536, | |
| 3846, | |
| 3961, | |
| 4183, | |
| 4667, | |
| 6585, | |
| 6647, | |
| 7273, | |
| 9061, | |
| 9383, | |
| 10428, | |
| 10929, | |
| 11938, | |
| 12033, | |
| 12331, | |
| 12562, | |
| 13793, | |
| 14157, | |
| 14635, | |
| 15265, | |
| 15618, | |
| 16553, | |
| 16604, | |
| 18362, | |
| 18956, | |
| 20075, | |
| 21675, | |
| 22520, | |
| 26130, | |
| 26161, | |
| 26435, | |
| 28279, | |
| 29464, | |
| 31650, | |
| 32302, | |
| 32470, | |
| 36865, | |
| 42863, | |
| 47425, | |
| 49870, | |
| 50254, | |
| 50258, | |
| 50358, | |
| 50359, | |
| 50360, | |
| 50361, | |
| 50362 | |
| ], | |
| "begin_suppress_tokens": [ | |
| 220, | |
| 50257 | |
| ], | |
| "use_weighted_layer_sum": false, | |
| "classifier_proj_size": 256, | |
| "apply_spec_augment": false, | |
| "mask_time_prob": 0.05, | |
| "mask_time_length": 10, | |
| "mask_time_min_masks": 2, | |
| "mask_feature_prob": 0.0, | |
| "mask_feature_length": 10, | |
| "mask_feature_min_masks": 0, | |
| "median_filter_width": 7, | |
| "tie_word_embeddings": true, | |
| "_name_or_path": "openai/whisper-medium", | |
| "forced_decoder_ids": [ | |
| [ | |
| 1, | |
| 50259 | |
| ], | |
| [ | |
| 2, | |
| 50359 | |
| ], | |
| [ | |
| 3, | |
| 50363 | |
| ] | |
| ], | |
| "model_type": "whisper", | |
| "output_attentions": false | |
| }, | |
| "quant_config": { | |
| "components": [ | |
| "decoder" | |
| ], | |
| "scheme": "tritplane3", | |
| "group_size": 32, | |
| "n_iter": 10, | |
| "salient_fraction": 0.0, | |
| "rescue_fraction": 0.0, | |
| "n_planes": 3, | |
| "allow_all_linear": false, | |
| "target_module_names": [ | |
| "Wqkv", | |
| "att_proj", | |
| "attn.proj", | |
| "attn.qkv", | |
| "c_attn", | |
| "c_fc", | |
| "c_proj", | |
| "dense", | |
| "dense_4h_to_h", | |
| "dense_h_to_4h", | |
| "down_proj", | |
| "fc1", | |
| "fc2", | |
| "ff_proj", | |
| "gate_proj", | |
| "gate_up_proj", | |
| "k", | |
| "k_proj", | |
| "linear", | |
| "o", | |
| "o_proj", | |
| "out_proj", | |
| "per_layer_input_gate", | |
| "per_layer_projection", | |
| "proj", | |
| "q", | |
| "q_proj", | |
| "qkv", | |
| "qkv_proj", | |
| "query_key_value", | |
| "up_proj", | |
| "v", | |
| "v_proj", | |
| "w1", | |
| "w2", | |
| "w3", | |
| "wi", | |
| "wi_0", | |
| "wi_1", | |
| "wo" | |
| ], | |
| "max_length": 160, | |
| "calibration_batch_size": 2, | |
| "calibration_prompts": null, | |
| "vlm_use_demo_image": true | |
| }, | |
| "plan": {}, | |
| "layer_info": { | |
| "model.decoder.layers.0.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.0.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.0.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.0.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.0.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.0.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.0.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.0.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.0.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.0.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.1.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.2.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.3.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.4.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.5.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.6.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.7.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.8.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.9.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.10.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.11.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.12.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.13.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.14.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.15.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.16.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.17.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.18.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.19.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.20.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.21.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.22.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.self_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.self_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.self_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.self_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.encoder_attn.k_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.encoder_attn.v_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.encoder_attn.q_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.encoder_attn.out_proj": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 1048576, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 1179648, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.fc1": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 4096, | |
| 1024 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| }, | |
| "model.decoder.layers.23.fc2": { | |
| "scheme": "tritplane_small_v1", | |
| "shape": [ | |
| 1024, | |
| 4096 | |
| ], | |
| "dtype": "torch.float32", | |
| "num_elements": 4194304, | |
| "n_planes": 3, | |
| "group_sizes": [ | |
| 32, | |
| 32, | |
| 32 | |
| ], | |
| "rescued_rows": 0, | |
| "stored_bytes": 4718592, | |
| "effective_bits": 9.0 | |
| } | |
| }, | |
| "stats": { | |
| "model.decoder.layers.0.self_attn.k_proj": { | |
| "mse": 8.22391393739963e-06, | |
| "rmse": 0.0028677367273513147, | |
| "relative_error": 0.11225825031610676, | |
| "max_error": 0.07249832153320312, | |
| "sparsity": 0.39663855234781903, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.0.self_attn.v_proj": { | |
| "mse": 2.537128693802515e-06, | |
| "rmse": 0.001592836681459375, | |
| "relative_error": 0.1253849898161065, | |
| "max_error": 0.052634596824645996, | |
| "sparsity": 0.3911902109781901, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.0.self_attn.q_proj": { | |
| "mse": 7.867356544011272e-06, | |
| "rmse": 0.002804880843103905, | |
| "relative_error": 0.11485303222807783, | |
| "max_error": 0.12257003784179688, | |
| "sparsity": 0.3965638478597005, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.0.self_attn.out_proj": { | |
| "mse": 2.3530305952590425e-06, | |
| "rmse": 0.0015339591243768663, | |
| "relative_error": 0.12316348329498344, | |
| "max_error": 0.05019688606262207, | |
| "sparsity": 0.3834683100382487, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.0.encoder_attn.k_proj": { | |
| "mse": 2.932305505964905e-06, | |
| "rmse": 0.0017123975899203156, | |
| "relative_error": 0.14878440781545277, | |
| "max_error": 0.052741289138793945, | |
| "sparsity": 0.38413047790527344, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.0.encoder_attn.v_proj": { | |
| "mse": 2.9527541300922167e-06, | |
| "rmse": 0.0017183579749552235, | |
| "relative_error": 0.15262952638414398, | |
| "max_error": 0.04509568214416504, | |
| "sparsity": 0.3845523198445638, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.0.encoder_attn.q_proj": { | |
| "mse": 1.7530026070744498e-06, | |
| "rmse": 0.001324010047950713, | |
| "relative_error": 0.11584667248381872, | |
| "max_error": 0.0369831919670105, | |
| "sparsity": 0.38655567169189453, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.0.encoder_attn.out_proj": { | |
| "mse": 3.997231488028774e-06, | |
| "rmse": 0.0019993077522054412, | |
| "relative_error": 0.1804483932496295, | |
| "max_error": 0.04954719543457031, | |
| "sparsity": 0.3609612782796224, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.0.fc1": { | |
| "mse": 3.4595873330545146e-06, | |
| "rmse": 0.0018599965949040106, | |
| "relative_error": 0.12480082603415626, | |
| "max_error": 0.06718039512634277, | |
| "sparsity": 0.39367198944091797, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.0.fc2": { | |
| "mse": 9.965755452867597e-06, | |
| "rmse": 0.0031568584784351034, | |
| "relative_error": 0.258659206797803, | |
| "max_error": 0.10831642150878906, | |
| "sparsity": 0.3432497978210449, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.self_attn.k_proj": { | |
| "mse": 2.03306717594387e-06, | |
| "rmse": 0.0014258566463511927, | |
| "relative_error": 0.10631741618956413, | |
| "max_error": 0.050121307373046875, | |
| "sparsity": 0.38936614990234375, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.self_attn.v_proj": { | |
| "mse": 1.2314576451899484e-06, | |
| "rmse": 0.0011097106132636329, | |
| "relative_error": 0.12502401296497923, | |
| "max_error": 0.036118507385253906, | |
| "sparsity": 0.39406808217366535, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.self_attn.q_proj": { | |
| "mse": 2.091504939016886e-06, | |
| "rmse": 0.0014462036298588405, | |
| "relative_error": 0.114342204814357, | |
| "max_error": 0.052300095558166504, | |
| "sparsity": 0.3865979512532552, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.self_attn.out_proj": { | |
| "mse": 2.3585996586916735e-06, | |
| "rmse": 0.0015357733096689997, | |
| "relative_error": 0.16983263226843545, | |
| "max_error": 0.055203378200531006, | |
| "sparsity": 0.36684226989746094, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.encoder_attn.k_proj": { | |
| "mse": 2.569649268480134e-06, | |
| "rmse": 0.0016030125603001787, | |
| "relative_error": 0.145535704383827, | |
| "max_error": 0.061995506286621094, | |
| "sparsity": 0.3825670878092448, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.encoder_attn.v_proj": { | |
| "mse": 2.6175509901804617e-06, | |
| "rmse": 0.001617884727099079, | |
| "relative_error": 0.152717885751071, | |
| "max_error": 0.05068695545196533, | |
| "sparsity": 0.385009765625, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.encoder_attn.q_proj": { | |
| "mse": 1.3694378822037834e-06, | |
| "rmse": 0.0011702298416139383, | |
| "relative_error": 0.10105443997993861, | |
| "max_error": 0.051283836364746094, | |
| "sparsity": 0.3925361633300781, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.encoder_attn.out_proj": { | |
| "mse": 3.2205025490839034e-06, | |
| "rmse": 0.001794575868857013, | |
| "relative_error": 0.1678963406077039, | |
| "max_error": 0.07158446311950684, | |
| "sparsity": 0.3641344706217448, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.fc1": { | |
| "mse": 2.8058025236532558e-06, | |
| "rmse": 0.001675052991297068, | |
| "relative_error": 0.12601454584557661, | |
| "max_error": 0.08018875122070312, | |
| "sparsity": 0.38286805152893066, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.1.fc2": { | |
| "mse": 7.15729493094841e-06, | |
| "rmse": 0.0026753121184169165, | |
| "relative_error": 0.2385123906472085, | |
| "max_error": 0.1420459747314453, | |
| "sparsity": 0.37098320325215656, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.self_attn.k_proj": { | |
| "mse": 1.408790922141634e-06, | |
| "rmse": 0.0011869249858949106, | |
| "relative_error": 0.15586319641563348, | |
| "max_error": 0.05228686332702637, | |
| "sparsity": 0.38851292928059894, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.self_attn.v_proj": { | |
| "mse": 2.4211458367062733e-06, | |
| "rmse": 0.0015560031608921215, | |
| "relative_error": 0.19859374281883027, | |
| "max_error": 0.0744476318359375, | |
| "sparsity": 0.39120133717854816, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.self_attn.q_proj": { | |
| "mse": 1.3298435987962876e-06, | |
| "rmse": 0.0011531884489519861, | |
| "relative_error": 0.1712732837930552, | |
| "max_error": 0.044757843017578125, | |
| "sparsity": 0.38156604766845703, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.self_attn.out_proj": { | |
| "mse": 3.029804929610691e-06, | |
| "rmse": 0.0017406334851457646, | |
| "relative_error": 0.2584208819885451, | |
| "max_error": 0.07974696159362793, | |
| "sparsity": 0.35305945078531903, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.encoder_attn.k_proj": { | |
| "mse": 2.2631470528722275e-06, | |
| "rmse": 0.001504375967925647, | |
| "relative_error": 0.1425945197962898, | |
| "max_error": 0.04911303520202637, | |
| "sparsity": 0.3848470052083333, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.encoder_attn.v_proj": { | |
| "mse": 3.066921181016369e-06, | |
| "rmse": 0.0017512627390018805, | |
| "relative_error": 0.15523498710637637, | |
| "max_error": 0.043721556663513184, | |
| "sparsity": 0.3850396474202474, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.encoder_attn.q_proj": { | |
| "mse": 1.306824174207577e-06, | |
| "rmse": 0.0011431641064202362, | |
| "relative_error": 0.10384071954394945, | |
| "max_error": 0.0584259033203125, | |
| "sparsity": 0.39062023162841797, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.encoder_attn.out_proj": { | |
| "mse": 2.39322457673552e-06, | |
| "rmse": 0.0015470050344893902, | |
| "relative_error": 0.13837125067483497, | |
| "max_error": 0.03503692150115967, | |
| "sparsity": 0.3704687754313151, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.fc1": { | |
| "mse": 2.6717477794591105e-06, | |
| "rmse": 0.0016345481881728388, | |
| "relative_error": 0.13424853534381614, | |
| "max_error": 0.12715303897857666, | |
| "sparsity": 0.37990784645080566, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.2.fc2": { | |
| "mse": 6.862672307761386e-06, | |
| "rmse": 0.002619670266991895, | |
| "relative_error": 0.24381065264040552, | |
| "max_error": 0.22676372528076172, | |
| "sparsity": 0.3882887363433838, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.self_attn.k_proj": { | |
| "mse": 1.7996961787503096e-06, | |
| "rmse": 0.0013415275542270123, | |
| "relative_error": 0.1192658384835929, | |
| "max_error": 0.04256439208984375, | |
| "sparsity": 0.37576770782470703, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.self_attn.v_proj": { | |
| "mse": 2.0297898117860314e-06, | |
| "rmse": 0.0014247069213652438, | |
| "relative_error": 0.13746902832804708, | |
| "max_error": 0.03883171081542969, | |
| "sparsity": 0.38527584075927734, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.self_attn.q_proj": { | |
| "mse": 2.1074170035717543e-06, | |
| "rmse": 0.0014516945283260367, | |
| "relative_error": 0.11590249312665402, | |
| "max_error": 0.07066059112548828, | |
| "sparsity": 0.38370641072591144, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.self_attn.out_proj": { | |
| "mse": 1.333709519713011e-06, | |
| "rmse": 0.0011548634203718685, | |
| "relative_error": 0.117272895602355, | |
| "max_error": 0.07680273056030273, | |
| "sparsity": 0.4009917577107747, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.encoder_attn.k_proj": { | |
| "mse": 2.471836751283263e-06, | |
| "rmse": 0.001572207604384123, | |
| "relative_error": 0.14458506102228255, | |
| "max_error": 0.04527485370635986, | |
| "sparsity": 0.3834635416666667, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.encoder_attn.v_proj": { | |
| "mse": 3.16734735861246e-06, | |
| "rmse": 0.0017797042896538907, | |
| "relative_error": 0.15301164247019403, | |
| "max_error": 0.059139907360076904, | |
| "sparsity": 0.3846127192179362, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.encoder_attn.q_proj": { | |
| "mse": 1.4525229516948457e-06, | |
| "rmse": 0.001205206601249282, | |
| "relative_error": 0.10425491084889804, | |
| "max_error": 0.09468531608581543, | |
| "sparsity": 0.3902708689371745, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.encoder_attn.out_proj": { | |
| "mse": 2.678065584404976e-06, | |
| "rmse": 0.0016364796315276814, | |
| "relative_error": 0.1430544824579208, | |
| "max_error": 0.051494598388671875, | |
| "sparsity": 0.37117163340250653, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.fc1": { | |
| "mse": 2.3618308659933973e-06, | |
| "rmse": 0.0015368249301704463, | |
| "relative_error": 0.1245729986149135, | |
| "max_error": 0.05836009979248047, | |
| "sparsity": 0.37219882011413574, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.3.fc2": { | |
| "mse": 1.5611518392688595e-05, | |
| "rmse": 0.003951141403783037, | |
| "relative_error": 0.3720975771798556, | |
| "max_error": 0.5879669189453125, | |
| "sparsity": 0.38265403111775714, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.self_attn.k_proj": { | |
| "mse": 2.8701028895739e-06, | |
| "rmse": 0.0016941378012351592, | |
| "relative_error": 0.11716970889271452, | |
| "max_error": 0.055669307708740234, | |
| "sparsity": 0.3828910191853841, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.self_attn.v_proj": { | |
| "mse": 1.9905580757040298e-06, | |
| "rmse": 0.0014108713887892226, | |
| "relative_error": 0.12301872679402027, | |
| "max_error": 0.053325772285461426, | |
| "sparsity": 0.3898334503173828, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.self_attn.q_proj": { | |
| "mse": 2.990354914800264e-06, | |
| "rmse": 0.0017292642697980733, | |
| "relative_error": 0.11543029947746948, | |
| "max_error": 0.06011366844177246, | |
| "sparsity": 0.3869177500406901, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.self_attn.out_proj": { | |
| "mse": 1.7686877527012257e-06, | |
| "rmse": 0.001329920205388739, | |
| "relative_error": 0.1234683605164969, | |
| "max_error": 0.06157732009887695, | |
| "sparsity": 0.3810313542683919, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.encoder_attn.k_proj": { | |
| "mse": 2.4499702249158872e-06, | |
| "rmse": 0.0015652380729192243, | |
| "relative_error": 0.14753199522550947, | |
| "max_error": 0.04569506645202637, | |
| "sparsity": 0.38436158498128253, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.encoder_attn.v_proj": { | |
| "mse": 3.2714881399442675e-06, | |
| "rmse": 0.0018087255568339459, | |
| "relative_error": 0.15390755351674237, | |
| "max_error": 0.04762768745422363, | |
| "sparsity": 0.38466930389404297, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.encoder_attn.q_proj": { | |
| "mse": 1.2864924201494432e-06, | |
| "rmse": 0.001134236492160891, | |
| "relative_error": 0.10177971901165225, | |
| "max_error": 0.0303804874420166, | |
| "sparsity": 0.39114030202229816, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.encoder_attn.out_proj": { | |
| "mse": 2.4311630113516003e-06, | |
| "rmse": 0.0015592187182533438, | |
| "relative_error": 0.13463845742763503, | |
| "max_error": 0.04355788230895996, | |
| "sparsity": 0.3733196258544922, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.fc1": { | |
| "mse": 2.469627816026332e-06, | |
| "rmse": 0.001571504952593638, | |
| "relative_error": 0.12524985734406285, | |
| "max_error": 0.08176088333129883, | |
| "sparsity": 0.3696654637654622, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.4.fc2": { | |
| "mse": 5.365253400668735e-06, | |
| "rmse": 0.002316301664435946, | |
| "relative_error": 0.21985876196410048, | |
| "max_error": 0.16083449125289917, | |
| "sparsity": 0.3907614549001058, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.self_attn.k_proj": { | |
| "mse": 2.7977457648376003e-06, | |
| "rmse": 0.0016726463358515452, | |
| "relative_error": 0.10678882412118908, | |
| "max_error": 0.06360578536987305, | |
| "sparsity": 0.3896319071451823, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.self_attn.v_proj": { | |
| "mse": 2.054051037703175e-06, | |
| "rmse": 0.0014331960918531612, | |
| "relative_error": 0.11478326691502795, | |
| "max_error": 0.04179668426513672, | |
| "sparsity": 0.3933906555175781, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.self_attn.q_proj": { | |
| "mse": 2.887436266973964e-06, | |
| "rmse": 0.0016992457935725378, | |
| "relative_error": 0.10747942741596392, | |
| "max_error": 0.07212746143341064, | |
| "sparsity": 0.39190673828125, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.self_attn.out_proj": { | |
| "mse": 2.422082388875424e-06, | |
| "rmse": 0.0015563040798235491, | |
| "relative_error": 0.12821936986008606, | |
| "max_error": 0.03042781352996826, | |
| "sparsity": 0.37738768259684247, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.encoder_attn.k_proj": { | |
| "mse": 2.1594835288851755e-06, | |
| "rmse": 0.0014695181281240376, | |
| "relative_error": 0.14820830447000122, | |
| "max_error": 0.05715823173522949, | |
| "sparsity": 0.38480599721272785, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.encoder_attn.v_proj": { | |
| "mse": 2.402921154498472e-06, | |
| "rmse": 0.0015501358503365026, | |
| "relative_error": 0.15172739547983966, | |
| "max_error": 0.049695730209350586, | |
| "sparsity": 0.38500118255615234, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.encoder_attn.q_proj": { | |
| "mse": 1.2580545671880827e-06, | |
| "rmse": 0.0011216303166320365, | |
| "relative_error": 0.10791561656075024, | |
| "max_error": 0.07042884826660156, | |
| "sparsity": 0.39124329884847003, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.encoder_attn.out_proj": { | |
| "mse": 2.0925813259964343e-06, | |
| "rmse": 0.0014465757242524272, | |
| "relative_error": 0.1422147514369969, | |
| "max_error": 0.05846059322357178, | |
| "sparsity": 0.37059179941813153, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.fc1": { | |
| "mse": 3.071619175898377e-06, | |
| "rmse": 0.0017526035421333533, | |
| "relative_error": 0.12686562610632215, | |
| "max_error": 0.0797877311706543, | |
| "sparsity": 0.3853236834208171, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.5.fc2": { | |
| "mse": 5.129960754857166e-06, | |
| "rmse": 0.002264941666987732, | |
| "relative_error": 0.1860462585742565, | |
| "max_error": 0.05859053134918213, | |
| "sparsity": 0.38653842608133954, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.self_attn.k_proj": { | |
| "mse": 2.7227015380049124e-06, | |
| "rmse": 0.001650061070992499, | |
| "relative_error": 0.1084165589968581, | |
| "max_error": 0.05523967742919922, | |
| "sparsity": 0.3882598876953125, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.self_attn.v_proj": { | |
| "mse": 1.960485178642557e-06, | |
| "rmse": 0.0014001732673646348, | |
| "relative_error": 0.11180013694606943, | |
| "max_error": 0.06603431701660156, | |
| "sparsity": 0.3924814860026042, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.self_attn.q_proj": { | |
| "mse": 2.7193116238777293e-06, | |
| "rmse": 0.0016490335423749662, | |
| "relative_error": 0.10691578744139109, | |
| "max_error": 0.04374641180038452, | |
| "sparsity": 0.3899396260579427, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.self_attn.out_proj": { | |
| "mse": 2.0261477402527817e-06, | |
| "rmse": 0.0014234281647672923, | |
| "relative_error": 0.11932234164492085, | |
| "max_error": 0.05954170227050781, | |
| "sparsity": 0.3812958399454753, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.encoder_attn.k_proj": { | |
| "mse": 1.914561835292261e-06, | |
| "rmse": 0.0013836769259087402, | |
| "relative_error": 0.1409572228796846, | |
| "max_error": 0.05499941110610962, | |
| "sparsity": 0.3855908711751302, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.encoder_attn.v_proj": { | |
| "mse": 2.064477939711651e-06, | |
| "rmse": 0.0014368291268315975, | |
| "relative_error": 0.15090229510718225, | |
| "max_error": 0.041681647300720215, | |
| "sparsity": 0.38470935821533203, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.encoder_attn.q_proj": { | |
| "mse": 1.2734592473861994e-06, | |
| "rmse": 0.0011284765160986733, | |
| "relative_error": 0.10325489365368369, | |
| "max_error": 0.03181350231170654, | |
| "sparsity": 0.3910401662190755, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.encoder_attn.out_proj": { | |
| "mse": 1.8945763713418273e-06, | |
| "rmse": 0.0013764361123357043, | |
| "relative_error": 0.1406672765822477, | |
| "max_error": 0.03437471389770508, | |
| "sparsity": 0.37631797790527344, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.fc1": { | |
| "mse": 3.0933047128200997e-06, | |
| "rmse": 0.0017587793246510773, | |
| "relative_error": 0.12332660056166439, | |
| "max_error": 0.09690666198730469, | |
| "sparsity": 0.3895715077718099, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.6.fc2": { | |
| "mse": 5.023699941375526e-06, | |
| "rmse": 0.002241361180482861, | |
| "relative_error": 0.17801352059730607, | |
| "max_error": 0.07062911987304688, | |
| "sparsity": 0.38503503799438477, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.self_attn.k_proj": { | |
| "mse": 2.669979267011513e-06, | |
| "rmse": 0.001634007119633055, | |
| "relative_error": 0.10356912057004246, | |
| "max_error": 0.05674147605895996, | |
| "sparsity": 0.39308420817057294, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.self_attn.v_proj": { | |
| "mse": 1.693527110546711e-06, | |
| "rmse": 0.0013013558739048711, | |
| "relative_error": 0.10513660505308145, | |
| "max_error": 0.04603254795074463, | |
| "sparsity": 0.39510377248128253, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.self_attn.q_proj": { | |
| "mse": 2.754891966105788e-06, | |
| "rmse": 0.0016597867230779345, | |
| "relative_error": 0.10414344160911279, | |
| "max_error": 0.0881616473197937, | |
| "sparsity": 0.3932339350382487, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.self_attn.out_proj": { | |
| "mse": 2.3079376205714652e-06, | |
| "rmse": 0.0015191897908330826, | |
| "relative_error": 0.12670858213626052, | |
| "max_error": 0.0412135124206543, | |
| "sparsity": 0.37942059834798175, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.encoder_attn.k_proj": { | |
| "mse": 3.342863919897354e-06, | |
| "rmse": 0.0018283500539823753, | |
| "relative_error": 0.15063672352428492, | |
| "max_error": 0.0661466121673584, | |
| "sparsity": 0.3860619862874349, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.encoder_attn.v_proj": { | |
| "mse": 2.972983565996401e-06, | |
| "rmse": 0.00172423419696873, | |
| "relative_error": 0.15345657818512687, | |
| "max_error": 0.21555328369140625, | |
| "sparsity": 0.38444073994954425, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.encoder_attn.q_proj": { | |
| "mse": 1.6744468211982166e-06, | |
| "rmse": 0.0012940041812908552, | |
| "relative_error": 0.10236945613803666, | |
| "max_error": 0.060260772705078125, | |
| "sparsity": 0.39227835337320965, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.encoder_attn.out_proj": { | |
| "mse": 2.9717300549236825e-06, | |
| "rmse": 0.0017238706607294186, | |
| "relative_error": 0.15377582478905313, | |
| "max_error": 0.08759880065917969, | |
| "sparsity": 0.3660281499226888, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.fc1": { | |
| "mse": 2.8657561870204518e-06, | |
| "rmse": 0.001692854449449347, | |
| "relative_error": 0.11476676538134392, | |
| "max_error": 0.1609211564064026, | |
| "sparsity": 0.39171473185221356, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.7.fc2": { | |
| "mse": 5.369829978008056e-06, | |
| "rmse": 0.0023172893600083818, | |
| "relative_error": 0.17915047105444631, | |
| "max_error": 0.06115150451660156, | |
| "sparsity": 0.3791286150614421, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.self_attn.k_proj": { | |
| "mse": 2.874462779800524e-06, | |
| "rmse": 0.001695424070785986, | |
| "relative_error": 0.10408217734095601, | |
| "max_error": 0.04898780584335327, | |
| "sparsity": 0.39197508494059247, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.self_attn.v_proj": { | |
| "mse": 1.954801291503827e-06, | |
| "rmse": 0.0013981420855921, | |
| "relative_error": 0.10715554077548149, | |
| "max_error": 0.04911994934082031, | |
| "sparsity": 0.39489396413167316, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.self_attn.q_proj": { | |
| "mse": 2.988884716614848e-06, | |
| "rmse": 0.0017288391239831565, | |
| "relative_error": 0.10488478135736144, | |
| "max_error": 0.052776336669921875, | |
| "sparsity": 0.3927752176920573, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.self_attn.out_proj": { | |
| "mse": 2.2270126009971136e-06, | |
| "rmse": 0.0014923178619171969, | |
| "relative_error": 0.11837970535523554, | |
| "max_error": 0.04682779312133789, | |
| "sparsity": 0.38154157002766925, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.encoder_attn.k_proj": { | |
| "mse": 4.145021193835419e-06, | |
| "rmse": 0.002035932512102358, | |
| "relative_error": 0.14794230558981988, | |
| "max_error": 0.06369626522064209, | |
| "sparsity": 0.38465213775634766, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.encoder_attn.v_proj": { | |
| "mse": 3.111481191808707e-06, | |
| "rmse": 0.0017639391122736372, | |
| "relative_error": 0.15156701666066377, | |
| "max_error": 0.04685235023498535, | |
| "sparsity": 0.38491566975911456, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.encoder_attn.q_proj": { | |
| "mse": 2.2250369511311874e-06, | |
| "rmse": 0.0014916557750135207, | |
| "relative_error": 0.10434207297549174, | |
| "max_error": 0.050441741943359375, | |
| "sparsity": 0.39132245381673175, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.encoder_attn.out_proj": { | |
| "mse": 3.1934757771523437e-06, | |
| "rmse": 0.0017870298758421313, | |
| "relative_error": 0.15283987169747038, | |
| "max_error": 0.08969151973724365, | |
| "sparsity": 0.3676007588704427, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.fc1": { | |
| "mse": 2.79184632745455e-06, | |
| "rmse": 0.0016708819011092765, | |
| "relative_error": 0.11036536299564526, | |
| "max_error": 0.12213349342346191, | |
| "sparsity": 0.3927458127339681, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.8.fc2": { | |
| "mse": 5.2020336624991614e-06, | |
| "rmse": 0.002280796716610045, | |
| "relative_error": 0.17299622967154382, | |
| "max_error": 0.07349264621734619, | |
| "sparsity": 0.3765496412913005, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.self_attn.k_proj": { | |
| "mse": 3.0386931939574424e-06, | |
| "rmse": 0.0017431847847997762, | |
| "relative_error": 0.1058218625977078, | |
| "max_error": 0.0761750340461731, | |
| "sparsity": 0.3904520670572917, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.self_attn.v_proj": { | |
| "mse": 2.23638562601991e-06, | |
| "rmse": 0.0014954549896335596, | |
| "relative_error": 0.11097779406996414, | |
| "max_error": 0.041104793548583984, | |
| "sparsity": 0.3938833872477214, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.self_attn.q_proj": { | |
| "mse": 3.189991730323527e-06, | |
| "rmse": 0.0017860547948827122, | |
| "relative_error": 0.10832728289648673, | |
| "max_error": 0.06943631172180176, | |
| "sparsity": 0.3921623229980469, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.self_attn.out_proj": { | |
| "mse": 2.696985120564932e-06, | |
| "rmse": 0.001642250017678469, | |
| "relative_error": 0.1281785123233537, | |
| "max_error": 0.18055415153503418, | |
| "sparsity": 0.37871678670247394, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.encoder_attn.k_proj": { | |
| "mse": 3.2500684028491378e-06, | |
| "rmse": 0.0018027946091690916, | |
| "relative_error": 0.14403063462528215, | |
| "max_error": 0.056221961975097656, | |
| "sparsity": 0.3845011393229167, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.encoder_attn.v_proj": { | |
| "mse": 3.189758899679873e-06, | |
| "rmse": 0.0017859896135419917, | |
| "relative_error": 0.15269143882686897, | |
| "max_error": 0.04081469774246216, | |
| "sparsity": 0.38484986623128253, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.encoder_attn.q_proj": { | |
| "mse": 1.9499902919051237e-06, | |
| "rmse": 0.001396420528316998, | |
| "relative_error": 0.10472704022264365, | |
| "max_error": 0.03749656677246094, | |
| "sparsity": 0.3913602828979492, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.encoder_attn.out_proj": { | |
| "mse": 2.9134057513147127e-06, | |
| "rmse": 0.0017068701624068283, | |
| "relative_error": 0.14487531428174485, | |
| "max_error": 0.10330677032470703, | |
| "sparsity": 0.3698616027832031, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.fc1": { | |
| "mse": 2.8058291263732826e-06, | |
| "rmse": 0.0016750609321374798, | |
| "relative_error": 0.1102062185498671, | |
| "max_error": 0.19041013717651367, | |
| "sparsity": 0.3934587637583415, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.9.fc2": { | |
| "mse": 5.31405476067448e-06, | |
| "rmse": 0.002305223364594954, | |
| "relative_error": 0.17306741105103132, | |
| "max_error": 0.0740041732788086, | |
| "sparsity": 0.37359078725179035, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.self_attn.k_proj": { | |
| "mse": 2.8235283480171347e-06, | |
| "rmse": 0.0016803357843053676, | |
| "relative_error": 0.10581116696721167, | |
| "max_error": 0.12559711933135986, | |
| "sparsity": 0.39281590779622394, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.self_attn.v_proj": { | |
| "mse": 2.129301947206841e-06, | |
| "rmse": 0.0014592127833893318, | |
| "relative_error": 0.10461520351896639, | |
| "max_error": 0.06537044048309326, | |
| "sparsity": 0.39534536997477215, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.self_attn.q_proj": { | |
| "mse": 2.9481207093340345e-06, | |
| "rmse": 0.0017170092339105327, | |
| "relative_error": 0.10566365640016753, | |
| "max_error": 0.0826263427734375, | |
| "sparsity": 0.3939990997314453, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.self_attn.out_proj": { | |
| "mse": 3.0388287086680066e-06, | |
| "rmse": 0.0017432236542302903, | |
| "relative_error": 0.12941463793508615, | |
| "max_error": 0.04368305206298828, | |
| "sparsity": 0.37840938568115234, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.encoder_attn.k_proj": { | |
| "mse": 4.262691163603449e-06, | |
| "rmse": 0.002064628577638954, | |
| "relative_error": 0.1455730293477049, | |
| "max_error": 0.06619501113891602, | |
| "sparsity": 0.3850278854370117, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.encoder_attn.v_proj": { | |
| "mse": 4.141321824135957e-06, | |
| "rmse": 0.002035023789574942, | |
| "relative_error": 0.15507668501761301, | |
| "max_error": 0.046338558197021484, | |
| "sparsity": 0.38466326395670575, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.encoder_attn.q_proj": { | |
| "mse": 2.3088205125532113e-06, | |
| "rmse": 0.0015194803429308362, | |
| "relative_error": 0.10376620421198642, | |
| "max_error": 0.04837995767593384, | |
| "sparsity": 0.3922373453776042, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.encoder_attn.out_proj": { | |
| "mse": 3.749622464965796e-06, | |
| "rmse": 0.0019363941915234604, | |
| "relative_error": 0.15077165945466575, | |
| "max_error": 0.043268442153930664, | |
| "sparsity": 0.3687426249186198, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.fc1": { | |
| "mse": 2.742124252108624e-06, | |
| "rmse": 0.0016559360652237225, | |
| "relative_error": 0.10881992614733726, | |
| "max_error": 0.09426462650299072, | |
| "sparsity": 0.39169470469156903, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.10.fc2": { | |
| "mse": 5.248308752925368e-06, | |
| "rmse": 0.0022909187573821486, | |
| "relative_error": 0.16801593877256882, | |
| "max_error": 0.09567546844482422, | |
| "sparsity": 0.37460168202718097, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.self_attn.k_proj": { | |
| "mse": 3.0592714210797567e-06, | |
| "rmse": 0.0017490773056328176, | |
| "relative_error": 0.10734568285249818, | |
| "max_error": 0.09254008531570435, | |
| "sparsity": 0.3920777638753255, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.self_attn.v_proj": { | |
| "mse": 2.1376101813075365e-06, | |
| "rmse": 0.0014620568324478828, | |
| "relative_error": 0.10559583271255113, | |
| "max_error": 0.029812395572662354, | |
| "sparsity": 0.3952624003092448, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.self_attn.q_proj": { | |
| "mse": 3.1045001378515735e-06, | |
| "rmse": 0.0017619591759889255, | |
| "relative_error": 0.10693716839240887, | |
| "max_error": 0.090118408203125, | |
| "sparsity": 0.3924741744995117, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.self_attn.out_proj": { | |
| "mse": 3.1066649626154685e-06, | |
| "rmse": 0.0017625733921217206, | |
| "relative_error": 0.13225657965413115, | |
| "max_error": 0.06195068359375, | |
| "sparsity": 0.3758252461751302, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.encoder_attn.k_proj": { | |
| "mse": 4.285246177460067e-06, | |
| "rmse": 0.0020700836160551747, | |
| "relative_error": 0.14720766834565957, | |
| "max_error": 0.06798171997070312, | |
| "sparsity": 0.3851782480875651, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.encoder_attn.v_proj": { | |
| "mse": 4.828083092434099e-06, | |
| "rmse": 0.0021972899427326604, | |
| "relative_error": 0.1586933413077182, | |
| "max_error": 0.05201244354248047, | |
| "sparsity": 0.3848873774210612, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.encoder_attn.q_proj": { | |
| "mse": 2.283047706441721e-06, | |
| "rmse": 0.001510975746476998, | |
| "relative_error": 0.10365660136115314, | |
| "max_error": 0.048856914043426514, | |
| "sparsity": 0.3916358947753906, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.encoder_attn.out_proj": { | |
| "mse": 4.28708926847321e-06, | |
| "rmse": 0.00207052874128161, | |
| "relative_error": 0.15415400301442833, | |
| "max_error": 0.04470396041870117, | |
| "sparsity": 0.3667583465576172, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.fc1": { | |
| "mse": 2.7612836674961727e-06, | |
| "rmse": 0.001661711066189358, | |
| "relative_error": 0.10893671218267384, | |
| "max_error": 0.1811065673828125, | |
| "sparsity": 0.3911479314168294, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.11.fc2": { | |
| "mse": 5.29205226484919e-06, | |
| "rmse": 0.002300446101270184, | |
| "relative_error": 0.16487893138147464, | |
| "max_error": 0.11721420288085938, | |
| "sparsity": 0.373733123143514, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.self_attn.k_proj": { | |
| "mse": 2.9784519028908107e-06, | |
| "rmse": 0.0017258191976249455, | |
| "relative_error": 0.10706666687076272, | |
| "max_error": 0.10212254524230957, | |
| "sparsity": 0.3907483418782552, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.self_attn.v_proj": { | |
| "mse": 2.395839828750468e-06, | |
| "rmse": 0.0015478500666248225, | |
| "relative_error": 0.10913041404713451, | |
| "max_error": 0.05898064374923706, | |
| "sparsity": 0.39325809478759766, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.self_attn.q_proj": { | |
| "mse": 3.0919227356207557e-06, | |
| "rmse": 0.0017583864011134628, | |
| "relative_error": 0.10877376011568239, | |
| "max_error": 0.09144771099090576, | |
| "sparsity": 0.39208539326985675, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.self_attn.out_proj": { | |
| "mse": 3.2778616514406167e-06, | |
| "rmse": 0.0018104865786413929, | |
| "relative_error": 0.13259861849468188, | |
| "max_error": 0.05882430076599121, | |
| "sparsity": 0.3744872411092122, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.encoder_attn.k_proj": { | |
| "mse": 4.489171260502189e-06, | |
| "rmse": 0.002118766447842279, | |
| "relative_error": 0.14508222072774984, | |
| "max_error": 0.06339359283447266, | |
| "sparsity": 0.3842233022054036, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.encoder_attn.v_proj": { | |
| "mse": 4.473969056562055e-06, | |
| "rmse": 0.0021151758925824714, | |
| "relative_error": 0.1564127825375384, | |
| "max_error": 0.05270886421203613, | |
| "sparsity": 0.38474464416503906, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.encoder_attn.q_proj": { | |
| "mse": 2.4208638933487236e-06, | |
| "rmse": 0.0015559125596731726, | |
| "relative_error": 0.10374680012869537, | |
| "max_error": 0.03216981887817383, | |
| "sparsity": 0.39106400807698566, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.encoder_attn.out_proj": { | |
| "mse": 4.03118428948801e-06, | |
| "rmse": 0.002007780936628299, | |
| "relative_error": 0.15214832136551018, | |
| "max_error": 0.05477547645568848, | |
| "sparsity": 0.3683900833129883, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.fc1": { | |
| "mse": 2.6280795282218605e-06, | |
| "rmse": 0.0016211352590767558, | |
| "relative_error": 0.10722597587911696, | |
| "max_error": 0.1983776092529297, | |
| "sparsity": 0.3908070723215739, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.12.fc2": { | |
| "mse": 4.76806872029556e-06, | |
| "rmse": 0.0021835907859064528, | |
| "relative_error": 0.15451343321763653, | |
| "max_error": 0.06719207763671875, | |
| "sparsity": 0.37807003657023114, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.self_attn.k_proj": { | |
| "mse": 3.1032777769723907e-06, | |
| "rmse": 0.0017616122663549976, | |
| "relative_error": 0.10859283459981478, | |
| "max_error": 0.08640223741531372, | |
| "sparsity": 0.3908227284749349, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.self_attn.v_proj": { | |
| "mse": 2.4164837668649852e-06, | |
| "rmse": 0.0015545043476507183, | |
| "relative_error": 0.11088639104509466, | |
| "max_error": 0.09391403198242188, | |
| "sparsity": 0.3939778010050456, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.self_attn.q_proj": { | |
| "mse": 3.1459003366762772e-06, | |
| "rmse": 0.0017736686095988386, | |
| "relative_error": 0.10887852983292154, | |
| "max_error": 0.0973324179649353, | |
| "sparsity": 0.39158089955647785, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.self_attn.out_proj": { | |
| "mse": 3.010093223565491e-06, | |
| "rmse": 0.001734962023666654, | |
| "relative_error": 0.12833051491091624, | |
| "max_error": 0.08125495910644531, | |
| "sparsity": 0.37650807698567706, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.encoder_attn.k_proj": { | |
| "mse": 4.259429260855541e-06, | |
| "rmse": 0.002063838477414243, | |
| "relative_error": 0.14530331557484114, | |
| "max_error": 0.061621904373168945, | |
| "sparsity": 0.38376013437906903, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.encoder_attn.v_proj": { | |
| "mse": 4.011252713098656e-06, | |
| "rmse": 0.002002811202559706, | |
| "relative_error": 0.15357150644740222, | |
| "max_error": 0.05202364921569824, | |
| "sparsity": 0.38488419850667316, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.encoder_attn.q_proj": { | |
| "mse": 2.3202264856081456e-06, | |
| "rmse": 0.0015232289669016097, | |
| "relative_error": 0.10355130654722193, | |
| "max_error": 0.04495662450790405, | |
| "sparsity": 0.3904116948445638, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.encoder_attn.out_proj": { | |
| "mse": 3.6354515486891614e-06, | |
| "rmse": 0.001906686012087245, | |
| "relative_error": 0.14821881625212166, | |
| "max_error": 0.07686328887939453, | |
| "sparsity": 0.36955706278483075, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.fc1": { | |
| "mse": 2.724333626247244e-06, | |
| "rmse": 0.0016505555507910796, | |
| "relative_error": 0.10972522648773587, | |
| "max_error": 0.21759366989135742, | |
| "sparsity": 0.39084283510843915, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.13.fc2": { | |
| "mse": 4.931787771056406e-06, | |
| "rmse": 0.0022207628804211416, | |
| "relative_error": 0.15521514437074152, | |
| "max_error": 0.09429550170898438, | |
| "sparsity": 0.37915460268656415, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.self_attn.k_proj": { | |
| "mse": 3.087111963395728e-06, | |
| "rmse": 0.0017570179177787938, | |
| "relative_error": 0.1092595745005226, | |
| "max_error": 0.09504544734954834, | |
| "sparsity": 0.390896479288737, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.self_attn.v_proj": { | |
| "mse": 2.5206579721270828e-06, | |
| "rmse": 0.001587658014853036, | |
| "relative_error": 0.11081719637216424, | |
| "max_error": 0.051631927490234375, | |
| "sparsity": 0.39329179128011066, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.self_attn.q_proj": { | |
| "mse": 3.190672487107804e-06, | |
| "rmse": 0.0017862453602760747, | |
| "relative_error": 0.11018739252519799, | |
| "max_error": 0.08269834518432617, | |
| "sparsity": 0.3911660512288411, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.self_attn.out_proj": { | |
| "mse": 3.2235734579444397e-06, | |
| "rmse": 0.0017954312735230051, | |
| "relative_error": 0.12984946467693861, | |
| "max_error": 0.07321488857269287, | |
| "sparsity": 0.37707042694091797, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.encoder_attn.k_proj": { | |
| "mse": 4.632142918126192e-06, | |
| "rmse": 0.002152241370786788, | |
| "relative_error": 0.14750340403551016, | |
| "max_error": 0.06115221977233887, | |
| "sparsity": 0.3844248453776042, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.encoder_attn.v_proj": { | |
| "mse": 4.610026280715829e-06, | |
| "rmse": 0.0021470971754244915, | |
| "relative_error": 0.15607740424439667, | |
| "max_error": 0.05488014221191406, | |
| "sparsity": 0.3850485483805339, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.encoder_attn.q_proj": { | |
| "mse": 2.4240475795522798e-06, | |
| "rmse": 0.0015569353164317007, | |
| "relative_error": 0.10406009770556121, | |
| "max_error": 0.042209625244140625, | |
| "sparsity": 0.3910134633382161, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.encoder_attn.out_proj": { | |
| "mse": 3.991907306044595e-06, | |
| "rmse": 0.001997975802166932, | |
| "relative_error": 0.14917388540435966, | |
| "max_error": 0.1012420654296875, | |
| "sparsity": 0.3692118326822917, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.fc1": { | |
| "mse": 2.651945123943733e-06, | |
| "rmse": 0.001628479390088721, | |
| "relative_error": 0.10843630002577104, | |
| "max_error": 0.17707645893096924, | |
| "sparsity": 0.3903570969899495, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.14.fc2": { | |
| "mse": 5.093220352136996e-06, | |
| "rmse": 0.0022568164196799427, | |
| "relative_error": 0.15510645443953647, | |
| "max_error": 0.09729671478271484, | |
| "sparsity": 0.378409465154012, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.self_attn.k_proj": { | |
| "mse": 2.952674776679487e-06, | |
| "rmse": 0.00171833488490442, | |
| "relative_error": 0.11039714548231819, | |
| "max_error": 0.05137348175048828, | |
| "sparsity": 0.39000829060872394, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.self_attn.v_proj": { | |
| "mse": 2.5764302336028777e-06, | |
| "rmse": 0.0016051262360334397, | |
| "relative_error": 0.11392904939485686, | |
| "max_error": 0.07793331146240234, | |
| "sparsity": 0.3936421076456706, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.self_attn.q_proj": { | |
| "mse": 3.0681758289574645e-06, | |
| "rmse": 0.0017516209147408193, | |
| "relative_error": 0.111066066762652, | |
| "max_error": 0.061458587646484375, | |
| "sparsity": 0.3910897572835286, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.self_attn.out_proj": { | |
| "mse": 3.2459129215567373e-06, | |
| "rmse": 0.0018016417295224757, | |
| "relative_error": 0.13252200909794726, | |
| "max_error": 0.17857837677001953, | |
| "sparsity": 0.3764670689900716, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.encoder_attn.k_proj": { | |
| "mse": 3.870277396345045e-06, | |
| "rmse": 0.001967302060270625, | |
| "relative_error": 0.1451358525235946, | |
| "max_error": 0.05497002601623535, | |
| "sparsity": 0.38480281829833984, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.encoder_attn.v_proj": { | |
| "mse": 4.471807187655941e-06, | |
| "rmse": 0.002114664793213322, | |
| "relative_error": 0.15706173162807832, | |
| "max_error": 0.07093095779418945, | |
| "sparsity": 0.3840920130411784, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.encoder_attn.q_proj": { | |
| "mse": 2.2638698737864615e-06, | |
| "rmse": 0.0015046161881976617, | |
| "relative_error": 0.10758771192543139, | |
| "max_error": 0.06101179122924805, | |
| "sparsity": 0.389739990234375, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.encoder_attn.out_proj": { | |
| "mse": 4.6661739361297805e-06, | |
| "rmse": 0.00216013285150006, | |
| "relative_error": 0.164261855187405, | |
| "max_error": 0.16168975830078125, | |
| "sparsity": 0.36803849538167316, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.fc1": { | |
| "mse": 2.6208585950371344e-06, | |
| "rmse": 0.0016189066047913741, | |
| "relative_error": 0.10883047265246418, | |
| "max_error": 0.2597169876098633, | |
| "sparsity": 0.3904755115509033, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.15.fc2": { | |
| "mse": 5.432469151855912e-06, | |
| "rmse": 0.002330765786572283, | |
| "relative_error": 0.15863985173106748, | |
| "max_error": 0.09306836128234863, | |
| "sparsity": 0.37816667556762695, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.self_attn.k_proj": { | |
| "mse": 2.975872803290258e-06, | |
| "rmse": 0.0017250718255453186, | |
| "relative_error": 0.11149507911807895, | |
| "max_error": 0.0687759518623352, | |
| "sparsity": 0.38975874582926434, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.self_attn.v_proj": { | |
| "mse": 2.708741931201075e-06, | |
| "rmse": 0.001645825607772912, | |
| "relative_error": 0.11401836328792606, | |
| "max_error": 0.10725784301757812, | |
| "sparsity": 0.3934755325317383, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.self_attn.q_proj": { | |
| "mse": 3.0313924526126357e-06, | |
| "rmse": 0.0017410894441735713, | |
| "relative_error": 0.11183478459241353, | |
| "max_error": 0.08448153734207153, | |
| "sparsity": 0.3898779551188151, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.self_attn.out_proj": { | |
| "mse": 2.974707740577287e-06, | |
| "rmse": 0.001724734107211105, | |
| "relative_error": 0.12364482519876267, | |
| "max_error": 0.10401153564453125, | |
| "sparsity": 0.3794857660929362, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.encoder_attn.k_proj": { | |
| "mse": 5.051839252701029e-06, | |
| "rmse": 0.0022476296965249923, | |
| "relative_error": 0.1462108008993025, | |
| "max_error": 0.06560587882995605, | |
| "sparsity": 0.38422679901123047, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.encoder_attn.v_proj": { | |
| "mse": 4.101235390407965e-06, | |
| "rmse": 0.00202515070807285, | |
| "relative_error": 0.15203837714653734, | |
| "max_error": 0.062149763107299805, | |
| "sparsity": 0.38334306081136066, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.encoder_attn.q_proj": { | |
| "mse": 2.8545221084641526e-06, | |
| "rmse": 0.0016895331036899373, | |
| "relative_error": 0.10754797639395516, | |
| "max_error": 0.05772113800048828, | |
| "sparsity": 0.39005088806152344, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.encoder_attn.out_proj": { | |
| "mse": 3.7151462493056897e-06, | |
| "rmse": 0.001927471465237732, | |
| "relative_error": 0.14317596856972012, | |
| "max_error": 0.07494163513183594, | |
| "sparsity": 0.37266890207926434, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.fc1": { | |
| "mse": 2.5293929866165854e-06, | |
| "rmse": 0.0015904065475898248, | |
| "relative_error": 0.1073629947850975, | |
| "max_error": 0.15852278470993042, | |
| "sparsity": 0.3905927340189616, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.16.fc2": { | |
| "mse": 6.293004844337702e-06, | |
| "rmse": 0.002508586224218275, | |
| "relative_error": 0.16603359634059295, | |
| "max_error": 0.10226941108703613, | |
| "sparsity": 0.37468798955281574, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.self_attn.k_proj": { | |
| "mse": 3.057536559936125e-06, | |
| "rmse": 0.0017485812992069098, | |
| "relative_error": 0.11201393995950695, | |
| "max_error": 0.09579706192016602, | |
| "sparsity": 0.3894125620524089, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.self_attn.v_proj": { | |
| "mse": 2.4118428427755134e-06, | |
| "rmse": 0.0015530108958972289, | |
| "relative_error": 0.11385278737880668, | |
| "max_error": 0.04882359504699707, | |
| "sparsity": 0.3921810785929362, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.self_attn.q_proj": { | |
| "mse": 3.1471593047172064e-06, | |
| "rmse": 0.0017740234791899475, | |
| "relative_error": 0.11315092671556473, | |
| "max_error": 0.08912181854248047, | |
| "sparsity": 0.38965892791748047, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.self_attn.out_proj": { | |
| "mse": 3.0684352623211453e-06, | |
| "rmse": 0.0017516949684009328, | |
| "relative_error": 0.13184404672823571, | |
| "max_error": 0.062131404876708984, | |
| "sparsity": 0.3765624364217122, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.encoder_attn.k_proj": { | |
| "mse": 4.313059889682336e-06, | |
| "rmse": 0.002076790766948451, | |
| "relative_error": 0.1438282830189719, | |
| "max_error": 0.0576934814453125, | |
| "sparsity": 0.38422075907389325, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.encoder_attn.v_proj": { | |
| "mse": 5.9649946706485935e-06, | |
| "rmse": 0.0024423338573275753, | |
| "relative_error": 0.15639097085047413, | |
| "max_error": 0.13858509063720703, | |
| "sparsity": 0.38591798146565753, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.encoder_attn.q_proj": { | |
| "mse": 2.7382375265005976e-06, | |
| "rmse": 0.001654762075496232, | |
| "relative_error": 0.1126409982320043, | |
| "max_error": 0.05305671691894531, | |
| "sparsity": 0.38878027598063153, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.encoder_attn.out_proj": { | |
| "mse": 5.209771188674495e-06, | |
| "rmse": 0.002282492319521469, | |
| "relative_error": 0.15552468907321956, | |
| "max_error": 0.14420032501220703, | |
| "sparsity": 0.3756395975748698, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.fc1": { | |
| "mse": 2.62208232015837e-06, | |
| "rmse": 0.0016192845087131444, | |
| "relative_error": 0.10946967739129748, | |
| "max_error": 0.18319380283355713, | |
| "sparsity": 0.39118019739786786, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.17.fc2": { | |
| "mse": 5.671533472195733e-06, | |
| "rmse": 0.0023814981570842615, | |
| "relative_error": 0.15605075908659866, | |
| "max_error": 0.10937494039535522, | |
| "sparsity": 0.37987271944681805, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.self_attn.k_proj": { | |
| "mse": 3.062216137550422e-06, | |
| "rmse": 0.001749918894563523, | |
| "relative_error": 0.11481806587196931, | |
| "max_error": 0.06973004341125488, | |
| "sparsity": 0.3889427185058594, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.self_attn.v_proj": { | |
| "mse": 2.6578031793178525e-06, | |
| "rmse": 0.0016302770253296991, | |
| "relative_error": 0.11580165808777118, | |
| "max_error": 0.07330894470214844, | |
| "sparsity": 0.3923803965250651, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.self_attn.q_proj": { | |
| "mse": 3.1825525184103753e-06, | |
| "rmse": 0.0017839709970765712, | |
| "relative_error": 0.11659504798291477, | |
| "max_error": 0.11487168073654175, | |
| "sparsity": 0.38934771219889325, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.self_attn.out_proj": { | |
| "mse": 3.1932124784361804e-06, | |
| "rmse": 0.0017869562049575194, | |
| "relative_error": 0.12925187774123928, | |
| "max_error": 0.061557769775390625, | |
| "sparsity": 0.3762219746907552, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.encoder_attn.k_proj": { | |
| "mse": 3.7404693102871533e-06, | |
| "rmse": 0.001934029294061275, | |
| "relative_error": 0.14225427023342369, | |
| "max_error": 0.06442117691040039, | |
| "sparsity": 0.3841543197631836, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.encoder_attn.v_proj": { | |
| "mse": 5.93807635596022e-06, | |
| "rmse": 0.002436816849080008, | |
| "relative_error": 0.15289312638788924, | |
| "max_error": 0.09826511144638062, | |
| "sparsity": 0.3840119043986003, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.encoder_attn.q_proj": { | |
| "mse": 2.600288553367136e-06, | |
| "rmse": 0.0016125410237780421, | |
| "relative_error": 0.11576435007585356, | |
| "max_error": 0.04055023193359375, | |
| "sparsity": 0.3882484436035156, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.encoder_attn.out_proj": { | |
| "mse": 6.008140189805999e-06, | |
| "rmse": 0.0024511507888757065, | |
| "relative_error": 0.16401463535585767, | |
| "max_error": 0.19881439208984375, | |
| "sparsity": 0.3687610626220703, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.fc1": { | |
| "mse": 2.5828803700278513e-06, | |
| "rmse": 0.0016071342103346103, | |
| "relative_error": 0.10860366900771316, | |
| "max_error": 0.18947696685791016, | |
| "sparsity": 0.39104294776916504, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.18.fc2": { | |
| "mse": 7.289684617717285e-06, | |
| "rmse": 0.002699941595241883, | |
| "relative_error": 0.17422221641452565, | |
| "max_error": 0.1270887851715088, | |
| "sparsity": 0.37224801381429035, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.self_attn.k_proj": { | |
| "mse": 3.049122142328997e-06, | |
| "rmse": 0.0017461735716500228, | |
| "relative_error": 0.11567203769595212, | |
| "max_error": 0.0702664852142334, | |
| "sparsity": 0.38868459065755206, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.self_attn.v_proj": { | |
| "mse": 2.7155556381330825e-06, | |
| "rmse": 0.0016478943042965719, | |
| "relative_error": 0.1192114629832996, | |
| "max_error": 0.07558798789978027, | |
| "sparsity": 0.39115556081136066, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.self_attn.q_proj": { | |
| "mse": 3.1372007924801437e-06, | |
| "rmse": 0.001771214496462849, | |
| "relative_error": 0.11738424348854617, | |
| "max_error": 0.06286239624023438, | |
| "sparsity": 0.38935597737630206, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.self_attn.out_proj": { | |
| "mse": 3.676388359963312e-06, | |
| "rmse": 0.0019173910294885891, | |
| "relative_error": 0.14067619140069135, | |
| "max_error": 0.07267498970031738, | |
| "sparsity": 0.3723777135213216, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.encoder_attn.k_proj": { | |
| "mse": 4.325136615079828e-06, | |
| "rmse": 0.002079696279527332, | |
| "relative_error": 0.1461241549187539, | |
| "max_error": 0.10210031270980835, | |
| "sparsity": 0.38515504201253253, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.encoder_attn.v_proj": { | |
| "mse": 7.3299588621011935e-06, | |
| "rmse": 0.0027073896768107088, | |
| "relative_error": 0.15419761759787312, | |
| "max_error": 0.11008405685424805, | |
| "sparsity": 0.38483206431070965, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.encoder_attn.q_proj": { | |
| "mse": 3.1062227208167315e-06, | |
| "rmse": 0.0017624479342144355, | |
| "relative_error": 0.12209272833975149, | |
| "max_error": 0.04779624938964844, | |
| "sparsity": 0.38499895731608075, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.encoder_attn.out_proj": { | |
| "mse": 8.42092777020298e-06, | |
| "rmse": 0.0029018834866691287, | |
| "relative_error": 0.1775200950024603, | |
| "max_error": 0.21614456176757812, | |
| "sparsity": 0.3764657974243164, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.fc1": { | |
| "mse": 2.607575652291416e-06, | |
| "rmse": 0.0016147989510435707, | |
| "relative_error": 0.10923039530021018, | |
| "max_error": 0.14707237482070923, | |
| "sparsity": 0.3912237485249837, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.19.fc2": { | |
| "mse": 8.792547305347398e-06, | |
| "rmse": 0.0029652229773403883, | |
| "relative_error": 0.18922121151644503, | |
| "max_error": 0.125, | |
| "sparsity": 0.3674290180206299, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.self_attn.k_proj": { | |
| "mse": 2.585725269454997e-06, | |
| "rmse": 0.0016080190513345907, | |
| "relative_error": 0.1106971274399791, | |
| "max_error": 0.04056340456008911, | |
| "sparsity": 0.38825352986653644, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.self_attn.v_proj": { | |
| "mse": 2.6116106255358318e-06, | |
| "rmse": 0.001616047841351187, | |
| "relative_error": 0.1165965263213501, | |
| "max_error": 0.0487060546875, | |
| "sparsity": 0.39172935485839844, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.self_attn.q_proj": { | |
| "mse": 2.7351811695552897e-06, | |
| "rmse": 0.0016538383142119092, | |
| "relative_error": 0.11324436363197928, | |
| "max_error": 0.05264711380004883, | |
| "sparsity": 0.3888041178385417, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.self_attn.out_proj": { | |
| "mse": 3.686214313347591e-06, | |
| "rmse": 0.0019199516434919893, | |
| "relative_error": 0.14039298497893754, | |
| "max_error": 0.08768653869628906, | |
| "sparsity": 0.37349573771158856, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.encoder_attn.k_proj": { | |
| "mse": 4.4726807573169935e-06, | |
| "rmse": 0.0021148713335134585, | |
| "relative_error": 0.14211945336391, | |
| "max_error": 0.08284521102905273, | |
| "sparsity": 0.38464196523030597, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.encoder_attn.v_proj": { | |
| "mse": 7.222415661090054e-06, | |
| "rmse": 0.0026874552388998136, | |
| "relative_error": 0.15828476986233206, | |
| "max_error": 0.11456680297851562, | |
| "sparsity": 0.38407135009765625, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.encoder_attn.q_proj": { | |
| "mse": 3.376102540642023e-06, | |
| "rmse": 0.001837417356139324, | |
| "relative_error": 0.12240313810067552, | |
| "max_error": 0.07167434692382812, | |
| "sparsity": 0.3857393264770508, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.encoder_attn.out_proj": { | |
| "mse": 5.347591013560304e-06, | |
| "rmse": 0.002312485894780832, | |
| "relative_error": 0.14779052883935034, | |
| "max_error": 0.14819598197937012, | |
| "sparsity": 0.370513916015625, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.fc1": { | |
| "mse": 2.671948550414527e-06, | |
| "rmse": 0.0016346096018360246, | |
| "relative_error": 0.11068499087775575, | |
| "max_error": 0.20884323120117188, | |
| "sparsity": 0.39017558097839355, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.20.fc2": { | |
| "mse": 8.031453035073355e-06, | |
| "rmse": 0.002833981833934959, | |
| "relative_error": 0.17959298852793198, | |
| "max_error": 0.14152908325195312, | |
| "sparsity": 0.37109629313151044, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.self_attn.k_proj": { | |
| "mse": 2.8241770451131742e-06, | |
| "rmse": 0.001680528799251347, | |
| "relative_error": 0.11666074817263411, | |
| "max_error": 0.08865010738372803, | |
| "sparsity": 0.38890234629313153, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.self_attn.v_proj": { | |
| "mse": 2.7611768018687144e-06, | |
| "rmse": 0.0016616789105807158, | |
| "relative_error": 0.11628176687983355, | |
| "max_error": 0.06029319763183594, | |
| "sparsity": 0.39069143931070965, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.self_attn.q_proj": { | |
| "mse": 2.902990217990009e-06, | |
| "rmse": 0.0017038163686236875, | |
| "relative_error": 0.11756411493004902, | |
| "max_error": 0.09850597381591797, | |
| "sparsity": 0.38947486877441406, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.self_attn.out_proj": { | |
| "mse": 3.309012527097366e-06, | |
| "rmse": 0.00181906913752539, | |
| "relative_error": 0.12909355415486923, | |
| "max_error": 0.10249519348144531, | |
| "sparsity": 0.3772468566894531, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.encoder_attn.k_proj": { | |
| "mse": 4.397637894726358e-06, | |
| "rmse": 0.0020970545760009103, | |
| "relative_error": 0.14199766950719964, | |
| "max_error": 0.056479454040527344, | |
| "sparsity": 0.3843771616617839, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.encoder_attn.v_proj": { | |
| "mse": 9.549195965519175e-06, | |
| "rmse": 0.0030901773356102357, | |
| "relative_error": 0.15587564523772446, | |
| "max_error": 0.1548752784729004, | |
| "sparsity": 0.38514550526936847, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.encoder_attn.q_proj": { | |
| "mse": 3.236666543671163e-06, | |
| "rmse": 0.0017990738016188115, | |
| "relative_error": 0.12158806361064572, | |
| "max_error": 0.044310808181762695, | |
| "sparsity": 0.3864482243855794, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.encoder_attn.out_proj": { | |
| "mse": 1.2453544513846282e-05, | |
| "rmse": 0.0035289579926440443, | |
| "relative_error": 0.19398958543544084, | |
| "max_error": 0.2276768684387207, | |
| "sparsity": 0.37346363067626953, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.fc1": { | |
| "mse": 2.6899620024778415e-06, | |
| "rmse": 0.0016401103628956928, | |
| "relative_error": 0.11088570231448468, | |
| "max_error": 0.19092488288879395, | |
| "sparsity": 0.39104406038920086, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.21.fc2": { | |
| "mse": 1.1716691915353294e-05, | |
| "rmse": 0.0034229653687049325, | |
| "relative_error": 0.21299683368336433, | |
| "max_error": 0.18568956851959229, | |
| "sparsity": 0.35869065920511883, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.self_attn.k_proj": { | |
| "mse": 2.4202918211813085e-06, | |
| "rmse": 0.0015557287106630477, | |
| "relative_error": 0.11283075734818315, | |
| "max_error": 0.08211708068847656, | |
| "sparsity": 0.3905862172444661, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.self_attn.v_proj": { | |
| "mse": 2.446655571475276e-06, | |
| "rmse": 0.0015641788809069364, | |
| "relative_error": 0.1136433297025706, | |
| "max_error": 0.04386502504348755, | |
| "sparsity": 0.3926156361897786, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.self_attn.q_proj": { | |
| "mse": 2.585672973509645e-06, | |
| "rmse": 0.001608002790267991, | |
| "relative_error": 0.11486764879772378, | |
| "max_error": 0.08422493934631348, | |
| "sparsity": 0.3920370737711589, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.self_attn.out_proj": { | |
| "mse": 3.3158557926071808e-06, | |
| "rmse": 0.0018209491460793683, | |
| "relative_error": 0.13301228810832066, | |
| "max_error": 0.07436180114746094, | |
| "sparsity": 0.37401390075683594, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.encoder_attn.k_proj": { | |
| "mse": 4.3133932194905356e-06, | |
| "rmse": 0.0020768710165753037, | |
| "relative_error": 0.14067432703895769, | |
| "max_error": 0.05864429473876953, | |
| "sparsity": 0.38448651631673175, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.encoder_attn.v_proj": { | |
| "mse": 8.616913873993326e-06, | |
| "rmse": 0.00293545803478662, | |
| "relative_error": 0.15301533169596557, | |
| "max_error": 0.17897844314575195, | |
| "sparsity": 0.3843116760253906, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.encoder_attn.q_proj": { | |
| "mse": 2.9848199574189493e-06, | |
| "rmse": 0.0017276631492912468, | |
| "relative_error": 0.11710600067795825, | |
| "max_error": 0.0589747428894043, | |
| "sparsity": 0.3881543477376302, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.encoder_attn.out_proj": { | |
| "mse": 8.628910109109711e-06, | |
| "rmse": 0.0029375006568696646, | |
| "relative_error": 0.17018883610312907, | |
| "max_error": 0.1784210205078125, | |
| "sparsity": 0.37581761678059894, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.fc1": { | |
| "mse": 2.6359286948718363e-06, | |
| "rmse": 0.0016235543399812143, | |
| "relative_error": 0.10956246786200693, | |
| "max_error": 0.15522384643554688, | |
| "sparsity": 0.3922089735666911, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.22.fc2": { | |
| "mse": 1.6377167412429117e-05, | |
| "rmse": 0.004046871311572573, | |
| "relative_error": 0.2533474753294872, | |
| "max_error": 0.22931241989135742, | |
| "sparsity": 0.334999958674113, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.self_attn.k_proj": { | |
| "mse": 2.916458697654889e-06, | |
| "rmse": 0.0017077642394823967, | |
| "relative_error": 0.11842748635264559, | |
| "max_error": 0.06891632080078125, | |
| "sparsity": 0.3846117655436198, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.self_attn.v_proj": { | |
| "mse": 2.9219972930150107e-06, | |
| "rmse": 0.001709385062826691, | |
| "relative_error": 0.12511812783253307, | |
| "max_error": 0.04297161102294922, | |
| "sparsity": 0.3864247004191081, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.self_attn.q_proj": { | |
| "mse": 2.794949296003324e-06, | |
| "rmse": 0.0016718101853988462, | |
| "relative_error": 0.11630976302364118, | |
| "max_error": 0.05386066436767578, | |
| "sparsity": 0.3846616744995117, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.self_attn.out_proj": { | |
| "mse": 2.7796131689683534e-06, | |
| "rmse": 0.0016672171931000333, | |
| "relative_error": 0.12332751993743979, | |
| "max_error": 0.09098243713378906, | |
| "sparsity": 0.3754889170328776, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.encoder_attn.k_proj": { | |
| "mse": 4.40665189671563e-06, | |
| "rmse": 0.0020992026811900823, | |
| "relative_error": 0.14626904581808187, | |
| "max_error": 0.09139752388000488, | |
| "sparsity": 0.38378461201985675, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.encoder_attn.v_proj": { | |
| "mse": 5.509216862265021e-06, | |
| "rmse": 0.0023471720989874222, | |
| "relative_error": 0.14926709862472565, | |
| "max_error": 0.11292362213134766, | |
| "sparsity": 0.3838071823120117, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.encoder_attn.q_proj": { | |
| "mse": 3.3597671063034795e-06, | |
| "rmse": 0.0018329667499175973, | |
| "relative_error": 0.12338244477979932, | |
| "max_error": 0.04654979705810547, | |
| "sparsity": 0.38701915740966797, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.encoder_attn.out_proj": { | |
| "mse": 3.527855142237968e-06, | |
| "rmse": 0.0018782585397750671, | |
| "relative_error": 0.1269762413270245, | |
| "max_error": 0.10678958892822266, | |
| "sparsity": 0.37538401285807294, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.fc1": { | |
| "mse": 3.3838575745903654e-06, | |
| "rmse": 0.0018395264538979496, | |
| "relative_error": 0.11950819790999201, | |
| "max_error": 0.0970156192779541, | |
| "sparsity": 0.38993239402770996, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| }, | |
| "model.decoder.layers.23.fc2": { | |
| "mse": 1.7770662452676333e-05, | |
| "rmse": 0.00421552635535307, | |
| "relative_error": 0.2633558020199924, | |
| "max_error": 0.3130340576171875, | |
| "sparsity": 0.33670878410339355, | |
| "effective_bits": 9.0, | |
| "sparse_nnz": 0, | |
| "n_planes": 3, | |
| "rescued_rows": 0 | |
| } | |
| }, | |
| "summary": { | |
| "method_name": "Broad-tritplane3", | |
| "model_family": "seq2seq_lm", | |
| "selected_components": [ | |
| "decoder" | |
| ], | |
| "quantized_modules": 240, | |
| "quantized_params": 402653184, | |
| "quantized_fraction": 0.5271309931564236, | |
| "avg_relative_error": 0.13568142455431076, | |
| "avg_effective_bits": 9.0, | |
| "full_model_effective_bits": 12.310083047905035, | |
| "compression_ratio": 1.299747527107295 | |
| }, | |
| "method_name": "Broad-tritplane3", | |
| "model_family": "seq2seq_lm", | |
| "format_family": "tritplane_small", | |
| "format_version": "1.0", | |
| "total_packed_bytes": 452984832, | |
| "total_fp16_bytes": 805306368, | |
| "compression_ratio": 1.7777777777777777 | |
| } |