{ "metadata": { "total_parameters": 3531684708, "total_size": 14126743616 }, "weight_map": { "af2_to_esm": "model-00001-of-00003.safetensors", "distogram_head.bias": "model-00003-of-00003.safetensors", "distogram_head.weight": "model-00003-of-00003.safetensors", "embedding.weight": "model-00003-of-00003.safetensors", "esm.contact_head.regression.bias": "model-00003-of-00003.safetensors", "esm.contact_head.regression.weight": "model-00003-of-00003.safetensors", "esm.embeddings.word_embeddings.weight": "model-00001-of-00003.safetensors", "esm.encoder.emb_layer_norm_after.bias": "model-00003-of-00003.safetensors", "esm.encoder.emb_layer_norm_after.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.0.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.0.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.1.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.10.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.11.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.12.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.13.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.14.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.15.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.15.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.15.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.15.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.16.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.17.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.18.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.19.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.2.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.2.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.20.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.20.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.21.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.22.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.23.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.24.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.25.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.26.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.27.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.28.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.29.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.3.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.3.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.30.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.intermediate.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.intermediate.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.30.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.LayerNorm.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.31.LayerNorm.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.31.attention.LayerNorm.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.LayerNorm.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.output.dense.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.output.dense.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.self.key.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.self.key.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.self.query.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.self.query.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.self.rotary_embeddings.inv_freq": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.self.value.bias": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.attention.self.value.weight": "model-00002-of-00003.safetensors", "esm.encoder.layer.31.intermediate.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.31.intermediate.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.31.output.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.31.output.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.LayerNorm.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.LayerNorm.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.LayerNorm.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.LayerNorm.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.output.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.output.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.self.key.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.self.key.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.self.query.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.self.query.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.self.rotary_embeddings.inv_freq": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.self.value.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.attention.self.value.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.intermediate.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.intermediate.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.output.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.32.output.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.LayerNorm.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.LayerNorm.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.LayerNorm.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.LayerNorm.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.output.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.output.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.self.key.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.self.key.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.self.query.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.self.query.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.self.rotary_embeddings.inv_freq": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.self.value.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.attention.self.value.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.intermediate.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.intermediate.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.output.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.33.output.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.LayerNorm.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.LayerNorm.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.LayerNorm.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.LayerNorm.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.output.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.output.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.self.key.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.self.key.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.self.query.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.self.query.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.self.rotary_embeddings.inv_freq": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.self.value.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.attention.self.value.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.intermediate.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.intermediate.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.output.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.34.output.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.LayerNorm.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.LayerNorm.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.LayerNorm.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.LayerNorm.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.output.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.output.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.self.key.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.self.key.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.self.query.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.self.query.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.self.rotary_embeddings.inv_freq": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.self.value.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.attention.self.value.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.intermediate.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.intermediate.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.output.dense.bias": "model-00003-of-00003.safetensors", "esm.encoder.layer.35.output.dense.weight": "model-00003-of-00003.safetensors", "esm.encoder.layer.4.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.4.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.5.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.6.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.7.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.8.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.LayerNorm.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.LayerNorm.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.output.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.self.key.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.self.key.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.self.query.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.self.query.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.self.rotary_embeddings.inv_freq": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.self.value.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.attention.self.value.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.intermediate.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.intermediate.dense.weight": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.output.dense.bias": "model-00001-of-00003.safetensors", "esm.encoder.layer.9.output.dense.weight": "model-00001-of-00003.safetensors", "esm_s_combine": "model-00001-of-00003.safetensors", "esm_s_mlp.0.bias": "model-00003-of-00003.safetensors", "esm_s_mlp.0.weight": "model-00003-of-00003.safetensors", "esm_s_mlp.1.bias": "model-00003-of-00003.safetensors", "esm_s_mlp.1.weight": "model-00003-of-00003.safetensors", "esm_s_mlp.3.bias": "model-00003-of-00003.safetensors", "esm_s_mlp.3.weight": "model-00003-of-00003.safetensors", "lddt_head.0.bias": "model-00003-of-00003.safetensors", "lddt_head.0.weight": "model-00003-of-00003.safetensors", "lddt_head.1.bias": "model-00003-of-00003.safetensors", "lddt_head.1.weight": "model-00003-of-00003.safetensors", "lddt_head.2.bias": "model-00003-of-00003.safetensors", "lddt_head.2.weight": "model-00003-of-00003.safetensors", "lddt_head.3.bias": "model-00003-of-00003.safetensors", "lddt_head.3.weight": "model-00003-of-00003.safetensors", "lm_head.bias": "model-00003-of-00003.safetensors", "lm_head.weight": "model-00003-of-00003.safetensors", "mlm_head.bias": "model-00003-of-00003.safetensors", "mlm_head.decoder.weight": "model-00003-of-00003.safetensors", "mlm_head.dense.bias": "model-00003-of-00003.safetensors", "mlm_head.dense.weight": "model-00003-of-00003.safetensors", "mlm_head.layer_norm.bias": "model-00003-of-00003.safetensors", "mlm_head.layer_norm.weight": "model-00003-of-00003.safetensors", "ptm_head.bias": "model-00003-of-00003.safetensors", "ptm_head.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.0.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.1.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.10.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.11.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.12.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.13.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.14.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.15.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.16.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.17.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.18.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.19.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.2.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.20.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.21.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.22.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.23.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.24.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.25.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.26.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.27.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.28.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.29.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.3.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.30.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.31.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.32.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.33.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.34.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.35.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.36.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.37.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.38.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.39.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.4.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.40.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.41.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.42.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.43.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.44.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.45.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.46.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.47.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.5.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.6.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.7.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.8.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.layernorm_1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.layernorm_1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_pair.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_pair.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_pair.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_pair.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_pair.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_pair.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_seq.mlp.0.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_seq.mlp.0.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_seq.mlp.1.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_seq.mlp.1.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_seq.mlp.3.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.mlp_seq.mlp.3.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.pair_to_sequence.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.pair_to_sequence.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.pair_to_sequence.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.seq_attention.g_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.seq_attention.g_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.seq_attention.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.seq_attention.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.seq_attention.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.sequence_to_pair.layernorm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.sequence_to_pair.layernorm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.sequence_to_pair.o_proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.sequence_to_pair.o_proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.sequence_to_pair.proj.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.sequence_to_pair.proj.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_end.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.linear.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.mha.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.mha.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.mha.linear_k.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.mha.linear_o.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.mha.linear_o.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.mha.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_att_start.mha.linear_v.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_in.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.layer_norm_in.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.layer_norm_in.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.layer_norm_out.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.layer_norm_out.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_a_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_a_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_a_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_a_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_b_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_b_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_b_p.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_b_p.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_g.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_g.weight": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_z.bias": "model-00003-of-00003.safetensors", "trunk.blocks.9.tri_mul_out.linear_z.weight": "model-00003-of-00003.safetensors", "trunk.pairwise_positional_embedding.embedding.weight": "model-00003-of-00003.safetensors", "trunk.recycle_disto.weight": "model-00003-of-00003.safetensors", "trunk.recycle_s_norm.bias": "model-00003-of-00003.safetensors", "trunk.recycle_s_norm.weight": "model-00003-of-00003.safetensors", "trunk.recycle_z_norm.bias": "model-00003-of-00003.safetensors", "trunk.recycle_z_norm.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.layers.0.linear_1.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.layers.0.linear_1.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.layers.0.linear_2.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.layers.0.linear_2.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.layers.1.linear_1.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.layers.1.linear_1.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.layers.1.linear_2.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.layers.1.linear_2.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.linear_in.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.linear_in.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.linear_initial.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.linear_initial.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.linear_out.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.angle_resnet.linear_out.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.bb_update.linear.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.bb_update.linear.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.head_weights": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_b.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_b.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_kv.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_kv.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_kv_points.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_kv_points.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_out.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_out.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_q.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_q.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_q_points.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.ipa.linear_q_points.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.layer_norm_ipa.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.layer_norm_ipa.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.layer_norm_s.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.layer_norm_s.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.layer_norm_z.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.layer_norm_z.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.linear_in.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.linear_in.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.transition.layer_norm.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.transition.layer_norm.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.transition.layers.0.linear_1.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.transition.layers.0.linear_1.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.transition.layers.0.linear_2.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.transition.layers.0.linear_2.weight": "model-00003-of-00003.safetensors", "trunk.structure_module.transition.layers.0.linear_3.bias": "model-00003-of-00003.safetensors", "trunk.structure_module.transition.layers.0.linear_3.weight": "model-00003-of-00003.safetensors", "trunk.trunk2sm_s.bias": "model-00003-of-00003.safetensors", "trunk.trunk2sm_s.weight": "model-00003-of-00003.safetensors", "trunk.trunk2sm_z.bias": "model-00003-of-00003.safetensors", "trunk.trunk2sm_z.weight": "model-00003-of-00003.safetensors" } }