| { |
| "metadata": { |
| "total_size": 13536316416 |
| }, |
| "weight_map": { |
| "transformer.wte.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.7.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.8.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.8.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.8.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.8.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.9.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.9.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.9.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.9.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.11.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.12.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.12.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.12.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.12.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.16.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.17.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.17.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.17.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.17.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.19.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.20.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.20.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.20.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.20.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.22.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.23.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.23.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.23.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.23.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.23.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.28.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.28.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.28.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.28.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.33.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.33.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.33.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.33.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.38.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.38.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.38.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.38.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.0.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.0.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.3.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.3.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.5.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.5.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.5.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.10.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.10.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.12.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.13.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.13.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.13.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.13.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.16.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.16.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.16.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.16.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.21.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.21.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.21.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.21.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.26.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.26.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.26.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.28.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.29.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.29.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.29.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.29.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.31.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.32.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.32.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.32.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.32.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.34.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.35.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.35.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.35.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.35.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.37.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.38.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.38.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.38.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.38.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.0.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.2.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.2.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.2.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.3.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.3.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.3.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.3.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.3.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.3.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.7.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.7.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.12.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.12.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.12.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.12.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.14.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.15.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.15.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.15.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.15.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.17.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.18.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.18.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.18.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.18.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.18.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.18.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.23.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.23.mlp_block.c_fc_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.23.mlp_block.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.25.mlp_block.c_proj_shared.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.26.ln_1.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.26.sequence_mixer.c_attn.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.26.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.26.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.26.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.31.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.31.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.31.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.31.mlp_block.c_fc.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.36.sequence_mixer.c_proj.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.36.ln_2.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.36.mlp_block.gate.weight": "model-00001-of-00003.safetensors", |
| "transformer.h.36.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.0.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.1.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.1.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.1.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.5.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.5.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.5.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.8.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.8.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.8.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.8.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.13.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.13.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.13.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.13.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.18.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.18.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.20.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.21.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.21.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.21.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.21.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.23.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.24.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.24.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.24.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.24.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.26.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.27.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.27.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.27.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.27.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.32.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.32.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.32.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.32.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.37.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.37.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.37.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.37.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.0.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.0.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.1.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.1.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.3.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.4.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.4.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.4.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.4.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.4.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.6.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.9.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.9.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.9.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.9.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.13.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.14.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.14.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.14.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.14.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.17.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.17.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.17.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.17.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.21.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.22.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.22.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.22.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.22.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.24.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.25.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.25.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.25.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.25.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.30.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.30.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.30.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.30.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.32.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.33.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.33.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.33.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.33.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.35.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.36.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.36.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.36.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.36.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.38.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.39.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.39.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.39.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.39.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.0.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.1.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.1.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.2.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.2.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.2.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.4.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.4.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.4.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.8.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.11.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.11.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.11.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.11.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.14.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.14.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.14.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.14.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.18.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.19.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.19.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.19.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.19.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.22.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.22.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.22.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.22.mlp_block.c_fc.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.27.ln_1.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.27.sequence_mixer.c_attn.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.27.mlp_block.c_fc_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.27.mlp_block.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.29.mlp_block.c_proj_shared.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.30.sequence_mixer.c_proj.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.30.ln_2.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.30.mlp_block.gate.weight": "model-00002-of-00003.safetensors", |
| "transformer.h.30.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.35.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.35.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.35.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.35.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.0.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.4.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.5.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.5.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.6.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.6.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.6.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.6.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.7.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.7.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.7.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.7.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.7.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.7.mlp_block.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.9.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.10.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.10.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.10.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.10.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.10.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.10.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.15.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.15.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.15.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.15.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.20.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.20.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.20.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.20.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.25.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.25.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.25.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.25.mlp_block.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.27.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.28.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.28.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.28.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.28.mlp_block.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.30.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.31.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.31.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.31.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.31.mlp_block.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.33.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.34.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.34.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.34.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.34.mlp_block.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.36.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.37.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.37.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.37.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.37.mlp_block.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.39.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.ln_f.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.0.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.1.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.1.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.2.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.2.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.2.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.5.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.6.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.6.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.6.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.6.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.10.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.11.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.11.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.11.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.11.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.15.mlp_block.c_proj_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.16.ln_1.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.16.sequence_mixer.c_attn.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.16.mlp_block.c_fc_shared.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.16.mlp_block.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.19.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.19.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.19.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.19.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.24.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.24.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.24.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.24.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.29.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.29.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.29.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.29.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.34.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.34.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.34.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.34.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.39.sequence_mixer.c_proj.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.39.ln_2.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.39.mlp_block.gate.weight": "model-00003-of-00003.safetensors", |
| "transformer.h.39.mlp_block.c_fc.weight": "model-00003-of-00003.safetensors" |
| } |
| } |