mllm-dev committed on
Commit
96c5b98
·
verified ·
1 Parent(s): 3d37ab0

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -1,9 +1,9 @@
1
  ---
2
  base_model:
3
- - mllm-dev/gpt2_f_experiment_2
4
  - mllm-dev/gpt2_f_experiment_3
5
  - mllm-dev/gpt2_f_experiment_1
6
  - mllm-dev/gpt2_f_experiment_0
 
7
  - mllm-dev/gpt2_f_experiment_4
8
  library_name: transformers
9
  tags:
@@ -23,10 +23,10 @@ This model was merged using the [linear](https://arxiv.org/abs/2203.05482) merge
23
  ### Models Merged
24
 
25
  The following models were included in the merge:
26
- * [mllm-dev/gpt2_f_experiment_2](https://huggingface.co/mllm-dev/gpt2_f_experiment_2)
27
  * [mllm-dev/gpt2_f_experiment_3](https://huggingface.co/mllm-dev/gpt2_f_experiment_3)
28
  * [mllm-dev/gpt2_f_experiment_1](https://huggingface.co/mllm-dev/gpt2_f_experiment_1)
29
  * [mllm-dev/gpt2_f_experiment_0](https://huggingface.co/mllm-dev/gpt2_f_experiment_0)
 
30
  * [mllm-dev/gpt2_f_experiment_4](https://huggingface.co/mllm-dev/gpt2_f_experiment_4)
31
 
32
  ### Configuration
 
1
  ---
2
  base_model:
 
3
  - mllm-dev/gpt2_f_experiment_3
4
  - mllm-dev/gpt2_f_experiment_1
5
  - mllm-dev/gpt2_f_experiment_0
6
+ - mllm-dev/gpt2_f_experiment_2
7
  - mllm-dev/gpt2_f_experiment_4
8
  library_name: transformers
9
  tags:
 
23
  ### Models Merged
24
 
25
  The following models were included in the merge:
 
26
  * [mllm-dev/gpt2_f_experiment_3](https://huggingface.co/mllm-dev/gpt2_f_experiment_3)
27
  * [mllm-dev/gpt2_f_experiment_1](https://huggingface.co/mllm-dev/gpt2_f_experiment_1)
28
  * [mllm-dev/gpt2_f_experiment_0](https://huggingface.co/mllm-dev/gpt2_f_experiment_0)
29
+ * [mllm-dev/gpt2_f_experiment_2](https://huggingface.co/mllm-dev/gpt2_f_experiment_2)
30
  * [mllm-dev/gpt2_f_experiment_4](https://huggingface.co/mllm-dev/gpt2_f_experiment_4)
31
 
32
  ### Configuration
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "mllm-dev/gpt2_f_experiment_2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2ForSequenceClassification"
 
1
  {
2
+ "_name_or_path": "mllm-dev/gpt2_f_experiment_3",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2ForSequenceClassification"
model-00001-of-00001.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ea189e90d0c9eea2a0e90b0cce5f0d694c2be754af7f82b42f23921e62d042b
3
  size 248902264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:109d22198c42220534f2b55ff9566334f14c2d3c6976f90d83b3d654b92dbc74
3
  size 248902264
model.safetensors.index.json CHANGED
@@ -1 +1 @@
1
- {"metadata": {"mergekit_version": "0.0.4.1"}, "weight_map": {"score.weight": "model-00001-of-00001.safetensors", "transformer.ln_f.bias": "model-00001-of-00001.safetensors", "transformer.ln_f.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_proj.bias": 
"model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln_2.bias": "model-00001-of-00001.safetensors", 
"transformer.h.7.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.7.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_proj.weight": 
"model-00001-of-00001.safetensors", "transformer.h.5.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_proj.weight": 
"model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.0.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln_2.weight": "model-00001-of-00001.safetensors", 
"transformer.h.0.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.wpe.weight": "model-00001-of-00001.safetensors", "transformer.wte.weight": "model-00001-of-00001.safetensors"}}
 
1
+ {"metadata": {"mergekit_version": "0.0.4.1"}, "weight_map": {"score.weight": "model-00001-of-00001.safetensors", "transformer.ln_f.bias": "model-00001-of-00001.safetensors", "transformer.ln_f.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_fc.bias": 
"model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln_2.bias": "model-00001-of-00001.safetensors", 
"transformer.h.7.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.7.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_proj.weight": 
"model-00001-of-00001.safetensors", "transformer.h.5.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_fc.weight": 
"model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln_2.weight": "model-00001-of-00001.safetensors", 
"transformer.h.0.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.wpe.weight": "model-00001-of-00001.safetensors", "transformer.wte.weight": "model-00001-of-00001.safetensors"}}