Upload folder using huggingface_hub
Browse files- README.md +2 -2
- config.json +1 -1
- model-00001-of-00001.safetensors +1 -1
- model.safetensors.index.json +1 -1
README.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
---
|
| 2 |
base_model:
|
| 3 |
-
- mllm-dev/gpt2_f_experiment_2
|
| 4 |
- mllm-dev/gpt2_f_experiment_3
|
| 5 |
- mllm-dev/gpt2_f_experiment_1
|
| 6 |
- mllm-dev/gpt2_f_experiment_0
|
|
|
|
| 7 |
- mllm-dev/gpt2_f_experiment_4
|
| 8 |
library_name: transformers
|
| 9 |
tags:
|
|
@@ -23,10 +23,10 @@ This model was merged using the [linear](https://arxiv.org/abs/2203.05482) merge
|
|
| 23 |
### Models Merged
|
| 24 |
|
| 25 |
The following models were included in the merge:
|
| 26 |
-
* [mllm-dev/gpt2_f_experiment_2](https://huggingface.co/mllm-dev/gpt2_f_experiment_2)
|
| 27 |
* [mllm-dev/gpt2_f_experiment_3](https://huggingface.co/mllm-dev/gpt2_f_experiment_3)
|
| 28 |
* [mllm-dev/gpt2_f_experiment_1](https://huggingface.co/mllm-dev/gpt2_f_experiment_1)
|
| 29 |
* [mllm-dev/gpt2_f_experiment_0](https://huggingface.co/mllm-dev/gpt2_f_experiment_0)
|
|
|
|
| 30 |
* [mllm-dev/gpt2_f_experiment_4](https://huggingface.co/mllm-dev/gpt2_f_experiment_4)
|
| 31 |
|
| 32 |
### Configuration
|
|
|
|
| 1 |
---
|
| 2 |
base_model:
|
|
|
|
| 3 |
- mllm-dev/gpt2_f_experiment_3
|
| 4 |
- mllm-dev/gpt2_f_experiment_1
|
| 5 |
- mllm-dev/gpt2_f_experiment_0
|
| 6 |
+
- mllm-dev/gpt2_f_experiment_2
|
| 7 |
- mllm-dev/gpt2_f_experiment_4
|
| 8 |
library_name: transformers
|
| 9 |
tags:
|
|
|
|
| 23 |
### Models Merged
|
| 24 |
|
| 25 |
The following models were included in the merge:
|
|
|
|
| 26 |
* [mllm-dev/gpt2_f_experiment_3](https://huggingface.co/mllm-dev/gpt2_f_experiment_3)
|
| 27 |
* [mllm-dev/gpt2_f_experiment_1](https://huggingface.co/mllm-dev/gpt2_f_experiment_1)
|
| 28 |
* [mllm-dev/gpt2_f_experiment_0](https://huggingface.co/mllm-dev/gpt2_f_experiment_0)
|
| 29 |
+
* [mllm-dev/gpt2_f_experiment_2](https://huggingface.co/mllm-dev/gpt2_f_experiment_2)
|
| 30 |
* [mllm-dev/gpt2_f_experiment_4](https://huggingface.co/mllm-dev/gpt2_f_experiment_4)
|
| 31 |
|
| 32 |
### Configuration
|
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "mllm-dev/
|
| 3 |
"activation_function": "gelu_new",
|
| 4 |
"architectures": [
|
| 5 |
"GPT2ForSequenceClassification"
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "mllm-dev/gpt2_f_experiment_3",
|
| 3 |
"activation_function": "gelu_new",
|
| 4 |
"architectures": [
|
| 5 |
"GPT2ForSequenceClassification"
|
model-00001-of-00001.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 248902264
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:109d22198c42220534f2b55ff9566334f14c2d3c6976f90d83b3d654b92dbc74
|
| 3 |
size 248902264
|
model.safetensors.index.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"metadata": {"mergekit_version": "0.0.4.1"}, "weight_map": {"score.weight": "model-00001-of-00001.safetensors", "transformer.ln_f.bias": "model-00001-of-00001.safetensors", "transformer.ln_f.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.7.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.7.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.0.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.0.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.wpe.weight": "model-00001-of-00001.safetensors", "transformer.wte.weight": "model-00001-of-00001.safetensors"}}
|
|
|
|
| 1 |
+
{"metadata": {"mergekit_version": "0.0.4.1"}, "weight_map": {"score.weight": "model-00001-of-00001.safetensors", "transformer.ln_f.bias": "model-00001-of-00001.safetensors", "transformer.ln_f.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.11.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.11.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.11.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.10.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.10.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.10.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.9.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.9.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.9.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.9.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.8.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.8.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.8.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.7.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.7.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.7.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.7.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.6.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.6.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.6.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.5.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.5.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.5.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.4.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.4.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.4.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.3.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.3.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.3.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.2.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.2.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.2.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.1.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.1.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.1.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_fc.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_fc.weight": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.ln_2.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln_2.weight": "model-00001-of-00001.safetensors", "transformer.h.0.ln_1.bias": "model-00001-of-00001.safetensors", "transformer.h.0.ln_1.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_proj.bias": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_proj.weight": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_attn.bias": "model-00001-of-00001.safetensors", "transformer.h.0.attn.c_attn.weight": "model-00001-of-00001.safetensors", "transformer.wpe.weight": "model-00001-of-00001.safetensors", "transformer.wte.weight": "model-00001-of-00001.safetensors"}}
|