Training in progress, epoch 1
Browse files- adapter_config.json +51 -0
- adapter_model.safetensors +3 -0
- training_args.bin +3 -0
adapter_config.json
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"auto_mapping": null,
|
| 3 |
+
"base_model_name_or_path": "EleutherAI/pythia-70m",
|
| 4 |
+
"candidate_reselection_proportion": 0.2,
|
| 5 |
+
"candidate_reselection_steps": null,
|
| 6 |
+
"density": 0.01,
|
| 7 |
+
"dtype": "float32",
|
| 8 |
+
"inference_mode": true,
|
| 9 |
+
"initial_reselection_rate": 0.2,
|
| 10 |
+
"modules_to_save": null,
|
| 11 |
+
"num_deltas": {
|
| 12 |
+
"gpt_neox.layers.0.attention.dense": 9781,
|
| 13 |
+
"gpt_neox.layers.0.attention.query_key_value": 29344,
|
| 14 |
+
"gpt_neox.layers.0.mlp.dense_4h_to_h": 39125,
|
| 15 |
+
"gpt_neox.layers.0.mlp.dense_h_to_4h": 39125,
|
| 16 |
+
"gpt_neox.layers.1.attention.dense": 9781,
|
| 17 |
+
"gpt_neox.layers.1.attention.query_key_value": 29344,
|
| 18 |
+
"gpt_neox.layers.1.mlp.dense_4h_to_h": 39125,
|
| 19 |
+
"gpt_neox.layers.1.mlp.dense_h_to_4h": 39125,
|
| 20 |
+
"gpt_neox.layers.2.attention.dense": 9781,
|
| 21 |
+
"gpt_neox.layers.2.attention.query_key_value": 29344,
|
| 22 |
+
"gpt_neox.layers.2.mlp.dense_4h_to_h": 39125,
|
| 23 |
+
"gpt_neox.layers.2.mlp.dense_h_to_4h": 39125,
|
| 24 |
+
"gpt_neox.layers.3.attention.dense": 9781,
|
| 25 |
+
"gpt_neox.layers.3.attention.query_key_value": 29344,
|
| 26 |
+
"gpt_neox.layers.3.mlp.dense_4h_to_h": 39125,
|
| 27 |
+
"gpt_neox.layers.3.mlp.dense_h_to_4h": 39125,
|
| 28 |
+
"gpt_neox.layers.4.attention.dense": 9781,
|
| 29 |
+
"gpt_neox.layers.4.attention.query_key_value": 29344,
|
| 30 |
+
"gpt_neox.layers.4.mlp.dense_4h_to_h": 39125,
|
| 31 |
+
"gpt_neox.layers.4.mlp.dense_h_to_4h": 39125,
|
| 32 |
+
"gpt_neox.layers.5.attention.dense": 9781,
|
| 33 |
+
"gpt_neox.layers.5.attention.query_key_value": 29344,
|
| 34 |
+
"gpt_neox.layers.5.mlp.dense_4h_to_h": 39125,
|
| 35 |
+
"gpt_neox.layers.5.mlp.dense_h_to_4h": 39125
|
| 36 |
+
},
|
| 37 |
+
"num_tunable_weights": null,
|
| 38 |
+
"peft_type": "SFT",
|
| 39 |
+
"reselection_rate_policy": "linear",
|
| 40 |
+
"reselection_steps": 20,
|
| 41 |
+
"revision": null,
|
| 42 |
+
"selection_accumulation_steps": 5,
|
| 43 |
+
"selection_algorithm": "rigl",
|
| 44 |
+
"target_modules": [
|
| 45 |
+
"query_key_value",
|
| 46 |
+
"dense",
|
| 47 |
+
"dense_h_to_4h",
|
| 48 |
+
"dense_4h_to_h"
|
| 49 |
+
],
|
| 50 |
+
"task_type": "CAUSAL_LM"
|
| 51 |
+
}
|
adapter_model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ebfd489e9b3bf2e8f39fbb0500620fcd8ebedce1571f788b4f1a97682a0a47e8
|
| 3 |
+
size 5640680
|
training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cc1addda1218733c9dee0892baa2171358bade3f18bc49b318555fc96871ce4
|
| 3 |
+
size 5368
|