bew committed on
Commit
9a8a18c
·
verified ·
1 Parent(s): fb6252a

Training in progress, epoch 1

Browse files
adapter_config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "EleutherAI/pythia-70m",
4
+ "candidate_reselection_proportion": 0.2,
5
+ "candidate_reselection_steps": null,
6
+ "density": 0.01,
7
+ "dtype": "float32",
8
+ "inference_mode": true,
9
+ "initial_reselection_rate": 0.2,
10
+ "modules_to_save": null,
11
+ "num_deltas": {
12
+ "gpt_neox.layers.0.attention.dense": 9781,
13
+ "gpt_neox.layers.0.attention.query_key_value": 29344,
14
+ "gpt_neox.layers.0.mlp.dense_4h_to_h": 39125,
15
+ "gpt_neox.layers.0.mlp.dense_h_to_4h": 39125,
16
+ "gpt_neox.layers.1.attention.dense": 9781,
17
+ "gpt_neox.layers.1.attention.query_key_value": 29344,
18
+ "gpt_neox.layers.1.mlp.dense_4h_to_h": 39125,
19
+ "gpt_neox.layers.1.mlp.dense_h_to_4h": 39125,
20
+ "gpt_neox.layers.2.attention.dense": 9781,
21
+ "gpt_neox.layers.2.attention.query_key_value": 29344,
22
+ "gpt_neox.layers.2.mlp.dense_4h_to_h": 39125,
23
+ "gpt_neox.layers.2.mlp.dense_h_to_4h": 39125,
24
+ "gpt_neox.layers.3.attention.dense": 9781,
25
+ "gpt_neox.layers.3.attention.query_key_value": 29344,
26
+ "gpt_neox.layers.3.mlp.dense_4h_to_h": 39125,
27
+ "gpt_neox.layers.3.mlp.dense_h_to_4h": 39125,
28
+ "gpt_neox.layers.4.attention.dense": 9781,
29
+ "gpt_neox.layers.4.attention.query_key_value": 29344,
30
+ "gpt_neox.layers.4.mlp.dense_4h_to_h": 39125,
31
+ "gpt_neox.layers.4.mlp.dense_h_to_4h": 39125,
32
+ "gpt_neox.layers.5.attention.dense": 9781,
33
+ "gpt_neox.layers.5.attention.query_key_value": 29344,
34
+ "gpt_neox.layers.5.mlp.dense_4h_to_h": 39125,
35
+ "gpt_neox.layers.5.mlp.dense_h_to_4h": 39125
36
+ },
37
+ "num_tunable_weights": null,
38
+ "peft_type": "SFT",
39
+ "reselection_rate_policy": "linear",
40
+ "reselection_steps": 20,
41
+ "revision": null,
42
+ "selection_accumulation_steps": 5,
43
+ "selection_algorithm": "rigl",
44
+ "target_modules": [
45
+ "query_key_value",
46
+ "dense",
47
+ "dense_h_to_4h",
48
+ "dense_4h_to_h"
49
+ ],
50
+ "task_type": "CAUSAL_LM"
51
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebfd489e9b3bf2e8f39fbb0500620fcd8ebedce1571f788b4f1a97682a0a47e8
3
+ size 5640680
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cc1addda1218733c9dee0892baa2171358bade3f18bc49b318555fc96871ce4
3
+ size 5368