TinyPixel committed on
Commit
f89c0a3
·
1 Parent(s): 561b4b4

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -14,13 +14,13 @@
14
  "r": 64,
15
  "revision": null,
16
  "target_modules": [
 
17
  "up_proj",
18
- "down_proj",
19
- "v_proj",
20
  "k_proj",
 
21
  "gate_proj",
22
- "o_proj",
23
- "q_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 64,
15
  "revision": null,
16
  "target_modules": [
17
+ "q_proj",
18
  "up_proj",
19
+ "o_proj",
 
20
  "k_proj",
21
+ "down_proj",
22
  "gate_proj",
23
+ "v_proj"
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6cdd50744bdd0187ef74f63033e373aedda4c10768963245424e90b63b8e85a
3
  size 639792909
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4590eccec48a795c79c444a5e95a4897a5e1fd17be423ccb30c2452c03c56f0f
3
  size 639792909
adapter_model/adapter_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": "TinyPixel/Llama-2-7B-bf16-sharded",
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16.0,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 64,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "up_proj",
19
+ "o_proj",
20
+ "k_proj",
21
+ "down_proj",
22
+ "gate_proj",
23
+ "v_proj"
24
+ ],
25
+ "task_type": "CAUSAL_LM"
26
+ }
adapter_model/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e4116d3f32577ecef8b6114563278b1e981034f6e619c6024c25251ea75aca5
3
- size 175079062
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4590eccec48a795c79c444a5e95a4897a5e1fd17be423ccb30c2452c03c56f0f
3
+ size 639792909
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da46423194ed38a0e436c518a9bec15976c71aede09eabf05b09e03b4d2107a0
3
  size 1279539525
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd9ccc53819f905b62767339beeda3cf066988a317e3f2305ec00e20759a9907
3
  size 1279539525
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a7bd131b478223910aa63dcd22b0dafd3c52112e49fb85d9750f1813c6cc619
3
  size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e614bccb1a8ef816bb038432b434c3b616bc3cc1d34ce452da7fcf26bfcd14fb
3
  size 14511
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3c11fb283cf3e4b97ac923c1ffa6de0c0ce17563d0234b23425c5d68bd12f790
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d98c4ed03f69fb5fab6228cdb31470eee8f856f271fa65776fdc56518749e9f1
3
  size 627
trainer_state.json CHANGED
@@ -1,166 +1,46 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.09068238494672409,
5
- "global_step": 50,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.0,
12
- "learning_rate": 0.0001,
13
- "loss": 1.3698,
14
  "step": 2
15
  },
16
  {
17
  "epoch": 0.01,
18
- "learning_rate": 0.0001,
19
- "loss": 1.2409,
20
  "step": 4
21
  },
22
- {
23
- "epoch": 0.01,
24
- "learning_rate": 0.0001,
25
- "loss": 1.1957,
26
- "step": 6
27
- },
28
- {
29
- "epoch": 0.01,
30
- "learning_rate": 0.0001,
31
- "loss": 1.2257,
32
- "step": 8
33
- },
34
  {
35
  "epoch": 0.02,
36
- "learning_rate": 0.0001,
37
- "loss": 1.3086,
38
- "step": 10
39
- },
40
- {
41
- "epoch": 0.02,
42
- "learning_rate": 0.0001,
43
- "loss": 1.3006,
44
- "step": 12
45
- },
46
- {
47
- "epoch": 0.03,
48
- "learning_rate": 0.0001,
49
- "loss": 1.254,
50
- "step": 14
51
- },
52
- {
53
- "epoch": 0.03,
54
- "learning_rate": 0.0001,
55
- "loss": 1.1302,
56
- "step": 16
57
  },
58
  {
59
  "epoch": 0.03,
60
- "learning_rate": 0.0001,
61
- "loss": 1.1735,
62
- "step": 18
63
- },
64
- {
65
- "epoch": 0.04,
66
- "learning_rate": 0.0001,
67
- "loss": 1.1811,
68
- "step": 20
69
- },
70
- {
71
- "epoch": 0.04,
72
- "learning_rate": 0.0001,
73
- "loss": 1.1911,
74
- "step": 22
75
  },
76
  {
77
  "epoch": 0.04,
78
- "learning_rate": 0.0001,
79
- "loss": 1.3651,
80
- "step": 24
81
- },
82
- {
83
- "epoch": 0.05,
84
- "learning_rate": 0.0001,
85
- "loss": 1.1776,
86
- "step": 26
87
- },
88
- {
89
- "epoch": 0.05,
90
- "learning_rate": 0.0001,
91
- "loss": 1.2349,
92
- "step": 28
93
- },
94
- {
95
- "epoch": 0.05,
96
- "learning_rate": 0.0001,
97
- "loss": 1.397,
98
- "step": 30
99
- },
100
- {
101
- "epoch": 0.06,
102
- "learning_rate": 0.0001,
103
- "loss": 1.3597,
104
- "step": 32
105
- },
106
- {
107
- "epoch": 0.06,
108
- "learning_rate": 0.0001,
109
- "loss": 1.4713,
110
- "step": 34
111
- },
112
- {
113
- "epoch": 0.07,
114
- "learning_rate": 0.0001,
115
- "loss": 1.5044,
116
- "step": 36
117
- },
118
- {
119
- "epoch": 0.07,
120
- "learning_rate": 0.0001,
121
- "loss": 1.4494,
122
- "step": 38
123
- },
124
- {
125
- "epoch": 0.07,
126
- "learning_rate": 0.0001,
127
- "loss": 1.4663,
128
- "step": 40
129
- },
130
- {
131
- "epoch": 0.08,
132
- "learning_rate": 0.0001,
133
- "loss": 1.4471,
134
- "step": 42
135
- },
136
- {
137
- "epoch": 0.08,
138
- "learning_rate": 0.0001,
139
- "loss": 1.4528,
140
- "step": 44
141
- },
142
- {
143
- "epoch": 0.08,
144
- "learning_rate": 0.0001,
145
- "loss": 1.5861,
146
- "step": 46
147
- },
148
- {
149
- "epoch": 0.09,
150
- "learning_rate": 0.0001,
151
- "loss": 1.4502,
152
- "step": 48
153
- },
154
- {
155
- "epoch": 0.09,
156
- "learning_rate": 0.0001,
157
- "loss": 1.4986,
158
- "step": 50
159
  }
160
  ],
161
  "max_steps": 1000,
162
- "num_train_epochs": 2,
163
- "total_flos": 5795675526709248.0,
164
  "trial_name": null,
165
  "trial_params": null
166
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.03627295397868964,
5
+ "global_step": 10,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.01,
12
+ "learning_rate": 2e-05,
13
+ "loss": 1.4096,
14
  "step": 2
15
  },
16
  {
17
  "epoch": 0.01,
18
+ "learning_rate": 2e-05,
19
+ "loss": 1.3036,
20
  "step": 4
21
  },
 
 
 
 
 
 
 
 
 
 
 
 
22
  {
23
  "epoch": 0.02,
24
+ "learning_rate": 2e-05,
25
+ "loss": 1.3123,
26
+ "step": 6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  },
28
  {
29
  "epoch": 0.03,
30
+ "learning_rate": 2e-05,
31
+ "loss": 1.3386,
32
+ "step": 8
 
 
 
 
 
 
 
 
 
 
 
 
33
  },
34
  {
35
  "epoch": 0.04,
36
+ "learning_rate": 2e-05,
37
+ "loss": 1.3342,
38
+ "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  }
40
  ],
41
  "max_steps": 1000,
42
+ "num_train_epochs": 4,
43
+ "total_flos": 3476088198856704.0,
44
  "trial_name": null,
45
  "trial_params": null
46
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:daf4265965e10cc9d642d2d1187c1167f2ce8e9622861d30ff6fbc03bdb2fbda
3
- size 5755
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9a0a46c7cfa60235583101b3ab0112a1794f2418dc4cea13c87968e1699eb02
3
+ size 5691