roy1201 commited on
Commit
48c4aa9
·
1 Parent(s): 4021f91
output/adapter_config.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_model_name_or_path": "decapoda-research/llama-7b-hf",
3
+ "bias": "none",
4
+ "enable_lora": null,
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "lora_alpha": 16,
9
+ "lora_dropout": 0.05,
10
+ "merge_weights": false,
11
+ "modules_to_save": null,
12
+ "peft_type": "LORA",
13
+ "r": 8,
14
+ "target_modules": [
15
+ "q_proj",
16
+ "v_proj"
17
+ ],
18
+ "task_type": "CAUSAL_LM"
19
+ }
output/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:264ddd3f28010ed2b3694ad851bfabf22177a5f2dd3bc83d3559e4957795916d
3
+ size 16822989
output/checkpoint-200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63c42f5446e07f8fdaa2033bf0d8f37570b439506b8ad6642138c012304b787d
3
+ size 33661573
output/checkpoint-200/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71b2505f42aa0ff61b4406a8139865c60599c0c4077ef16f301ee4e04a855d4a
3
+ size 16822989
output/checkpoint-200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:601d4500e39cb956c51c0f30a51406f3b11a9e8229a2f77210205587ec4663b2
3
+ size 14575
output/checkpoint-200/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fccf0f9be1bb8f24861e4393745b3e09cc2687125a69e3757955fb0f0925ea5
3
+ size 557
output/checkpoint-200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5847d9cb02afd98e15bd9d3c1a9609ab5d64f04e31347c2e0420149d64790a46
3
+ size 627
output/checkpoint-200/trainer_state.json ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4859355092048645,
3
+ "best_model_checkpoint": "./output/checkpoint-200",
4
+ "epoch": 2.1498152502519314,
5
+ "global_step": 200,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.11,
12
+ "learning_rate": 5e-06,
13
+ "loss": 1.5355,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.21,
18
+ "learning_rate": 1e-05,
19
+ "loss": 1.5218,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.32,
24
+ "learning_rate": 1.5e-05,
25
+ "loss": 1.4822,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.43,
30
+ "learning_rate": 2e-05,
31
+ "loss": 1.4292,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.54,
36
+ "learning_rate": 2.5e-05,
37
+ "loss": 1.3539,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.64,
42
+ "learning_rate": 3e-05,
43
+ "loss": 1.2282,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.75,
48
+ "learning_rate": 3.5e-05,
49
+ "loss": 1.0398,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.86,
54
+ "learning_rate": 4e-05,
55
+ "loss": 0.8365,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.97,
60
+ "learning_rate": 4.5e-05,
61
+ "loss": 0.6591,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 1.07,
66
+ "learning_rate": 5e-05,
67
+ "loss": 0.5647,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 1.18,
72
+ "learning_rate": 4.863013698630137e-05,
73
+ "loss": 0.5343,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 1.29,
78
+ "learning_rate": 4.726027397260274e-05,
79
+ "loss": 0.5249,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 1.4,
84
+ "learning_rate": 4.589041095890411e-05,
85
+ "loss": 0.5126,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 1.5,
90
+ "learning_rate": 4.452054794520548e-05,
91
+ "loss": 0.5099,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 1.61,
96
+ "learning_rate": 4.3150684931506855e-05,
97
+ "loss": 0.5067,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 1.72,
102
+ "learning_rate": 4.1780821917808224e-05,
103
+ "loss": 0.5047,
104
+ "step": 160
105
+ },
106
+ {
107
+ "epoch": 1.83,
108
+ "learning_rate": 4.041095890410959e-05,
109
+ "loss": 0.4949,
110
+ "step": 170
111
+ },
112
+ {
113
+ "epoch": 1.93,
114
+ "learning_rate": 3.904109589041096e-05,
115
+ "loss": 0.4961,
116
+ "step": 180
117
+ },
118
+ {
119
+ "epoch": 2.04,
120
+ "learning_rate": 3.767123287671233e-05,
121
+ "loss": 0.4933,
122
+ "step": 190
123
+ },
124
+ {
125
+ "epoch": 2.15,
126
+ "learning_rate": 3.63013698630137e-05,
127
+ "loss": 0.488,
128
+ "step": 200
129
+ },
130
+ {
131
+ "epoch": 2.15,
132
+ "eval_loss": 0.4859355092048645,
133
+ "eval_runtime": 182.5747,
134
+ "eval_samples_per_second": 10.954,
135
+ "eval_steps_per_second": 1.369,
136
+ "step": 200
137
+ }
138
+ ],
139
+ "max_steps": 465,
140
+ "num_train_epochs": 5,
141
+ "total_flos": 1.0396612701664051e+18,
142
+ "trial_name": null,
143
+ "trial_params": null
144
+ }
output/checkpoint-200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa36d102a7c05bde31c70e8c3969cc59f38e6bbea8fa02842c42b16923b4cbe
3
+ size 3899
output/checkpoint-400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79eaea366a249efe8d0f6ce8b45c1d0632cd88cca387bfe5d8fb0dad8038173f
3
+ size 33661573
output/checkpoint-400/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d83107d6383e8e913fd506b45b52b5e8aec39ed53f3355232c01818e1f9267bf
3
+ size 16822989
output/checkpoint-400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e1d4cf89ceee65d888e964ca4066fc976bb2f8e750e435b6eb2d1e97d43ca0f
3
+ size 14575
output/checkpoint-400/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc40a4be6a52cee4d7658df4041e660ffa02b0d8b5bd143bb8bb397f7b71b1a5
3
+ size 557
output/checkpoint-400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:738a2afbc585e0ae0fb6c994dcc0db2ea2a077e0ec8539560958f8a2dfc96a47
3
+ size 627
output/checkpoint-400/trainer_state.json ADDED
@@ -0,0 +1,272 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.4640955328941345,
3
+ "best_model_checkpoint": "./output/checkpoint-400",
4
+ "epoch": 4.299630500503863,
5
+ "global_step": 400,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.11,
12
+ "learning_rate": 5e-06,
13
+ "loss": 1.5355,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.21,
18
+ "learning_rate": 1e-05,
19
+ "loss": 1.5218,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 0.32,
24
+ "learning_rate": 1.5e-05,
25
+ "loss": 1.4822,
26
+ "step": 30
27
+ },
28
+ {
29
+ "epoch": 0.43,
30
+ "learning_rate": 2e-05,
31
+ "loss": 1.4292,
32
+ "step": 40
33
+ },
34
+ {
35
+ "epoch": 0.54,
36
+ "learning_rate": 2.5e-05,
37
+ "loss": 1.3539,
38
+ "step": 50
39
+ },
40
+ {
41
+ "epoch": 0.64,
42
+ "learning_rate": 3e-05,
43
+ "loss": 1.2282,
44
+ "step": 60
45
+ },
46
+ {
47
+ "epoch": 0.75,
48
+ "learning_rate": 3.5e-05,
49
+ "loss": 1.0398,
50
+ "step": 70
51
+ },
52
+ {
53
+ "epoch": 0.86,
54
+ "learning_rate": 4e-05,
55
+ "loss": 0.8365,
56
+ "step": 80
57
+ },
58
+ {
59
+ "epoch": 0.97,
60
+ "learning_rate": 4.5e-05,
61
+ "loss": 0.6591,
62
+ "step": 90
63
+ },
64
+ {
65
+ "epoch": 1.07,
66
+ "learning_rate": 5e-05,
67
+ "loss": 0.5647,
68
+ "step": 100
69
+ },
70
+ {
71
+ "epoch": 1.18,
72
+ "learning_rate": 4.863013698630137e-05,
73
+ "loss": 0.5343,
74
+ "step": 110
75
+ },
76
+ {
77
+ "epoch": 1.29,
78
+ "learning_rate": 4.726027397260274e-05,
79
+ "loss": 0.5249,
80
+ "step": 120
81
+ },
82
+ {
83
+ "epoch": 1.4,
84
+ "learning_rate": 4.589041095890411e-05,
85
+ "loss": 0.5126,
86
+ "step": 130
87
+ },
88
+ {
89
+ "epoch": 1.5,
90
+ "learning_rate": 4.452054794520548e-05,
91
+ "loss": 0.5099,
92
+ "step": 140
93
+ },
94
+ {
95
+ "epoch": 1.61,
96
+ "learning_rate": 4.3150684931506855e-05,
97
+ "loss": 0.5067,
98
+ "step": 150
99
+ },
100
+ {
101
+ "epoch": 1.72,
102
+ "learning_rate": 4.1780821917808224e-05,
103
+ "loss": 0.5047,
104
+ "step": 160
105
+ },
106
+ {
107
+ "epoch": 1.83,
108
+ "learning_rate": 4.041095890410959e-05,
109
+ "loss": 0.4949,
110
+ "step": 170
111
+ },
112
+ {
113
+ "epoch": 1.93,
114
+ "learning_rate": 3.904109589041096e-05,
115
+ "loss": 0.4961,
116
+ "step": 180
117
+ },
118
+ {
119
+ "epoch": 2.04,
120
+ "learning_rate": 3.767123287671233e-05,
121
+ "loss": 0.4933,
122
+ "step": 190
123
+ },
124
+ {
125
+ "epoch": 2.15,
126
+ "learning_rate": 3.63013698630137e-05,
127
+ "loss": 0.488,
128
+ "step": 200
129
+ },
130
+ {
131
+ "epoch": 2.15,
132
+ "eval_loss": 0.4859355092048645,
133
+ "eval_runtime": 182.5747,
134
+ "eval_samples_per_second": 10.954,
135
+ "eval_steps_per_second": 1.369,
136
+ "step": 200
137
+ },
138
+ {
139
+ "epoch": 2.26,
140
+ "learning_rate": 3.493150684931507e-05,
141
+ "loss": 0.4836,
142
+ "step": 210
143
+ },
144
+ {
145
+ "epoch": 2.36,
146
+ "learning_rate": 3.356164383561644e-05,
147
+ "loss": 0.4798,
148
+ "step": 220
149
+ },
150
+ {
151
+ "epoch": 2.47,
152
+ "learning_rate": 3.219178082191781e-05,
153
+ "loss": 0.4815,
154
+ "step": 230
155
+ },
156
+ {
157
+ "epoch": 2.58,
158
+ "learning_rate": 3.082191780821918e-05,
159
+ "loss": 0.4738,
160
+ "step": 240
161
+ },
162
+ {
163
+ "epoch": 2.69,
164
+ "learning_rate": 2.945205479452055e-05,
165
+ "loss": 0.4751,
166
+ "step": 250
167
+ },
168
+ {
169
+ "epoch": 2.79,
170
+ "learning_rate": 2.808219178082192e-05,
171
+ "loss": 0.4771,
172
+ "step": 260
173
+ },
174
+ {
175
+ "epoch": 2.9,
176
+ "learning_rate": 2.671232876712329e-05,
177
+ "loss": 0.4767,
178
+ "step": 270
179
+ },
180
+ {
181
+ "epoch": 3.01,
182
+ "learning_rate": 2.534246575342466e-05,
183
+ "loss": 0.4756,
184
+ "step": 280
185
+ },
186
+ {
187
+ "epoch": 3.12,
188
+ "learning_rate": 2.3972602739726026e-05,
189
+ "loss": 0.4681,
190
+ "step": 290
191
+ },
192
+ {
193
+ "epoch": 3.22,
194
+ "learning_rate": 2.2602739726027396e-05,
195
+ "loss": 0.4707,
196
+ "step": 300
197
+ },
198
+ {
199
+ "epoch": 3.33,
200
+ "learning_rate": 2.1232876712328768e-05,
201
+ "loss": 0.4704,
202
+ "step": 310
203
+ },
204
+ {
205
+ "epoch": 3.44,
206
+ "learning_rate": 1.9863013698630137e-05,
207
+ "loss": 0.4677,
208
+ "step": 320
209
+ },
210
+ {
211
+ "epoch": 3.55,
212
+ "learning_rate": 1.8493150684931506e-05,
213
+ "loss": 0.4676,
214
+ "step": 330
215
+ },
216
+ {
217
+ "epoch": 3.65,
218
+ "learning_rate": 1.7123287671232875e-05,
219
+ "loss": 0.4659,
220
+ "step": 340
221
+ },
222
+ {
223
+ "epoch": 3.76,
224
+ "learning_rate": 1.5753424657534248e-05,
225
+ "loss": 0.4717,
226
+ "step": 350
227
+ },
228
+ {
229
+ "epoch": 3.87,
230
+ "learning_rate": 1.4383561643835617e-05,
231
+ "loss": 0.4679,
232
+ "step": 360
233
+ },
234
+ {
235
+ "epoch": 3.98,
236
+ "learning_rate": 1.3013698630136986e-05,
237
+ "loss": 0.466,
238
+ "step": 370
239
+ },
240
+ {
241
+ "epoch": 4.08,
242
+ "learning_rate": 1.1643835616438355e-05,
243
+ "loss": 0.4644,
244
+ "step": 380
245
+ },
246
+ {
247
+ "epoch": 4.19,
248
+ "learning_rate": 1.0273972602739726e-05,
249
+ "loss": 0.4671,
250
+ "step": 390
251
+ },
252
+ {
253
+ "epoch": 4.3,
254
+ "learning_rate": 8.904109589041095e-06,
255
+ "loss": 0.4649,
256
+ "step": 400
257
+ },
258
+ {
259
+ "epoch": 4.3,
260
+ "eval_loss": 0.4640955328941345,
261
+ "eval_runtime": 182.5257,
262
+ "eval_samples_per_second": 10.957,
263
+ "eval_steps_per_second": 1.37,
264
+ "step": 400
265
+ }
266
+ ],
267
+ "max_steps": 465,
268
+ "num_train_epochs": 5,
269
+ "total_flos": 2.0793225403328102e+18,
270
+ "trial_name": null,
271
+ "trial_params": null
272
+ }
output/checkpoint-400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa36d102a7c05bde31c70e8c3969cc59f38e6bbea8fa02842c42b16923b4cbe
3
+ size 3899