chiayisu commited on
Commit
c6199d8
·
verified ·
1 Parent(s): 0108957

Delete llama/cce

Browse files
llama/cce/adapter_config.json DELETED
@@ -1,18 +0,0 @@
1
- {
2
- "base_model_name_or_path": "decapoda-research/llama-7b-hf",
3
- "bias": "none",
4
- "enable_lora": null,
5
- "fan_in_fan_out": false,
6
- "inference_mode": true,
7
- "lora_alpha": 16,
8
- "lora_dropout": 0.05,
9
- "merge_weights": false,
10
- "modules_to_save": null,
11
- "peft_type": "LORA",
12
- "r": 8,
13
- "target_modules": [
14
- "q_proj",
15
- "v_proj"
16
- ],
17
- "task_type": "CAUSAL_LM"
18
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama/cce/checkpoint-1000/trainer_state.json DELETED
@@ -1,356 +0,0 @@
1
- {
2
- "best_metric": 0.6784626245498657,
3
- "best_model_checkpoint": "lora-alpaca/checkpoint-1000",
4
- "epoch": 0.7511913425197775,
5
- "global_step": 1000,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.02,
12
- "learning_rate": 5.9999999999999995e-05,
13
- "loss": 2.2234,
14
- "step": 20
15
- },
16
- {
17
- "epoch": 0.03,
18
- "learning_rate": 0.00011999999999999999,
19
- "loss": 1.6986,
20
- "step": 40
21
- },
22
- {
23
- "epoch": 0.05,
24
- "learning_rate": 0.00017999999999999998,
25
- "loss": 1.0108,
26
- "step": 60
27
- },
28
- {
29
- "epoch": 0.06,
30
- "learning_rate": 0.00023999999999999998,
31
- "loss": 0.8498,
32
- "step": 80
33
- },
34
- {
35
- "epoch": 0.08,
36
- "learning_rate": 0.0003,
37
- "loss": 0.7975,
38
- "step": 100
39
- },
40
- {
41
- "epoch": 0.09,
42
- "learning_rate": 0.00029512591389114535,
43
- "loss": 0.7621,
44
- "step": 120
45
- },
46
- {
47
- "epoch": 0.11,
48
- "learning_rate": 0.0002902518277822908,
49
- "loss": 0.7465,
50
- "step": 140
51
- },
52
- {
53
- "epoch": 0.12,
54
- "learning_rate": 0.0002853777416734362,
55
- "loss": 0.7367,
56
- "step": 160
57
- },
58
- {
59
- "epoch": 0.14,
60
- "learning_rate": 0.0002805036555645816,
61
- "loss": 0.732,
62
- "step": 180
63
- },
64
- {
65
- "epoch": 0.15,
66
- "learning_rate": 0.00027562956945572704,
67
- "loss": 0.7253,
68
- "step": 200
69
- },
70
- {
71
- "epoch": 0.15,
72
- "eval_loss": 0.7244793176651001,
73
- "eval_runtime": 129.0223,
74
- "eval_samples_per_second": 15.501,
75
- "eval_steps_per_second": 1.938,
76
- "step": 200
77
- },
78
- {
79
- "epoch": 0.17,
80
- "learning_rate": 0.0002707554833468724,
81
- "loss": 0.7226,
82
- "step": 220
83
- },
84
- {
85
- "epoch": 0.18,
86
- "learning_rate": 0.00026588139723801785,
87
- "loss": 0.716,
88
- "step": 240
89
- },
90
- {
91
- "epoch": 0.2,
92
- "learning_rate": 0.00026100731112916323,
93
- "loss": 0.7182,
94
- "step": 260
95
- },
96
- {
97
- "epoch": 0.21,
98
- "learning_rate": 0.00025613322502030867,
99
- "loss": 0.7026,
100
- "step": 280
101
- },
102
- {
103
- "epoch": 0.23,
104
- "learning_rate": 0.0002512591389114541,
105
- "loss": 0.7139,
106
- "step": 300
107
- },
108
- {
109
- "epoch": 0.24,
110
- "learning_rate": 0.0002463850528025995,
111
- "loss": 0.7156,
112
- "step": 320
113
- },
114
- {
115
- "epoch": 0.26,
116
- "learning_rate": 0.00024151096669374492,
117
- "loss": 0.7069,
118
- "step": 340
119
- },
120
- {
121
- "epoch": 0.27,
122
- "learning_rate": 0.0002366368805848903,
123
- "loss": 0.6998,
124
- "step": 360
125
- },
126
- {
127
- "epoch": 0.29,
128
- "learning_rate": 0.0002317627944760357,
129
- "loss": 0.7058,
130
- "step": 380
131
- },
132
- {
133
- "epoch": 0.3,
134
- "learning_rate": 0.00022688870836718114,
135
- "loss": 0.7004,
136
- "step": 400
137
- },
138
- {
139
- "epoch": 0.3,
140
- "eval_loss": 0.6993061900138855,
141
- "eval_runtime": 129.0376,
142
- "eval_samples_per_second": 15.499,
143
- "eval_steps_per_second": 1.937,
144
- "step": 400
145
- },
146
- {
147
- "epoch": 0.32,
148
- "learning_rate": 0.00022201462225832652,
149
- "loss": 0.6933,
150
- "step": 420
151
- },
152
- {
153
- "epoch": 0.33,
154
- "learning_rate": 0.00021714053614947196,
155
- "loss": 0.6868,
156
- "step": 440
157
- },
158
- {
159
- "epoch": 0.35,
160
- "learning_rate": 0.00021226645004061737,
161
- "loss": 0.6955,
162
- "step": 460
163
- },
164
- {
165
- "epoch": 0.36,
166
- "learning_rate": 0.00020739236393176277,
167
- "loss": 0.6878,
168
- "step": 480
169
- },
170
- {
171
- "epoch": 0.38,
172
- "learning_rate": 0.00020251827782290818,
173
- "loss": 0.6891,
174
- "step": 500
175
- },
176
- {
177
- "epoch": 0.39,
178
- "learning_rate": 0.0001976441917140536,
179
- "loss": 0.686,
180
- "step": 520
181
- },
182
- {
183
- "epoch": 0.41,
184
- "learning_rate": 0.00019277010560519902,
185
- "loss": 0.6977,
186
- "step": 540
187
- },
188
- {
189
- "epoch": 0.42,
190
- "learning_rate": 0.00018789601949634443,
191
- "loss": 0.688,
192
- "step": 560
193
- },
194
- {
195
- "epoch": 0.44,
196
- "learning_rate": 0.0001830219333874898,
197
- "loss": 0.6838,
198
- "step": 580
199
- },
200
- {
201
- "epoch": 0.45,
202
- "learning_rate": 0.00017814784727863525,
203
- "loss": 0.682,
204
- "step": 600
205
- },
206
- {
207
- "epoch": 0.45,
208
- "eval_loss": 0.6886340379714966,
209
- "eval_runtime": 128.9722,
210
- "eval_samples_per_second": 15.507,
211
- "eval_steps_per_second": 1.938,
212
- "step": 600
213
- },
214
- {
215
- "epoch": 0.47,
216
- "learning_rate": 0.00017327376116978065,
217
- "loss": 0.6879,
218
- "step": 620
219
- },
220
- {
221
- "epoch": 0.48,
222
- "learning_rate": 0.0001683996750609261,
223
- "loss": 0.6871,
224
- "step": 640
225
- },
226
- {
227
- "epoch": 0.5,
228
- "learning_rate": 0.00016352558895207147,
229
- "loss": 0.6823,
230
- "step": 660
231
- },
232
- {
233
- "epoch": 0.51,
234
- "learning_rate": 0.00015865150284321688,
235
- "loss": 0.6732,
236
- "step": 680
237
- },
238
- {
239
- "epoch": 0.53,
240
- "learning_rate": 0.0001537774167343623,
241
- "loss": 0.6832,
242
- "step": 700
243
- },
244
- {
245
- "epoch": 0.54,
246
- "learning_rate": 0.0001489033306255077,
247
- "loss": 0.6795,
248
- "step": 720
249
- },
250
- {
251
- "epoch": 0.56,
252
- "learning_rate": 0.00014402924451665313,
253
- "loss": 0.686,
254
- "step": 740
255
- },
256
- {
257
- "epoch": 0.57,
258
- "learning_rate": 0.00013915515840779853,
259
- "loss": 0.6799,
260
- "step": 760
261
- },
262
- {
263
- "epoch": 0.59,
264
- "learning_rate": 0.00013428107229894394,
265
- "loss": 0.6738,
266
- "step": 780
267
- },
268
- {
269
- "epoch": 0.6,
270
- "learning_rate": 0.00012940698619008935,
271
- "loss": 0.6801,
272
- "step": 800
273
- },
274
- {
275
- "epoch": 0.6,
276
- "eval_loss": 0.6822482943534851,
277
- "eval_runtime": 128.9861,
278
- "eval_samples_per_second": 15.506,
279
- "eval_steps_per_second": 1.938,
280
- "step": 800
281
- },
282
- {
283
- "epoch": 0.62,
284
- "learning_rate": 0.00012453290008123476,
285
- "loss": 0.6803,
286
- "step": 820
287
- },
288
- {
289
- "epoch": 0.63,
290
- "learning_rate": 0.00011965881397238017,
291
- "loss": 0.6647,
292
- "step": 840
293
- },
294
- {
295
- "epoch": 0.65,
296
- "learning_rate": 0.00011478472786352559,
297
- "loss": 0.6838,
298
- "step": 860
299
- },
300
- {
301
- "epoch": 0.66,
302
- "learning_rate": 0.000109910641754671,
303
- "loss": 0.6793,
304
- "step": 880
305
- },
306
- {
307
- "epoch": 0.68,
308
- "learning_rate": 0.0001050365556458164,
309
- "loss": 0.6882,
310
- "step": 900
311
- },
312
- {
313
- "epoch": 0.69,
314
- "learning_rate": 0.00010016246953696181,
315
- "loss": 0.6733,
316
- "step": 920
317
- },
318
- {
319
- "epoch": 0.71,
320
- "learning_rate": 9.528838342810722e-05,
321
- "loss": 0.6786,
322
- "step": 940
323
- },
324
- {
325
- "epoch": 0.72,
326
- "learning_rate": 9.041429731925264e-05,
327
- "loss": 0.682,
328
- "step": 960
329
- },
330
- {
331
- "epoch": 0.74,
332
- "learning_rate": 8.554021121039803e-05,
333
- "loss": 0.6737,
334
- "step": 980
335
- },
336
- {
337
- "epoch": 0.75,
338
- "learning_rate": 8.066612510154345e-05,
339
- "loss": 0.6681,
340
- "step": 1000
341
- },
342
- {
343
- "epoch": 0.75,
344
- "eval_loss": 0.6784626245498657,
345
- "eval_runtime": 129.063,
346
- "eval_samples_per_second": 15.496,
347
- "eval_steps_per_second": 1.937,
348
- "step": 1000
349
- }
350
- ],
351
- "max_steps": 1331,
352
- "num_train_epochs": 1,
353
- "total_flos": 1.29988124737536e+18,
354
- "trial_name": null,
355
- "trial_params": null
356
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama/cce/checkpoint-1200/trainer_state.json DELETED
@@ -1,424 +0,0 @@
1
- {
2
- "best_metric": 0.6754332184791565,
3
- "best_model_checkpoint": "lora-alpaca/checkpoint-1200",
4
- "epoch": 0.901429611023733,
5
- "global_step": 1200,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.02,
12
- "learning_rate": 5.9999999999999995e-05,
13
- "loss": 2.2234,
14
- "step": 20
15
- },
16
- {
17
- "epoch": 0.03,
18
- "learning_rate": 0.00011999999999999999,
19
- "loss": 1.6986,
20
- "step": 40
21
- },
22
- {
23
- "epoch": 0.05,
24
- "learning_rate": 0.00017999999999999998,
25
- "loss": 1.0108,
26
- "step": 60
27
- },
28
- {
29
- "epoch": 0.06,
30
- "learning_rate": 0.00023999999999999998,
31
- "loss": 0.8498,
32
- "step": 80
33
- },
34
- {
35
- "epoch": 0.08,
36
- "learning_rate": 0.0003,
37
- "loss": 0.7975,
38
- "step": 100
39
- },
40
- {
41
- "epoch": 0.09,
42
- "learning_rate": 0.00029512591389114535,
43
- "loss": 0.7621,
44
- "step": 120
45
- },
46
- {
47
- "epoch": 0.11,
48
- "learning_rate": 0.0002902518277822908,
49
- "loss": 0.7465,
50
- "step": 140
51
- },
52
- {
53
- "epoch": 0.12,
54
- "learning_rate": 0.0002853777416734362,
55
- "loss": 0.7367,
56
- "step": 160
57
- },
58
- {
59
- "epoch": 0.14,
60
- "learning_rate": 0.0002805036555645816,
61
- "loss": 0.732,
62
- "step": 180
63
- },
64
- {
65
- "epoch": 0.15,
66
- "learning_rate": 0.00027562956945572704,
67
- "loss": 0.7253,
68
- "step": 200
69
- },
70
- {
71
- "epoch": 0.15,
72
- "eval_loss": 0.7244793176651001,
73
- "eval_runtime": 129.0223,
74
- "eval_samples_per_second": 15.501,
75
- "eval_steps_per_second": 1.938,
76
- "step": 200
77
- },
78
- {
79
- "epoch": 0.17,
80
- "learning_rate": 0.0002707554833468724,
81
- "loss": 0.7226,
82
- "step": 220
83
- },
84
- {
85
- "epoch": 0.18,
86
- "learning_rate": 0.00026588139723801785,
87
- "loss": 0.716,
88
- "step": 240
89
- },
90
- {
91
- "epoch": 0.2,
92
- "learning_rate": 0.00026100731112916323,
93
- "loss": 0.7182,
94
- "step": 260
95
- },
96
- {
97
- "epoch": 0.21,
98
- "learning_rate": 0.00025613322502030867,
99
- "loss": 0.7026,
100
- "step": 280
101
- },
102
- {
103
- "epoch": 0.23,
104
- "learning_rate": 0.0002512591389114541,
105
- "loss": 0.7139,
106
- "step": 300
107
- },
108
- {
109
- "epoch": 0.24,
110
- "learning_rate": 0.0002463850528025995,
111
- "loss": 0.7156,
112
- "step": 320
113
- },
114
- {
115
- "epoch": 0.26,
116
- "learning_rate": 0.00024151096669374492,
117
- "loss": 0.7069,
118
- "step": 340
119
- },
120
- {
121
- "epoch": 0.27,
122
- "learning_rate": 0.0002366368805848903,
123
- "loss": 0.6998,
124
- "step": 360
125
- },
126
- {
127
- "epoch": 0.29,
128
- "learning_rate": 0.0002317627944760357,
129
- "loss": 0.7058,
130
- "step": 380
131
- },
132
- {
133
- "epoch": 0.3,
134
- "learning_rate": 0.00022688870836718114,
135
- "loss": 0.7004,
136
- "step": 400
137
- },
138
- {
139
- "epoch": 0.3,
140
- "eval_loss": 0.6993061900138855,
141
- "eval_runtime": 129.0376,
142
- "eval_samples_per_second": 15.499,
143
- "eval_steps_per_second": 1.937,
144
- "step": 400
145
- },
146
- {
147
- "epoch": 0.32,
148
- "learning_rate": 0.00022201462225832652,
149
- "loss": 0.6933,
150
- "step": 420
151
- },
152
- {
153
- "epoch": 0.33,
154
- "learning_rate": 0.00021714053614947196,
155
- "loss": 0.6868,
156
- "step": 440
157
- },
158
- {
159
- "epoch": 0.35,
160
- "learning_rate": 0.00021226645004061737,
161
- "loss": 0.6955,
162
- "step": 460
163
- },
164
- {
165
- "epoch": 0.36,
166
- "learning_rate": 0.00020739236393176277,
167
- "loss": 0.6878,
168
- "step": 480
169
- },
170
- {
171
- "epoch": 0.38,
172
- "learning_rate": 0.00020251827782290818,
173
- "loss": 0.6891,
174
- "step": 500
175
- },
176
- {
177
- "epoch": 0.39,
178
- "learning_rate": 0.0001976441917140536,
179
- "loss": 0.686,
180
- "step": 520
181
- },
182
- {
183
- "epoch": 0.41,
184
- "learning_rate": 0.00019277010560519902,
185
- "loss": 0.6977,
186
- "step": 540
187
- },
188
- {
189
- "epoch": 0.42,
190
- "learning_rate": 0.00018789601949634443,
191
- "loss": 0.688,
192
- "step": 560
193
- },
194
- {
195
- "epoch": 0.44,
196
- "learning_rate": 0.0001830219333874898,
197
- "loss": 0.6838,
198
- "step": 580
199
- },
200
- {
201
- "epoch": 0.45,
202
- "learning_rate": 0.00017814784727863525,
203
- "loss": 0.682,
204
- "step": 600
205
- },
206
- {
207
- "epoch": 0.45,
208
- "eval_loss": 0.6886340379714966,
209
- "eval_runtime": 128.9722,
210
- "eval_samples_per_second": 15.507,
211
- "eval_steps_per_second": 1.938,
212
- "step": 600
213
- },
214
- {
215
- "epoch": 0.47,
216
- "learning_rate": 0.00017327376116978065,
217
- "loss": 0.6879,
218
- "step": 620
219
- },
220
- {
221
- "epoch": 0.48,
222
- "learning_rate": 0.0001683996750609261,
223
- "loss": 0.6871,
224
- "step": 640
225
- },
226
- {
227
- "epoch": 0.5,
228
- "learning_rate": 0.00016352558895207147,
229
- "loss": 0.6823,
230
- "step": 660
231
- },
232
- {
233
- "epoch": 0.51,
234
- "learning_rate": 0.00015865150284321688,
235
- "loss": 0.6732,
236
- "step": 680
237
- },
238
- {
239
- "epoch": 0.53,
240
- "learning_rate": 0.0001537774167343623,
241
- "loss": 0.6832,
242
- "step": 700
243
- },
244
- {
245
- "epoch": 0.54,
246
- "learning_rate": 0.0001489033306255077,
247
- "loss": 0.6795,
248
- "step": 720
249
- },
250
- {
251
- "epoch": 0.56,
252
- "learning_rate": 0.00014402924451665313,
253
- "loss": 0.686,
254
- "step": 740
255
- },
256
- {
257
- "epoch": 0.57,
258
- "learning_rate": 0.00013915515840779853,
259
- "loss": 0.6799,
260
- "step": 760
261
- },
262
- {
263
- "epoch": 0.59,
264
- "learning_rate": 0.00013428107229894394,
265
- "loss": 0.6738,
266
- "step": 780
267
- },
268
- {
269
- "epoch": 0.6,
270
- "learning_rate": 0.00012940698619008935,
271
- "loss": 0.6801,
272
- "step": 800
273
- },
274
- {
275
- "epoch": 0.6,
276
- "eval_loss": 0.6822482943534851,
277
- "eval_runtime": 128.9861,
278
- "eval_samples_per_second": 15.506,
279
- "eval_steps_per_second": 1.938,
280
- "step": 800
281
- },
282
- {
283
- "epoch": 0.62,
284
- "learning_rate": 0.00012453290008123476,
285
- "loss": 0.6803,
286
- "step": 820
287
- },
288
- {
289
- "epoch": 0.63,
290
- "learning_rate": 0.00011965881397238017,
291
- "loss": 0.6647,
292
- "step": 840
293
- },
294
- {
295
- "epoch": 0.65,
296
- "learning_rate": 0.00011478472786352559,
297
- "loss": 0.6838,
298
- "step": 860
299
- },
300
- {
301
- "epoch": 0.66,
302
- "learning_rate": 0.000109910641754671,
303
- "loss": 0.6793,
304
- "step": 880
305
- },
306
- {
307
- "epoch": 0.68,
308
- "learning_rate": 0.0001050365556458164,
309
- "loss": 0.6882,
310
- "step": 900
311
- },
312
- {
313
- "epoch": 0.69,
314
- "learning_rate": 0.00010016246953696181,
315
- "loss": 0.6733,
316
- "step": 920
317
- },
318
- {
319
- "epoch": 0.71,
320
- "learning_rate": 9.528838342810722e-05,
321
- "loss": 0.6786,
322
- "step": 940
323
- },
324
- {
325
- "epoch": 0.72,
326
- "learning_rate": 9.041429731925264e-05,
327
- "loss": 0.682,
328
- "step": 960
329
- },
330
- {
331
- "epoch": 0.74,
332
- "learning_rate": 8.554021121039803e-05,
333
- "loss": 0.6737,
334
- "step": 980
335
- },
336
- {
337
- "epoch": 0.75,
338
- "learning_rate": 8.066612510154345e-05,
339
- "loss": 0.6681,
340
- "step": 1000
341
- },
342
- {
343
- "epoch": 0.75,
344
- "eval_loss": 0.6784626245498657,
345
- "eval_runtime": 129.063,
346
- "eval_samples_per_second": 15.496,
347
- "eval_steps_per_second": 1.937,
348
- "step": 1000
349
- },
350
- {
351
- "epoch": 0.77,
352
- "learning_rate": 7.579203899268886e-05,
353
- "loss": 0.678,
354
- "step": 1020
355
- },
356
- {
357
- "epoch": 0.78,
358
- "learning_rate": 7.091795288383428e-05,
359
- "loss": 0.6692,
360
- "step": 1040
361
- },
362
- {
363
- "epoch": 0.8,
364
- "learning_rate": 6.604386677497969e-05,
365
- "loss": 0.6767,
366
- "step": 1060
367
- },
368
- {
369
- "epoch": 0.81,
370
- "learning_rate": 6.11697806661251e-05,
371
- "loss": 0.6699,
372
- "step": 1080
373
- },
374
- {
375
- "epoch": 0.83,
376
- "learning_rate": 5.6295694557270505e-05,
377
- "loss": 0.678,
378
- "step": 1100
379
- },
380
- {
381
- "epoch": 0.84,
382
- "learning_rate": 5.142160844841592e-05,
383
- "loss": 0.6802,
384
- "step": 1120
385
- },
386
- {
387
- "epoch": 0.86,
388
- "learning_rate": 4.654752233956133e-05,
389
- "loss": 0.6773,
390
- "step": 1140
391
- },
392
- {
393
- "epoch": 0.87,
394
- "learning_rate": 4.167343623070674e-05,
395
- "loss": 0.6719,
396
- "step": 1160
397
- },
398
- {
399
- "epoch": 0.89,
400
- "learning_rate": 3.679935012185215e-05,
401
- "loss": 0.6764,
402
- "step": 1180
403
- },
404
- {
405
- "epoch": 0.9,
406
- "learning_rate": 3.192526401299756e-05,
407
- "loss": 0.6745,
408
- "step": 1200
409
- },
410
- {
411
- "epoch": 0.9,
412
- "eval_loss": 0.6754332184791565,
413
- "eval_runtime": 128.9887,
414
- "eval_samples_per_second": 15.505,
415
- "eval_steps_per_second": 1.938,
416
- "step": 1200
417
- }
418
- ],
419
- "max_steps": 1331,
420
- "num_train_epochs": 1,
421
- "total_flos": 1.559857496850432e+18,
422
- "trial_name": null,
423
- "trial_params": null
424
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
llama/cce/checkpoint-800/trainer_state.json DELETED
@@ -1,288 +0,0 @@
1
- {
2
- "best_metric": 0.6822482943534851,
3
- "best_model_checkpoint": "lora-alpaca/checkpoint-800",
4
- "epoch": 0.600953074015822,
5
- "global_step": 800,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.02,
12
- "learning_rate": 5.9999999999999995e-05,
13
- "loss": 2.2234,
14
- "step": 20
15
- },
16
- {
17
- "epoch": 0.03,
18
- "learning_rate": 0.00011999999999999999,
19
- "loss": 1.6986,
20
- "step": 40
21
- },
22
- {
23
- "epoch": 0.05,
24
- "learning_rate": 0.00017999999999999998,
25
- "loss": 1.0108,
26
- "step": 60
27
- },
28
- {
29
- "epoch": 0.06,
30
- "learning_rate": 0.00023999999999999998,
31
- "loss": 0.8498,
32
- "step": 80
33
- },
34
- {
35
- "epoch": 0.08,
36
- "learning_rate": 0.0003,
37
- "loss": 0.7975,
38
- "step": 100
39
- },
40
- {
41
- "epoch": 0.09,
42
- "learning_rate": 0.00029512591389114535,
43
- "loss": 0.7621,
44
- "step": 120
45
- },
46
- {
47
- "epoch": 0.11,
48
- "learning_rate": 0.0002902518277822908,
49
- "loss": 0.7465,
50
- "step": 140
51
- },
52
- {
53
- "epoch": 0.12,
54
- "learning_rate": 0.0002853777416734362,
55
- "loss": 0.7367,
56
- "step": 160
57
- },
58
- {
59
- "epoch": 0.14,
60
- "learning_rate": 0.0002805036555645816,
61
- "loss": 0.732,
62
- "step": 180
63
- },
64
- {
65
- "epoch": 0.15,
66
- "learning_rate": 0.00027562956945572704,
67
- "loss": 0.7253,
68
- "step": 200
69
- },
70
- {
71
- "epoch": 0.15,
72
- "eval_loss": 0.7244793176651001,
73
- "eval_runtime": 129.0223,
74
- "eval_samples_per_second": 15.501,
75
- "eval_steps_per_second": 1.938,
76
- "step": 200
77
- },
78
- {
79
- "epoch": 0.17,
80
- "learning_rate": 0.0002707554833468724,
81
- "loss": 0.7226,
82
- "step": 220
83
- },
84
- {
85
- "epoch": 0.18,
86
- "learning_rate": 0.00026588139723801785,
87
- "loss": 0.716,
88
- "step": 240
89
- },
90
- {
91
- "epoch": 0.2,
92
- "learning_rate": 0.00026100731112916323,
93
- "loss": 0.7182,
94
- "step": 260
95
- },
96
- {
97
- "epoch": 0.21,
98
- "learning_rate": 0.00025613322502030867,
99
- "loss": 0.7026,
100
- "step": 280
101
- },
102
- {
103
- "epoch": 0.23,
104
- "learning_rate": 0.0002512591389114541,
105
- "loss": 0.7139,
106
- "step": 300
107
- },
108
- {
109
- "epoch": 0.24,
110
- "learning_rate": 0.0002463850528025995,
111
- "loss": 0.7156,
112
- "step": 320
113
- },
114
- {
115
- "epoch": 0.26,
116
- "learning_rate": 0.00024151096669374492,
117
- "loss": 0.7069,
118
- "step": 340
119
- },
120
- {
121
- "epoch": 0.27,
122
- "learning_rate": 0.0002366368805848903,
123
- "loss": 0.6998,
124
- "step": 360
125
- },
126
- {
127
- "epoch": 0.29,
128
- "learning_rate": 0.0002317627944760357,
129
- "loss": 0.7058,
130
- "step": 380
131
- },
132
- {
133
- "epoch": 0.3,
134
- "learning_rate": 0.00022688870836718114,
135
- "loss": 0.7004,
136
- "step": 400
137
- },
138
- {
139
- "epoch": 0.3,
140
- "eval_loss": 0.6993061900138855,
141
- "eval_runtime": 129.0376,
142
- "eval_samples_per_second": 15.499,
143
- "eval_steps_per_second": 1.937,
144
- "step": 400
145
- },
146
- {
147
- "epoch": 0.32,
148
- "learning_rate": 0.00022201462225832652,
149
- "loss": 0.6933,
150
- "step": 420
151
- },
152
- {
153
- "epoch": 0.33,
154
- "learning_rate": 0.00021714053614947196,
155
- "loss": 0.6868,
156
- "step": 440
157
- },
158
- {
159
- "epoch": 0.35,
160
- "learning_rate": 0.00021226645004061737,
161
- "loss": 0.6955,
162
- "step": 460
163
- },
164
- {
165
- "epoch": 0.36,
166
- "learning_rate": 0.00020739236393176277,
167
- "loss": 0.6878,
168
- "step": 480
169
- },
170
- {
171
- "epoch": 0.38,
172
- "learning_rate": 0.00020251827782290818,
173
- "loss": 0.6891,
174
- "step": 500
175
- },
176
- {
177
- "epoch": 0.39,
178
- "learning_rate": 0.0001976441917140536,
179
- "loss": 0.686,
180
- "step": 520
181
- },
182
- {
183
- "epoch": 0.41,
184
- "learning_rate": 0.00019277010560519902,
185
- "loss": 0.6977,
186
- "step": 540
187
- },
188
- {
189
- "epoch": 0.42,
190
- "learning_rate": 0.00018789601949634443,
191
- "loss": 0.688,
192
- "step": 560
193
- },
194
- {
195
- "epoch": 0.44,
196
- "learning_rate": 0.0001830219333874898,
197
- "loss": 0.6838,
198
- "step": 580
199
- },
200
- {
201
- "epoch": 0.45,
202
- "learning_rate": 0.00017814784727863525,
203
- "loss": 0.682,
204
- "step": 600
205
- },
206
- {
207
- "epoch": 0.45,
208
- "eval_loss": 0.6886340379714966,
209
- "eval_runtime": 128.9722,
210
- "eval_samples_per_second": 15.507,
211
- "eval_steps_per_second": 1.938,
212
- "step": 600
213
- },
214
- {
215
- "epoch": 0.47,
216
- "learning_rate": 0.00017327376116978065,
217
- "loss": 0.6879,
218
- "step": 620
219
- },
220
- {
221
- "epoch": 0.48,
222
- "learning_rate": 0.0001683996750609261,
223
- "loss": 0.6871,
224
- "step": 640
225
- },
226
- {
227
- "epoch": 0.5,
228
- "learning_rate": 0.00016352558895207147,
229
- "loss": 0.6823,
230
- "step": 660
231
- },
232
- {
233
- "epoch": 0.51,
234
- "learning_rate": 0.00015865150284321688,
235
- "loss": 0.6732,
236
- "step": 680
237
- },
238
- {
239
- "epoch": 0.53,
240
- "learning_rate": 0.0001537774167343623,
241
- "loss": 0.6832,
242
- "step": 700
243
- },
244
- {
245
- "epoch": 0.54,
246
- "learning_rate": 0.0001489033306255077,
247
- "loss": 0.6795,
248
- "step": 720
249
- },
250
- {
251
- "epoch": 0.56,
252
- "learning_rate": 0.00014402924451665313,
253
- "loss": 0.686,
254
- "step": 740
255
- },
256
- {
257
- "epoch": 0.57,
258
- "learning_rate": 0.00013915515840779853,
259
- "loss": 0.6799,
260
- "step": 760
261
- },
262
- {
263
- "epoch": 0.59,
264
- "learning_rate": 0.00013428107229894394,
265
- "loss": 0.6738,
266
- "step": 780
267
- },
268
- {
269
- "epoch": 0.6,
270
- "learning_rate": 0.00012940698619008935,
271
- "loss": 0.6801,
272
- "step": 800
273
- },
274
- {
275
- "epoch": 0.6,
276
- "eval_loss": 0.6822482943534851,
277
- "eval_runtime": 128.9861,
278
- "eval_samples_per_second": 15.506,
279
- "eval_steps_per_second": 1.938,
280
- "step": 800
281
- }
282
- ],
283
- "max_steps": 1331,
284
- "num_train_epochs": 1,
285
- "total_flos": 1.039904997900288e+18,
286
- "trial_name": null,
287
- "trial_params": null
288
- }