ccore committed on
Commit
2639f0a
·
1 Parent(s): e6747b6

Delete checkpoint-2500

Browse files
checkpoint-2500/config.json DELETED
@@ -1,26 +0,0 @@
1
- {
2
- "_name_or_path": "./core2",
3
- "architectures": [
4
- "LlamaForCausalLM"
5
- ],
6
- "bos_token_id": 1,
7
- "eos_token_id": 2,
8
- "hidden_act": "silu",
9
- "hidden_size": 4096,
10
- "initializer_range": 0.02,
11
- "intermediate_size": 11008,
12
- "max_position_embeddings": 4096,
13
- "model_type": "llama",
14
- "num_attention_heads": 32,
15
- "num_hidden_layers": 1,
16
- "num_key_value_heads": 32,
17
- "pretraining_tp": 1,
18
- "rms_norm_eps": 1e-06,
19
- "rope_scaling": null,
20
- "rope_theta": 10000.0,
21
- "tie_word_embeddings": false,
22
- "torch_dtype": "bfloat16",
23
- "transformers_version": "4.34.0.dev0",
24
- "use_cache": true,
25
- "vocab_size": 32000
26
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2500/generation_config.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "bos_token_id": 1,
3
- "do_sample": true,
4
- "eos_token_id": 2,
5
- "max_length": 4096,
6
- "pad_token_id": 0,
7
- "temperature": 0.6,
8
- "top_p": 0.9,
9
- "transformers_version": "4.34.0.dev0"
10
- }
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2500/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:207452d1168390f8bf7e0da03e7bb9a9aa38eb6d5387b5a85d363d871c6baf5e
3
- size 1858136133
 
 
 
 
checkpoint-2500/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3af8fa74fa47804a24797b9d185706e77f2ad9bd37916f1a746c442efb8adfe
3
- size 929067029
 
 
 
 
checkpoint-2500/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2278a87cdf86c3f9219223c847f6b27f6b7f15b8226b617f38936e8ff2cbcde
3
- size 14575
 
 
 
 
checkpoint-2500/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4651efbde1cbaf4a0b682250b47a031d03dd4e96bb70202b5d9896030fea354c
3
- size 627
 
 
 
 
checkpoint-2500/special_tokens_map.json DELETED
@@ -1,23 +0,0 @@
1
- {
2
- "bos_token": {
3
- "content": "<s>",
4
- "lstrip": false,
5
- "normalized": false,
6
- "rstrip": false,
7
- "single_word": false
8
- },
9
- "eos_token": {
10
- "content": "</s>",
11
- "lstrip": false,
12
- "normalized": false,
13
- "rstrip": false,
14
- "single_word": false
15
- },
16
- "unk_token": {
17
- "content": "<unk>",
18
- "lstrip": false,
19
- "normalized": false,
20
- "rstrip": false,
21
- "single_word": false
22
- }
23
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2500/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-2500/tokenizer.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
- size 499723
 
 
 
 
checkpoint-2500/tokenizer_config.json DELETED
@@ -1,34 +0,0 @@
1
- {
2
- "bos_token": {
3
- "__type": "AddedToken",
4
- "content": "<s>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- "clean_up_tokenization_spaces": false,
11
- "eos_token": {
12
- "__type": "AddedToken",
13
- "content": "</s>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false
18
- },
19
- "legacy": false,
20
- "model_max_length": 1000000000000000019884624838656,
21
- "pad_token": null,
22
- "padding_side": "right",
23
- "sp_model_kwargs": {},
24
- "tokenizer_class": "LlamaTokenizer",
25
- "unk_token": {
26
- "__type": "AddedToken",
27
- "content": "<unk>",
28
- "lstrip": false,
29
- "normalized": false,
30
- "rstrip": false,
31
- "single_word": false
32
- },
33
- "use_default_system_prompt": true
34
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2500/trainer_state.json DELETED
@@ -1,319 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.11366413385088402,
5
- "eval_steps": 500,
6
- "global_step": 2500,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.0,
13
- "learning_rate": 9.977266527234701e-05,
14
- "loss": 2.6296,
15
- "step": 50
16
- },
17
- {
18
- "epoch": 0.0,
19
- "learning_rate": 9.954533054469402e-05,
20
- "loss": 2.6156,
21
- "step": 100
22
- },
23
- {
24
- "epoch": 0.01,
25
- "learning_rate": 9.931799581704102e-05,
26
- "loss": 2.6035,
27
- "step": 150
28
- },
29
- {
30
- "epoch": 0.01,
31
- "learning_rate": 9.909066108938801e-05,
32
- "loss": 2.5399,
33
- "step": 200
34
- },
35
- {
36
- "epoch": 0.01,
37
- "learning_rate": 9.886332636173502e-05,
38
- "loss": 2.5857,
39
- "step": 250
40
- },
41
- {
42
- "epoch": 0.01,
43
- "learning_rate": 9.863599163408202e-05,
44
- "loss": 2.6078,
45
- "step": 300
46
- },
47
- {
48
- "epoch": 0.02,
49
- "learning_rate": 9.840865690642903e-05,
50
- "loss": 2.5931,
51
- "step": 350
52
- },
53
- {
54
- "epoch": 0.02,
55
- "learning_rate": 9.818132217877604e-05,
56
- "loss": 2.5919,
57
- "step": 400
58
- },
59
- {
60
- "epoch": 0.02,
61
- "learning_rate": 9.795398745112304e-05,
62
- "loss": 2.59,
63
- "step": 450
64
- },
65
- {
66
- "epoch": 0.02,
67
- "learning_rate": 9.772665272347005e-05,
68
- "loss": 2.605,
69
- "step": 500
70
- },
71
- {
72
- "epoch": 0.03,
73
- "learning_rate": 9.749931799581704e-05,
74
- "loss": 2.6026,
75
- "step": 550
76
- },
77
- {
78
- "epoch": 0.03,
79
- "learning_rate": 9.727198326816404e-05,
80
- "loss": 2.5839,
81
- "step": 600
82
- },
83
- {
84
- "epoch": 0.03,
85
- "learning_rate": 9.704464854051105e-05,
86
- "loss": 2.5862,
87
- "step": 650
88
- },
89
- {
90
- "epoch": 0.03,
91
- "learning_rate": 9.681731381285806e-05,
92
- "loss": 2.609,
93
- "step": 700
94
- },
95
- {
96
- "epoch": 0.03,
97
- "learning_rate": 9.658997908520506e-05,
98
- "loss": 2.5759,
99
- "step": 750
100
- },
101
- {
102
- "epoch": 0.04,
103
- "learning_rate": 9.636264435755207e-05,
104
- "loss": 2.6046,
105
- "step": 800
106
- },
107
- {
108
- "epoch": 0.04,
109
- "learning_rate": 9.613530962989907e-05,
110
- "loss": 2.5811,
111
- "step": 850
112
- },
113
- {
114
- "epoch": 0.04,
115
- "learning_rate": 9.590797490224606e-05,
116
- "loss": 2.5797,
117
- "step": 900
118
- },
119
- {
120
- "epoch": 0.04,
121
- "learning_rate": 9.568064017459307e-05,
122
- "loss": 2.5867,
123
- "step": 950
124
- },
125
- {
126
- "epoch": 0.05,
127
- "learning_rate": 9.545330544694008e-05,
128
- "loss": 2.5927,
129
- "step": 1000
130
- },
131
- {
132
- "epoch": 0.05,
133
- "learning_rate": 9.522597071928708e-05,
134
- "loss": 2.568,
135
- "step": 1050
136
- },
137
- {
138
- "epoch": 0.05,
139
- "learning_rate": 9.499863599163409e-05,
140
- "loss": 2.6024,
141
- "step": 1100
142
- },
143
- {
144
- "epoch": 0.05,
145
- "learning_rate": 9.477130126398109e-05,
146
- "loss": 2.5936,
147
- "step": 1150
148
- },
149
- {
150
- "epoch": 0.05,
151
- "learning_rate": 9.45439665363281e-05,
152
- "loss": 2.605,
153
- "step": 1200
154
- },
155
- {
156
- "epoch": 0.06,
157
- "learning_rate": 9.431663180867509e-05,
158
- "loss": 2.5775,
159
- "step": 1250
160
- },
161
- {
162
- "epoch": 0.06,
163
- "learning_rate": 9.40892970810221e-05,
164
- "loss": 2.5752,
165
- "step": 1300
166
- },
167
- {
168
- "epoch": 0.06,
169
- "learning_rate": 9.38619623533691e-05,
170
- "loss": 2.5679,
171
- "step": 1350
172
- },
173
- {
174
- "epoch": 0.06,
175
- "learning_rate": 9.36346276257161e-05,
176
- "loss": 2.5856,
177
- "step": 1400
178
- },
179
- {
180
- "epoch": 0.07,
181
- "learning_rate": 9.340729289806311e-05,
182
- "loss": 2.5787,
183
- "step": 1450
184
- },
185
- {
186
- "epoch": 0.07,
187
- "learning_rate": 9.317995817041012e-05,
188
- "loss": 2.5875,
189
- "step": 1500
190
- },
191
- {
192
- "epoch": 0.07,
193
- "learning_rate": 9.295262344275712e-05,
194
- "loss": 2.5631,
195
- "step": 1550
196
- },
197
- {
198
- "epoch": 0.07,
199
- "learning_rate": 9.272528871510412e-05,
200
- "loss": 2.583,
201
- "step": 1600
202
- },
203
- {
204
- "epoch": 0.08,
205
- "learning_rate": 9.249795398745112e-05,
206
- "loss": 2.5609,
207
- "step": 1650
208
- },
209
- {
210
- "epoch": 0.08,
211
- "learning_rate": 9.227061925979813e-05,
212
- "loss": 2.587,
213
- "step": 1700
214
- },
215
- {
216
- "epoch": 0.08,
217
- "learning_rate": 9.204328453214513e-05,
218
- "loss": 2.5555,
219
- "step": 1750
220
- },
221
- {
222
- "epoch": 0.08,
223
- "learning_rate": 9.181594980449214e-05,
224
- "loss": 2.5488,
225
- "step": 1800
226
- },
227
- {
228
- "epoch": 0.08,
229
- "learning_rate": 9.158861507683914e-05,
230
- "loss": 2.5554,
231
- "step": 1850
232
- },
233
- {
234
- "epoch": 0.09,
235
- "learning_rate": 9.136128034918615e-05,
236
- "loss": 2.5408,
237
- "step": 1900
238
- },
239
- {
240
- "epoch": 0.09,
241
- "learning_rate": 9.113394562153314e-05,
242
- "loss": 2.582,
243
- "step": 1950
244
- },
245
- {
246
- "epoch": 0.09,
247
- "learning_rate": 9.090661089388015e-05,
248
- "loss": 2.5533,
249
- "step": 2000
250
- },
251
- {
252
- "epoch": 0.09,
253
- "learning_rate": 9.067927616622715e-05,
254
- "loss": 2.5432,
255
- "step": 2050
256
- },
257
- {
258
- "epoch": 0.1,
259
- "learning_rate": 9.045194143857416e-05,
260
- "loss": 2.5867,
261
- "step": 2100
262
- },
263
- {
264
- "epoch": 0.1,
265
- "learning_rate": 9.022460671092116e-05,
266
- "loss": 2.5343,
267
- "step": 2150
268
- },
269
- {
270
- "epoch": 0.1,
271
- "learning_rate": 8.999727198326817e-05,
272
- "loss": 2.585,
273
- "step": 2200
274
- },
275
- {
276
- "epoch": 0.1,
277
- "learning_rate": 8.976993725561517e-05,
278
- "loss": 2.5679,
279
- "step": 2250
280
- },
281
- {
282
- "epoch": 0.1,
283
- "learning_rate": 8.954260252796217e-05,
284
- "loss": 2.5515,
285
- "step": 2300
286
- },
287
- {
288
- "epoch": 0.11,
289
- "learning_rate": 8.931526780030917e-05,
290
- "loss": 2.5713,
291
- "step": 2350
292
- },
293
- {
294
- "epoch": 0.11,
295
- "learning_rate": 8.908793307265618e-05,
296
- "loss": 2.5587,
297
- "step": 2400
298
- },
299
- {
300
- "epoch": 0.11,
301
- "learning_rate": 8.886059834500318e-05,
302
- "loss": 2.5774,
303
- "step": 2450
304
- },
305
- {
306
- "epoch": 0.11,
307
- "learning_rate": 8.863326361735019e-05,
308
- "loss": 2.551,
309
- "step": 2500
310
- }
311
- ],
312
- "logging_steps": 50,
313
- "max_steps": 21994,
314
- "num_train_epochs": 1,
315
- "save_steps": 2500,
316
- "total_flos": 4.097549795328e+16,
317
- "trial_name": null,
318
- "trial_params": null
319
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-2500/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2458e0243aaab0a29fd49a56d9466fcdd0e8ef0f37d199202b23e025f597ffca
3
- size 4027