valteu committed on
Commit
ef45fbb
·
verified ·
1 Parent(s): b5fb4a6

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ experiment_config.json filter=lfs diff=lfs merge=lfs -text
37
+ logs.jsonl filter=lfs diff=lfs merge=lfs -text
38
+ profiler_cache.csv filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128008,
11
+ 128009
12
+ ],
13
+ "head_dim": 64,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 2048,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 8192,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "llama",
21
+ "num_attention_heads": 32,
22
+ "num_hidden_layers": 16,
23
+ "num_key_value_heads": 8,
24
+ "pad_token_id": 128004,
25
+ "pretraining_tp": 1,
26
+ "rms_norm_eps": 1e-05,
27
+ "rope_scaling": {
28
+ "factor": 32.0,
29
+ "high_freq_factor": 4.0,
30
+ "low_freq_factor": 1.0,
31
+ "original_max_position_embeddings": 8192,
32
+ "rope_type": "llama3"
33
+ },
34
+ "rope_theta": 500000.0,
35
+ "tie_word_embeddings": true,
36
+ "torch_dtype": "bfloat16",
37
+ "transformers_version": "4.52.4",
38
+ "use_cache": true,
39
+ "vocab_size": 128256
40
+ }
experiment_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84560359c71dc9999ad184d8751dbd3f8aaf72d4cc9ebefba0809a8a754e923e
3
+ size 42120235
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
+ "transformers_version": "4.52.4"
12
+ }
logs.jsonl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0af9eae0cb53fef32986004acc3140a20d9c82ccd2c2eeb7b557d32da523f66
3
+ size 20075270
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07acb85e77c59c5330c469edbb122caeed92b0c39f25169ca5c983f1ecd2df95
3
+ size 2471645608
profiler_cache.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f2f2972698a1ae6f32d550dfcf5604e795011ab6d3c67bb8633fa49491f6a5b
3
+ size 13181227
results.json ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "nlg_e2e_nlg": [
4
+ {
5
+ "rougeL": {
6
+ "precision": 0.5406017148925292,
7
+ "recall": 0.48800373744280096,
8
+ "fmeasure": 0.5049349309742983
9
+ },
10
+ "rouge1": {
11
+ "precision": 0.7606933187271919,
12
+ "recall": 0.6887970416203039,
13
+ "fmeasure": 0.7119780306931692
14
+ },
15
+ "rouge2": {
16
+ "precision": 0.4593836588032529,
17
+ "recall": 0.4145581793236708,
18
+ "fmeasure": 0.4287808163676091
19
+ }
20
+ },
21
+ [
22
+ null,
23
+ null,
24
+ null,
25
+ null,
26
+ null,
27
+ null,
28
+ null,
29
+ null,
30
+ null,
31
+ null,
32
+ null,
33
+ null,
34
+ null,
35
+ null,
36
+ null,
37
+ null,
38
+ null,
39
+ null,
40
+ null,
41
+ null,
42
+ null,
43
+ null,
44
+ null,
45
+ null,
46
+ null,
47
+ null,
48
+ null,
49
+ null,
50
+ null,
51
+ null,
52
+ null,
53
+ null,
54
+ null,
55
+ null,
56
+ null,
57
+ null,
58
+ null,
59
+ null,
60
+ null,
61
+ null,
62
+ null,
63
+ null,
64
+ null,
65
+ null,
66
+ null,
67
+ null,
68
+ null,
69
+ null,
70
+ null,
71
+ null,
72
+ null,
73
+ null,
74
+ null,
75
+ null,
76
+ null,
77
+ null,
78
+ null,
79
+ null,
80
+ null,
81
+ null,
82
+ null,
83
+ null,
84
+ null,
85
+ null,
86
+ null,
87
+ null,
88
+ null,
89
+ null,
90
+ null,
91
+ null,
92
+ null,
93
+ null,
94
+ null,
95
+ null,
96
+ null,
97
+ null,
98
+ null,
99
+ null,
100
+ null,
101
+ null,
102
+ null,
103
+ null,
104
+ null,
105
+ null,
106
+ null,
107
+ null,
108
+ null,
109
+ null,
110
+ null,
111
+ null,
112
+ null,
113
+ null,
114
+ null,
115
+ null,
116
+ null,
117
+ null,
118
+ null,
119
+ null,
120
+ null,
121
+ null,
122
+ null,
123
+ null,
124
+ null,
125
+ null,
126
+ null,
127
+ null,
128
+ null,
129
+ null,
130
+ null,
131
+ null,
132
+ null,
133
+ null,
134
+ null,
135
+ null,
136
+ null,
137
+ null
138
+ ]
139
+ ],
140
+ "nlg_web_nlg": [
141
+ {
142
+ "rougeL": {
143
+ "precision": 0.5920050740105443,
144
+ "recall": 0.5639361832687325,
145
+ "fmeasure": 0.5719459154017307
146
+ },
147
+ "rouge1": {
148
+ "precision": 0.7688909132287153,
149
+ "recall": 0.7326808024529268,
150
+ "fmeasure": 0.7430172042131359
151
+ },
152
+ "rouge2": {
153
+ "precision": 0.4974154316312452,
154
+ "recall": 0.4738640359810202,
155
+ "fmeasure": 0.4803118688360008
156
+ }
157
+ },
158
+ [
159
+ null,
160
+ null,
161
+ null,
162
+ null,
163
+ null,
164
+ null,
165
+ null,
166
+ null,
167
+ null,
168
+ null,
169
+ null,
170
+ null,
171
+ null,
172
+ null,
173
+ null,
174
+ null,
175
+ null,
176
+ null,
177
+ null,
178
+ null,
179
+ null,
180
+ null,
181
+ null,
182
+ null,
183
+ null,
184
+ null,
185
+ null,
186
+ null,
187
+ null,
188
+ null,
189
+ null,
190
+ null,
191
+ null,
192
+ null,
193
+ null,
194
+ null,
195
+ null,
196
+ null,
197
+ null,
198
+ null,
199
+ null,
200
+ null,
201
+ null,
202
+ null,
203
+ null,
204
+ null,
205
+ null,
206
+ null,
207
+ null,
208
+ null,
209
+ null,
210
+ null,
211
+ null,
212
+ null,
213
+ null,
214
+ null,
215
+ null,
216
+ null,
217
+ null,
218
+ null,
219
+ null,
220
+ null,
221
+ null,
222
+ null,
223
+ null,
224
+ null,
225
+ null,
226
+ null,
227
+ null,
228
+ null,
229
+ null,
230
+ null,
231
+ null,
232
+ null,
233
+ null,
234
+ null,
235
+ null,
236
+ null,
237
+ null,
238
+ null,
239
+ null,
240
+ null,
241
+ null,
242
+ null,
243
+ null,
244
+ null,
245
+ null,
246
+ null,
247
+ null,
248
+ null,
249
+ null,
250
+ null,
251
+ null,
252
+ null,
253
+ null,
254
+ null,
255
+ null,
256
+ null,
257
+ null,
258
+ null,
259
+ null,
260
+ null,
261
+ null,
262
+ null,
263
+ null,
264
+ null,
265
+ null,
266
+ null,
267
+ null,
268
+ null,
269
+ null,
270
+ null
271
+ ]
272
+ ],
273
+ "nlg_samsum": [
274
+ {
275
+ "rougeL": {
276
+ "precision": 0.477362652260863,
277
+ "recall": 0.41249844951572223,
278
+ "fmeasure": 0.41827002038752514
279
+ },
280
+ "rouge1": {
281
+ "precision": 0.5727427889817115,
282
+ "recall": 0.4947975313891264,
283
+ "fmeasure": 0.5018637703753203
284
+ },
285
+ "rouge2": {
286
+ "precision": 0.29084285967694495,
287
+ "recall": 0.2500593965673145,
288
+ "fmeasure": 0.2528424424005315
289
+ }
290
+ },
291
+ [
292
+ null,
293
+ null,
294
+ null,
295
+ null,
296
+ null,
297
+ null,
298
+ null,
299
+ null,
300
+ null,
301
+ null,
302
+ null,
303
+ null,
304
+ null,
305
+ null,
306
+ null,
307
+ null,
308
+ null,
309
+ null,
310
+ null,
311
+ null,
312
+ null,
313
+ null,
314
+ null,
315
+ null,
316
+ null,
317
+ null,
318
+ null,
319
+ null,
320
+ null,
321
+ null,
322
+ null,
323
+ null,
324
+ null,
325
+ null,
326
+ null,
327
+ null,
328
+ null,
329
+ null,
330
+ null,
331
+ null,
332
+ null,
333
+ null,
334
+ null,
335
+ null,
336
+ null,
337
+ null,
338
+ null,
339
+ null,
340
+ null,
341
+ null,
342
+ null,
343
+ null
344
+ ]
345
+ ],
346
+ "summary": {
347
+ "precision": 0.5683189356634631,
348
+ "recall": 0.5535117858316538,
349
+ "fmeasure": 0.5503719881890102
350
+ }
351
+ },
352
+ "energy": {
353
+ "total": 1559351.06314,
354
+ "train": 1303821.16999,
355
+ "eval": 255529.89315000002
356
+ },
357
+ "train_energy": 1303821.16999,
358
+ "eval_energy": 255529.89315000002
359
+ }
summary.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "flops": {
3
+ "eval": 106579590075347200,
4
+ "train": 360972201130439664,
5
+ "total": 467551791205786864
6
+ },
7
+ "total": {
8
+ "total": 1559351.06314,
9
+ "train": 1303821.16999,
10
+ "eval": 255529.89315000002
11
+ },
12
+ "best_evals": {
13
+ "pplx": {
14
+ "score": 5.938638352264631,
15
+ "step": 22219
16
+ },
17
+ "rougel": {
18
+ "precision": 0.5683189356634631,
19
+ "recall": 0.5535117858316538,
20
+ "fmeasure": 0.5503719881890102
21
+ }
22
+ }
23
+ }