valteu committed on
Commit
a96c046
·
verified ·
1 Parent(s): 59f6890

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ experiment_config.json filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 128000,
8
+ "eos_token_id": [
9
+ 128001,
10
+ 128008,
11
+ 128009
12
+ ],
13
+ "head_dim": 64,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 2048,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 8192,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "llama",
21
+ "num_attention_heads": 32,
22
+ "num_hidden_layers": 16,
23
+ "num_key_value_heads": 8,
24
+ "pad_token_id": 128004,
25
+ "pretraining_tp": 1,
26
+ "rms_norm_eps": 1e-05,
27
+ "rope_scaling": {
28
+ "factor": 32.0,
29
+ "high_freq_factor": 4.0,
30
+ "low_freq_factor": 1.0,
31
+ "original_max_position_embeddings": 8192,
32
+ "rope_type": "llama3"
33
+ },
34
+ "rope_theta": 500000.0,
35
+ "tie_word_embeddings": true,
36
+ "torch_dtype": "bfloat16",
37
+ "transformers_version": "4.52.4",
38
+ "use_cache": true,
39
+ "vocab_size": 128256
40
+ }
experiment_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f34bb02f52934d2e67815b21048993ff4aa682315d94960dcb50fe22fec8f2d3
3
+ size 16630237
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
+ "transformers_version": "4.52.4"
12
+ }
logs.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25f283317336d7af3e927e3a36af9c657eb1f368851abe7078040fa99ade7759
3
+ size 2471645608
profiler_cache.csv ADDED
The diff for this file is too large to render. See raw diff
 
results.json ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "results": {
3
+ "nlg_e2e_nlg": [
4
+ {
5
+ "rougeL": {
6
+ "precision": 0.5233476402546956,
7
+ "recall": 0.5004834066168418,
8
+ "fmeasure": 0.5033778423328681
9
+ },
10
+ "rouge1": {
11
+ "precision": 0.738364772761526,
12
+ "recall": 0.7074705457282299,
13
+ "fmeasure": 0.711331856203474
14
+ },
15
+ "rouge2": {
16
+ "precision": 0.4466746680451028,
17
+ "recall": 0.42630017746924503,
18
+ "fmeasure": 0.42900092595783323
19
+ }
20
+ },
21
+ [
22
+ null,
23
+ null,
24
+ null,
25
+ null,
26
+ null,
27
+ null,
28
+ null,
29
+ null,
30
+ null,
31
+ null,
32
+ null,
33
+ null,
34
+ null,
35
+ null,
36
+ null,
37
+ null,
38
+ null,
39
+ null,
40
+ null,
41
+ null,
42
+ null,
43
+ null,
44
+ null,
45
+ null,
46
+ null,
47
+ null,
48
+ null,
49
+ null,
50
+ null,
51
+ null,
52
+ null,
53
+ null,
54
+ null,
55
+ null,
56
+ null,
57
+ null,
58
+ null,
59
+ null,
60
+ null,
61
+ null,
62
+ null,
63
+ null,
64
+ null,
65
+ null,
66
+ null,
67
+ null,
68
+ null,
69
+ null,
70
+ null,
71
+ null,
72
+ null,
73
+ null,
74
+ null,
75
+ null,
76
+ null,
77
+ null,
78
+ null,
79
+ null,
80
+ null,
81
+ null,
82
+ null,
83
+ null,
84
+ null,
85
+ null,
86
+ null,
87
+ null,
88
+ null,
89
+ null,
90
+ null,
91
+ null,
92
+ null,
93
+ null,
94
+ null,
95
+ null,
96
+ null,
97
+ null,
98
+ null,
99
+ null,
100
+ null,
101
+ null,
102
+ null,
103
+ null,
104
+ null,
105
+ null,
106
+ null,
107
+ null,
108
+ null,
109
+ null,
110
+ null,
111
+ null,
112
+ null,
113
+ null,
114
+ null,
115
+ null,
116
+ null,
117
+ null,
118
+ null,
119
+ null,
120
+ null,
121
+ null,
122
+ null,
123
+ null,
124
+ null,
125
+ null,
126
+ null,
127
+ null,
128
+ null,
129
+ null,
130
+ null,
131
+ null,
132
+ null,
133
+ null,
134
+ null,
135
+ null,
136
+ null,
137
+ null
138
+ ]
139
+ ],
140
+ "nlg_web_nlg": [
141
+ {
142
+ "rougeL": {
143
+ "precision": 0.3597560446757388,
144
+ "recall": 0.4183793751084057,
145
+ "fmeasure": 0.3678788723981249
146
+ },
147
+ "rouge1": {
148
+ "precision": 0.4936662604776612,
149
+ "recall": 0.5615526335641241,
150
+ "fmeasure": 0.5006596346354545
151
+ },
152
+ "rouge2": {
153
+ "precision": 0.24885348231066495,
154
+ "recall": 0.2811018840034842,
155
+ "fmeasure": 0.25124413866199724
156
+ }
157
+ },
158
+ [
159
+ null,
160
+ null,
161
+ null,
162
+ null,
163
+ null,
164
+ null,
165
+ null,
166
+ null,
167
+ null,
168
+ null,
169
+ null,
170
+ null,
171
+ null,
172
+ null,
173
+ null,
174
+ null,
175
+ null,
176
+ null,
177
+ null,
178
+ null,
179
+ null,
180
+ null,
181
+ null,
182
+ null,
183
+ null,
184
+ null,
185
+ null,
186
+ null,
187
+ null,
188
+ null,
189
+ null,
190
+ null,
191
+ null,
192
+ null,
193
+ null,
194
+ null,
195
+ null,
196
+ null,
197
+ null,
198
+ null,
199
+ null,
200
+ null,
201
+ null,
202
+ null,
203
+ null,
204
+ null,
205
+ null,
206
+ null,
207
+ null,
208
+ null,
209
+ null,
210
+ null,
211
+ null,
212
+ null,
213
+ null,
214
+ null,
215
+ null,
216
+ null,
217
+ null,
218
+ null,
219
+ null,
220
+ null,
221
+ null,
222
+ null,
223
+ null,
224
+ null,
225
+ null,
226
+ null,
227
+ null,
228
+ null,
229
+ null,
230
+ null,
231
+ null,
232
+ null,
233
+ null,
234
+ null,
235
+ null,
236
+ null,
237
+ null,
238
+ null,
239
+ null,
240
+ null,
241
+ null,
242
+ null,
243
+ null,
244
+ null,
245
+ null,
246
+ null,
247
+ null,
248
+ null,
249
+ null,
250
+ null,
251
+ null,
252
+ null,
253
+ null,
254
+ null,
255
+ null,
256
+ null,
257
+ null,
258
+ null,
259
+ null,
260
+ null,
261
+ null,
262
+ null,
263
+ null,
264
+ null,
265
+ null,
266
+ null,
267
+ null,
268
+ null,
269
+ null,
270
+ null
271
+ ]
272
+ ],
273
+ "nlg_samsum": [
274
+ {
275
+ "rougeL": {
276
+ "precision": 0.14660479219951678,
277
+ "recall": 0.34662300022901293,
278
+ "fmeasure": 0.18932591116239866
279
+ },
280
+ "rouge1": {
281
+ "precision": 0.20407216737191722,
282
+ "recall": 0.4699403581756143,
283
+ "fmeasure": 0.26255445182584775
284
+ },
285
+ "rouge2": {
286
+ "precision": 0.0635705246792896,
287
+ "recall": 0.14622738377877775,
288
+ "fmeasure": 0.08132512098584845
289
+ }
290
+ },
291
+ [
292
+ null,
293
+ null,
294
+ null,
295
+ null,
296
+ null,
297
+ null,
298
+ null,
299
+ null,
300
+ null,
301
+ null,
302
+ null,
303
+ null,
304
+ null,
305
+ null,
306
+ null,
307
+ null,
308
+ null,
309
+ null,
310
+ null,
311
+ null,
312
+ null,
313
+ null,
314
+ null,
315
+ null,
316
+ null,
317
+ null,
318
+ null,
319
+ null,
320
+ null,
321
+ null,
322
+ null,
323
+ null,
324
+ null,
325
+ null,
326
+ null,
327
+ null,
328
+ null,
329
+ null,
330
+ null,
331
+ null,
332
+ null,
333
+ null,
334
+ null,
335
+ null,
336
+ null,
337
+ null,
338
+ null,
339
+ null,
340
+ null,
341
+ null,
342
+ null,
343
+ null
344
+ ]
345
+ ],
346
+ "summary": {
347
+ "precision": 0.5182739685640539,
348
+ "recall": 0.5418052882526836,
349
+ "fmeasure": 0.5223936423472224
350
+ }
351
+ },
352
+ "energy": {
353
+ "total": 457659.91993,
354
+ "train": 412865.52845000004,
355
+ "eval": 44794.391480000006
356
+ },
357
+ "train_energy": 412865.52845000004,
358
+ "eval_energy": 44794.391480000006
359
+ }
summary.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "flops": {
3
+ "eval": 15147053117893760,
4
+ "train": 85187890246966032,
5
+ "total": 100334943364859792
6
+ },
7
+ "total": {
8
+ "total": 457659.91993,
9
+ "train": 412865.52845000004,
10
+ "eval": 44794.391480000006
11
+ },
12
+ "best_evals": {
13
+ "pplx": {
14
+ "score": 5.855236127426757,
15
+ "step": 8384
16
+ },
17
+ "rougel": {
18
+ "precision": 0.5182739685640539,
19
+ "recall": 0.5418052882526836,
20
+ "fmeasure": 0.5223936423472224
21
+ }
22
+ }
23
+ }