AbstractPhil committed on
Commit
22930a6
·
verified ·
1 Parent(s): 8d82e1c

Upload folder using huggingface_hub

Browse files
beeper_final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4695a25162370f3bb519f2419bd6d7f11f7b0b043b1eb77a2addc34f18372ce4
3
+ size 116810550
beeper_final.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:058c19b4326e05820b694c1377235963dc099362bdb0a97baff8669997694eb6
3
+ size 116795584
beeper_rose_final.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee0f8495abba954d7125d2cb282d32b18465a3fb5d66550716b6e16ed52aa900
3
+ size 111301146
beeper_rose_final.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b8b682beba4d8e3a7ad2a4977bc429c7f83120fd2707879d85cd704030c68a2
3
+ size 111286288
config.json ADDED
@@ -0,0 +1,993 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "Rose-Beeper",
3
+ "context": 512,
4
+ "vocab_size": 8192,
5
+ "dim": 512,
6
+ "n_layers": 6,
7
+ "n_heads": 8,
8
+ "mlp_ratio": 4.0,
9
+ "dropout": 0.0,
10
+ "resid_dropout": 0.1,
11
+ "grad_checkpoint": false,
12
+ "compile_model": false,
13
+ "tokenizer_path": "beeper.tokenizer.json",
14
+ "add_bos_eos": true,
15
+ "span_corrupt_frac": 0.0,
16
+ "val_ratio": 0.01,
17
+ "test_ratio": 0.01,
18
+ "max_rows_per_dataset": null,
19
+ "dataset_cache_verbose": true,
20
+ "batch_size": 32,
21
+ "grad_accum_steps": 1,
22
+ "epochs": 10,
23
+ "lr": 0.0003,
24
+ "betas": [
25
+ 0.9,
26
+ 0.95
27
+ ],
28
+ "weight_decay": 0.1,
29
+ "warmup_steps": 500,
30
+ "max_steps": null,
31
+ "clip_grad": 1.0,
32
+ "min_lr": 1e-06,
33
+ "label_smoothing": 0.0,
34
+ "mixed_precision": "bf16",
35
+ "log_dir": "./runs/rose_beeper",
36
+ "log_interval": 50,
37
+ "ckpt_dir": "./beeper_checkpoints",
38
+ "export_dir": "./beeper_export",
39
+ "temperature": 0.9,
40
+ "top_k": 40,
41
+ "top_p": 0.9,
42
+ "repetition_penalty": 1.1,
43
+ "presence_penalty": 0.6,
44
+ "frequency_penalty": 0.0,
45
+ "hf_repo": "AbstractPhil/beeper-rose-v4",
46
+ "upload_to_hub": true,
47
+ "resume": true,
48
+ "resume_tag": "best_model.safetensors",
49
+ "resume_strict": false,
50
+ "pent_level": "medium",
51
+ "lambda_contrast": 0.25,
52
+ "pent_min_edge": 0.5,
53
+ "pent_temp": 0.1,
54
+ "contrast_warmup": 800,
55
+ "lambda_rose": 0.1,
56
+ "rose_scale": 1.8,
57
+ "lambda_geom_sep": 0.5,
58
+ "geom_sep_margin": 0.9,
59
+ "lambda_geom": 0.3,
60
+ "lambda_geom_angle": 0.8,
61
+ "lambda_geom_var": 0.3,
62
+ "lambda_geom_edge": 0.3,
63
+ "lambda_geom_vol": 0.6,
64
+ "lambda_geom_minrel": 1.0,
65
+ "geom_min_edge_rel": 0.6,
66
+ "geom_vol_lower_frac": 0.85,
67
+ "geom_sample_classes": 64,
68
+ "geom_sample_k": 64,
69
+ "seed": 1337,
70
+ "corpus": [
71
+ {
72
+ "name": "TinyStories",
73
+ "path": "roneneldan/TinyStories",
74
+ "split": "train[20%:30%]",
75
+ "weight": 0.1,
76
+ "dialect": [
77
+ 0.6,
78
+ 0.1,
79
+ 0.05,
80
+ 0.05,
81
+ 0.2
82
+ ]
83
+ },
84
+ {
85
+ "name": "WikipediaEN",
86
+ "path": "wikimedia/wikipedia",
87
+ "config": "20231101.en",
88
+ "split": "train[2%:5%]",
89
+ "weight": 0.4,
90
+ "dialect": [
91
+ 0.12,
92
+ 0.58,
93
+ 0.1,
94
+ 0.1,
95
+ 0.1
96
+ ]
97
+ },
98
+ {
99
+ "name": "AGNews",
100
+ "path": "ag_news",
101
+ "split": "train[50%:]",
102
+ "weight": 0.1,
103
+ "dialect": [
104
+ 0.2,
105
+ 0.5,
106
+ 0.1,
107
+ 0.1,
108
+ 0.1
109
+ ]
110
+ },
111
+ {
112
+ "name": "GSM8K",
113
+ "path": "openai/gsm8k",
114
+ "config": "main",
115
+ "split": "train[20%:40%]",
116
+ "weight": 0.6,
117
+ "dialect": [
118
+ 0.1,
119
+ 0.15,
120
+ 0.5,
121
+ 0.15,
122
+ 0.1
123
+ ]
124
+ },
125
+ {
126
+ "name": "AI2-ARC-Easy",
127
+ "path": "allenai/ai2_arc",
128
+ "config": "ARC-Easy",
129
+ "split": "train[20%:30%]",
130
+ "weight": 0.5,
131
+ "dialect": [
132
+ 0.05,
133
+ 0.15,
134
+ 0.4,
135
+ 0.25,
136
+ 0.15
137
+ ]
138
+ },
139
+ {
140
+ "name": "HH-RLHF",
141
+ "path": "Anthropic/hh-rlhf",
142
+ "split": "train[2%:5%]",
143
+ "weight": 0.4,
144
+ "dialect": [
145
+ 0.1,
146
+ 0.25,
147
+ 0.2,
148
+ 0.25,
149
+ 0.2
150
+ ]
151
+ },
152
+ {
153
+ "name": "SVAMP",
154
+ "path": "ChilleD/SVAMP",
155
+ "split": "train",
156
+ "weight": 0.25,
157
+ "dialect": [
158
+ 0.1,
159
+ 0.15,
160
+ 0.55,
161
+ 0.15,
162
+ 0.05
163
+ ]
164
+ },
165
+ {
166
+ "name": "MATH-500",
167
+ "path": "HuggingFaceH4/MATH-500",
168
+ "split": "test",
169
+ "weight": 0.25,
170
+ "dialect": [
171
+ 0.05,
172
+ 0.15,
173
+ 0.6,
174
+ 0.15,
175
+ 0.05
176
+ ]
177
+ },
178
+ {
179
+ "name": "SEP",
180
+ "path": "AiresPucrs/stanford-encyclopedia-philosophy",
181
+ "split": "train",
182
+ "weight": 0.3,
183
+ "dialect": [
184
+ 0.05,
185
+ 0.45,
186
+ 0.18,
187
+ 0.22,
188
+ 0.1
189
+ ]
190
+ },
191
+ {
192
+ "name": "ETHICS-commonsense",
193
+ "path": "hendrycks/ethics",
194
+ "config": "commonsense",
195
+ "split": "train",
196
+ "weight": 0.45,
197
+ "dialect": [
198
+ 0.1,
199
+ 0.3,
200
+ 0.18,
201
+ 0.24,
202
+ 0.18
203
+ ]
204
+ },
205
+ {
206
+ "name": "ETHICS-deontology",
207
+ "path": "hendrycks/ethics",
208
+ "config": "deontology",
209
+ "split": "train",
210
+ "weight": 0.35,
211
+ "dialect": [
212
+ 0.1,
213
+ 0.3,
214
+ 0.18,
215
+ 0.24,
216
+ 0.18
217
+ ]
218
+ },
219
+ {
220
+ "name": "ETHICS-justice",
221
+ "path": "hendrycks/ethics",
222
+ "config": "justice",
223
+ "split": "train",
224
+ "weight": 0.35,
225
+ "dialect": [
226
+ 0.1,
227
+ 0.3,
228
+ 0.18,
229
+ 0.24,
230
+ 0.18
231
+ ]
232
+ },
233
+ {
234
+ "name": "ETHICS-utilitarianism",
235
+ "path": "hendrycks/ethics",
236
+ "config": "utilitarianism",
237
+ "split": "train",
238
+ "weight": 0.35,
239
+ "dialect": [
240
+ 0.1,
241
+ 0.3,
242
+ 0.18,
243
+ 0.24,
244
+ 0.18
245
+ ]
246
+ },
247
+ {
248
+ "name": "ETHICS-virtue",
249
+ "path": "hendrycks/ethics",
250
+ "config": "virtue",
251
+ "split": "train",
252
+ "weight": 0.35,
253
+ "dialect": [
254
+ 0.1,
255
+ 0.3,
256
+ 0.18,
257
+ 0.24,
258
+ 0.18
259
+ ]
260
+ },
261
+ {
262
+ "name": "SocialChem101",
263
+ "path": "allenai/social-chemistry-101",
264
+ "split": "train",
265
+ "weight": 0.65,
266
+ "dialect": [
267
+ 0.15,
268
+ 0.25,
269
+ 0.2,
270
+ 0.2,
271
+ 0.2
272
+ ]
273
+ },
274
+ {
275
+ "name": "MoralStories",
276
+ "path": "demelin/moral_stories",
277
+ "split": "train",
278
+ "weight": 0.35,
279
+ "dialect": [
280
+ 0.2,
281
+ 0.2,
282
+ 0.2,
283
+ 0.2,
284
+ 0.2
285
+ ]
286
+ },
287
+ {
288
+ "name": "ART-AbductiveNLI",
289
+ "path": "allenai/art",
290
+ "split": "train",
291
+ "weight": 0.3,
292
+ "dialect": [
293
+ 0.05,
294
+ 0.2,
295
+ 0.45,
296
+ 0.2,
297
+ 0.1
298
+ ]
299
+ },
300
+ {
301
+ "name": "EntailmentBankV3",
302
+ "path": "ariesutiono/entailment-bank-v3",
303
+ "split": "train",
304
+ "weight": 0.35,
305
+ "dialect": [
306
+ 0.05,
307
+ 0.25,
308
+ 0.45,
309
+ 0.15,
310
+ 0.1
311
+ ]
312
+ },
313
+ {
314
+ "name": "LogiQA2.0NLI",
315
+ "path": "tasksource/logiqa-2.0-nli",
316
+ "split": "train",
317
+ "weight": 0.65,
318
+ "dialect": [
319
+ 0.05,
320
+ 0.25,
321
+ 0.45,
322
+ 0.15,
323
+ 0.1
324
+ ]
325
+ },
326
+ {
327
+ "name": "TruthfulQA-MC",
328
+ "path": "EleutherAI/truthful_qa_mc",
329
+ "split": "validation",
330
+ "weight": 0.65,
331
+ "dialect": [
332
+ 0.05,
333
+ 0.35,
334
+ 0.25,
335
+ 0.25,
336
+ 0.1
337
+ ]
338
+ },
339
+ {
340
+ "name": "VUA20-Metaphor",
341
+ "path": "CreativeLang/vua20_metaphor",
342
+ "split": "train[5%:15%]",
343
+ "weight": 0.4,
344
+ "dialect": [
345
+ 0.3,
346
+ 0.1,
347
+ 0.1,
348
+ 0.15,
349
+ 0.35
350
+ ]
351
+ }
352
+ ],
353
+ "capoera": {
354
+ "enable": true,
355
+ "topic_bins": 512,
356
+ "mood_bins": 7
357
+ },
358
+ "_ok_entries": [
359
+ {
360
+ "name": "TinyStories",
361
+ "path": "roneneldan/TinyStories",
362
+ "split": "train[20%:30%]",
363
+ "weight": 0.1,
364
+ "dialect": [
365
+ 0.6000000238418579,
366
+ 0.10000000149011612,
367
+ 0.05000000074505806,
368
+ 0.05000000074505806,
369
+ 0.20000000298023224
370
+ ],
371
+ "class_id": 0,
372
+ "p": 0.01234567901234568
373
+ },
374
+ {
375
+ "name": "WikipediaEN",
376
+ "path": "wikimedia/wikipedia",
377
+ "config": "20231101.en",
378
+ "split": "train[2%:5%]",
379
+ "weight": 0.4,
380
+ "dialect": [
381
+ 0.11999999731779099,
382
+ 0.5799999833106995,
383
+ 0.10000000149011612,
384
+ 0.10000000149011612,
385
+ 0.10000000149011612
386
+ ],
387
+ "class_id": 1,
388
+ "p": 0.04938271604938272
389
+ },
390
+ {
391
+ "name": "AGNews",
392
+ "path": "ag_news",
393
+ "split": "train[50%:]",
394
+ "weight": 0.1,
395
+ "dialect": [
396
+ 0.20000000298023224,
397
+ 0.5,
398
+ 0.10000000149011612,
399
+ 0.10000000149011612,
400
+ 0.10000000149011612
401
+ ],
402
+ "class_id": 2,
403
+ "p": 0.01234567901234568
404
+ },
405
+ {
406
+ "name": "GSM8K",
407
+ "path": "openai/gsm8k",
408
+ "config": "main",
409
+ "split": "train[20%:40%]",
410
+ "weight": 0.6,
411
+ "dialect": [
412
+ 0.10000000149011612,
413
+ 0.15000000596046448,
414
+ 0.5,
415
+ 0.15000000596046448,
416
+ 0.10000000149011612
417
+ ],
418
+ "class_id": 3,
419
+ "p": 0.07407407407407407
420
+ },
421
+ {
422
+ "name": "AI2-ARC-Easy",
423
+ "path": "allenai/ai2_arc",
424
+ "config": "ARC-Easy",
425
+ "split": "train[20%:30%]",
426
+ "weight": 0.5,
427
+ "dialect": [
428
+ 0.05000000074505806,
429
+ 0.15000000596046448,
430
+ 0.4000000059604645,
431
+ 0.25,
432
+ 0.15000000596046448
433
+ ],
434
+ "class_id": 4,
435
+ "p": 0.0617283950617284
436
+ },
437
+ {
438
+ "name": "HH-RLHF",
439
+ "path": "Anthropic/hh-rlhf",
440
+ "split": "train[2%:5%]",
441
+ "weight": 0.4,
442
+ "dialect": [
443
+ 0.10000000149011612,
444
+ 0.25,
445
+ 0.20000000298023224,
446
+ 0.25,
447
+ 0.20000000298023224
448
+ ],
449
+ "class_id": 5,
450
+ "p": 0.04938271604938272
451
+ },
452
+ {
453
+ "name": "SVAMP",
454
+ "path": "ChilleD/SVAMP",
455
+ "split": "train",
456
+ "weight": 0.25,
457
+ "dialect": [
458
+ 0.10000000149011612,
459
+ 0.15000000596046448,
460
+ 0.550000011920929,
461
+ 0.15000000596046448,
462
+ 0.05000000074505806
463
+ ],
464
+ "class_id": 6,
465
+ "p": 0.0308641975308642
466
+ },
467
+ {
468
+ "name": "MATH-500",
469
+ "path": "HuggingFaceH4/MATH-500",
470
+ "split": "test",
471
+ "weight": 0.25,
472
+ "dialect": [
473
+ 0.05000000074505806,
474
+ 0.15000000596046448,
475
+ 0.6000000238418579,
476
+ 0.15000000596046448,
477
+ 0.05000000074505806
478
+ ],
479
+ "class_id": 7,
480
+ "p": 0.0308641975308642
481
+ },
482
+ {
483
+ "name": "SEP",
484
+ "path": "AiresPucrs/stanford-encyclopedia-philosophy",
485
+ "split": "train",
486
+ "weight": 0.3,
487
+ "dialect": [
488
+ 0.05000000074505806,
489
+ 0.44999998807907104,
490
+ 0.18000000715255737,
491
+ 0.2199999988079071,
492
+ 0.10000000149011612
493
+ ],
494
+ "class_id": 8,
495
+ "p": 0.037037037037037035
496
+ },
497
+ {
498
+ "name": "ETHICS-commonsense",
499
+ "path": "hendrycks/ethics",
500
+ "config": "commonsense",
501
+ "split": "train",
502
+ "weight": 0.45,
503
+ "dialect": [
504
+ 0.10000000149011612,
505
+ 0.30000001192092896,
506
+ 0.18000000715255737,
507
+ 0.23999999463558197,
508
+ 0.18000000715255737
509
+ ],
510
+ "class_id": 9,
511
+ "p": 0.05555555555555556
512
+ },
513
+ {
514
+ "name": "ETHICS-deontology",
515
+ "path": "hendrycks/ethics",
516
+ "config": "deontology",
517
+ "split": "train",
518
+ "weight": 0.35,
519
+ "dialect": [
520
+ 0.10000000149011612,
521
+ 0.30000001192092896,
522
+ 0.18000000715255737,
523
+ 0.23999999463558197,
524
+ 0.18000000715255737
525
+ ],
526
+ "class_id": 10,
527
+ "p": 0.043209876543209874
528
+ },
529
+ {
530
+ "name": "ETHICS-justice",
531
+ "path": "hendrycks/ethics",
532
+ "config": "justice",
533
+ "split": "train",
534
+ "weight": 0.35,
535
+ "dialect": [
536
+ 0.10000000149011612,
537
+ 0.30000001192092896,
538
+ 0.18000000715255737,
539
+ 0.23999999463558197,
540
+ 0.18000000715255737
541
+ ],
542
+ "class_id": 11,
543
+ "p": 0.043209876543209874
544
+ },
545
+ {
546
+ "name": "ETHICS-utilitarianism",
547
+ "path": "hendrycks/ethics",
548
+ "config": "utilitarianism",
549
+ "split": "train",
550
+ "weight": 0.35,
551
+ "dialect": [
552
+ 0.10000000149011612,
553
+ 0.30000001192092896,
554
+ 0.18000000715255737,
555
+ 0.23999999463558197,
556
+ 0.18000000715255737
557
+ ],
558
+ "class_id": 12,
559
+ "p": 0.043209876543209874
560
+ },
561
+ {
562
+ "name": "ETHICS-virtue",
563
+ "path": "hendrycks/ethics",
564
+ "config": "virtue",
565
+ "split": "train",
566
+ "weight": 0.35,
567
+ "dialect": [
568
+ 0.10000000149011612,
569
+ 0.30000001192092896,
570
+ 0.18000000715255737,
571
+ 0.23999999463558197,
572
+ 0.18000000715255737
573
+ ],
574
+ "class_id": 13,
575
+ "p": 0.043209876543209874
576
+ },
577
+ {
578
+ "name": "SocialChem101",
579
+ "path": "allenai/social-chemistry-101",
580
+ "split": "train",
581
+ "weight": 0.65,
582
+ "dialect": [
583
+ 0.15000000596046448,
584
+ 0.25,
585
+ 0.20000000298023224,
586
+ 0.20000000298023224,
587
+ 0.20000000298023224
588
+ ],
589
+ "class_id": 14,
590
+ "p": 0.08024691358024692
591
+ },
592
+ {
593
+ "name": "MoralStories",
594
+ "path": "demelin/moral_stories",
595
+ "split": "train",
596
+ "weight": 0.35,
597
+ "dialect": [
598
+ 0.20000000298023224,
599
+ 0.20000000298023224,
600
+ 0.20000000298023224,
601
+ 0.20000000298023224,
602
+ 0.20000000298023224
603
+ ],
604
+ "class_id": 15,
605
+ "p": 0.043209876543209874
606
+ },
607
+ {
608
+ "name": "ART-AbductiveNLI",
609
+ "path": "allenai/art",
610
+ "split": "train",
611
+ "weight": 0.3,
612
+ "dialect": [
613
+ 0.05000000074505806,
614
+ 0.20000000298023224,
615
+ 0.44999998807907104,
616
+ 0.20000000298023224,
617
+ 0.10000000149011612
618
+ ],
619
+ "class_id": 16,
620
+ "p": 0.037037037037037035
621
+ },
622
+ {
623
+ "name": "EntailmentBankV3",
624
+ "path": "ariesutiono/entailment-bank-v3",
625
+ "split": "train",
626
+ "weight": 0.35,
627
+ "dialect": [
628
+ 0.05000000074505806,
629
+ 0.25,
630
+ 0.44999998807907104,
631
+ 0.15000000596046448,
632
+ 0.10000000149011612
633
+ ],
634
+ "class_id": 17,
635
+ "p": 0.043209876543209874
636
+ },
637
+ {
638
+ "name": "LogiQA2.0NLI",
639
+ "path": "tasksource/logiqa-2.0-nli",
640
+ "split": "train",
641
+ "weight": 0.65,
642
+ "dialect": [
643
+ 0.05000000074505806,
644
+ 0.25,
645
+ 0.44999998807907104,
646
+ 0.15000000596046448,
647
+ 0.10000000149011612
648
+ ],
649
+ "class_id": 18,
650
+ "p": 0.08024691358024692
651
+ },
652
+ {
653
+ "name": "TruthfulQA-MC",
654
+ "path": "EleutherAI/truthful_qa_mc",
655
+ "split": "validation",
656
+ "weight": 0.65,
657
+ "dialect": [
658
+ 0.05000000074505806,
659
+ 0.3499999940395355,
660
+ 0.25,
661
+ 0.25,
662
+ 0.10000000149011612
663
+ ],
664
+ "class_id": 19,
665
+ "p": 0.08024691358024692
666
+ },
667
+ {
668
+ "name": "VUA20-Metaphor",
669
+ "path": "CreativeLang/vua20_metaphor",
670
+ "split": "train[5%:15%]",
671
+ "weight": 0.4,
672
+ "dialect": [
673
+ 0.30000001192092896,
674
+ 0.10000000149011612,
675
+ 0.10000000149011612,
676
+ 0.15000000596046448,
677
+ 0.3499999940395355
678
+ ],
679
+ "class_id": 20,
680
+ "p": 0.04938271604938272
681
+ }
682
+ ],
683
+ "_alive_entries": [
684
+ {
685
+ "name": "TinyStories",
686
+ "path": "roneneldan/TinyStories",
687
+ "split": "train[20%:30%]",
688
+ "weight": 0.1,
689
+ "dialect": [
690
+ 0.6000000238418579,
691
+ 0.10000000149011612,
692
+ 0.05000000074505806,
693
+ 0.05000000074505806,
694
+ 0.20000000298023224
695
+ ],
696
+ "class_id": 0,
697
+ "p": 0.01234567901234568
698
+ },
699
+ {
700
+ "name": "WikipediaEN",
701
+ "path": "wikimedia/wikipedia",
702
+ "config": "20231101.en",
703
+ "split": "train[2%:5%]",
704
+ "weight": 0.4,
705
+ "dialect": [
706
+ 0.11999999731779099,
707
+ 0.5799999833106995,
708
+ 0.10000000149011612,
709
+ 0.10000000149011612,
710
+ 0.10000000149011612
711
+ ],
712
+ "class_id": 1,
713
+ "p": 0.04938271604938272
714
+ },
715
+ {
716
+ "name": "AGNews",
717
+ "path": "ag_news",
718
+ "split": "train[50%:]",
719
+ "weight": 0.1,
720
+ "dialect": [
721
+ 0.20000000298023224,
722
+ 0.5,
723
+ 0.10000000149011612,
724
+ 0.10000000149011612,
725
+ 0.10000000149011612
726
+ ],
727
+ "class_id": 2,
728
+ "p": 0.01234567901234568
729
+ },
730
+ {
731
+ "name": "GSM8K",
732
+ "path": "openai/gsm8k",
733
+ "config": "main",
734
+ "split": "train[20%:40%]",
735
+ "weight": 0.6,
736
+ "dialect": [
737
+ 0.10000000149011612,
738
+ 0.15000000596046448,
739
+ 0.5,
740
+ 0.15000000596046448,
741
+ 0.10000000149011612
742
+ ],
743
+ "class_id": 3,
744
+ "p": 0.07407407407407407
745
+ },
746
+ {
747
+ "name": "AI2-ARC-Easy",
748
+ "path": "allenai/ai2_arc",
749
+ "config": "ARC-Easy",
750
+ "split": "train[20%:30%]",
751
+ "weight": 0.5,
752
+ "dialect": [
753
+ 0.05000000074505806,
754
+ 0.15000000596046448,
755
+ 0.4000000059604645,
756
+ 0.25,
757
+ 0.15000000596046448
758
+ ],
759
+ "class_id": 4,
760
+ "p": 0.0617283950617284
761
+ },
762
+ {
763
+ "name": "HH-RLHF",
764
+ "path": "Anthropic/hh-rlhf",
765
+ "split": "train[2%:5%]",
766
+ "weight": 0.4,
767
+ "dialect": [
768
+ 0.10000000149011612,
769
+ 0.25,
770
+ 0.20000000298023224,
771
+ 0.25,
772
+ 0.20000000298023224
773
+ ],
774
+ "class_id": 5,
775
+ "p": 0.04938271604938272
776
+ },
777
+ {
778
+ "name": "SVAMP",
779
+ "path": "ChilleD/SVAMP",
780
+ "split": "train",
781
+ "weight": 0.25,
782
+ "dialect": [
783
+ 0.10000000149011612,
784
+ 0.15000000596046448,
785
+ 0.550000011920929,
786
+ 0.15000000596046448,
787
+ 0.05000000074505806
788
+ ],
789
+ "class_id": 6,
790
+ "p": 0.0308641975308642
791
+ },
792
+ {
793
+ "name": "MATH-500",
794
+ "path": "HuggingFaceH4/MATH-500",
795
+ "split": "test",
796
+ "weight": 0.25,
797
+ "dialect": [
798
+ 0.05000000074505806,
799
+ 0.15000000596046448,
800
+ 0.6000000238418579,
801
+ 0.15000000596046448,
802
+ 0.05000000074505806
803
+ ],
804
+ "class_id": 7,
805
+ "p": 0.0308641975308642
806
+ },
807
+ {
808
+ "name": "SEP",
809
+ "path": "AiresPucrs/stanford-encyclopedia-philosophy",
810
+ "split": "train",
811
+ "weight": 0.3,
812
+ "dialect": [
813
+ 0.05000000074505806,
814
+ 0.44999998807907104,
815
+ 0.18000000715255737,
816
+ 0.2199999988079071,
817
+ 0.10000000149011612
818
+ ],
819
+ "class_id": 8,
820
+ "p": 0.037037037037037035
821
+ },
822
+ {
823
+ "name": "ETHICS-commonsense",
824
+ "path": "hendrycks/ethics",
825
+ "config": "commonsense",
826
+ "split": "train",
827
+ "weight": 0.45,
828
+ "dialect": [
829
+ 0.10000000149011612,
830
+ 0.30000001192092896,
831
+ 0.18000000715255737,
832
+ 0.23999999463558197,
833
+ 0.18000000715255737
834
+ ],
835
+ "class_id": 9,
836
+ "p": 0.05555555555555556
837
+ },
838
+ {
839
+ "name": "ETHICS-deontology",
840
+ "path": "hendrycks/ethics",
841
+ "config": "deontology",
842
+ "split": "train",
843
+ "weight": 0.35,
844
+ "dialect": [
845
+ 0.10000000149011612,
846
+ 0.30000001192092896,
847
+ 0.18000000715255737,
848
+ 0.23999999463558197,
849
+ 0.18000000715255737
850
+ ],
851
+ "class_id": 10,
852
+ "p": 0.043209876543209874
853
+ },
854
+ {
855
+ "name": "ETHICS-justice",
856
+ "path": "hendrycks/ethics",
857
+ "config": "justice",
858
+ "split": "train",
859
+ "weight": 0.35,
860
+ "dialect": [
861
+ 0.10000000149011612,
862
+ 0.30000001192092896,
863
+ 0.18000000715255737,
864
+ 0.23999999463558197,
865
+ 0.18000000715255737
866
+ ],
867
+ "class_id": 11,
868
+ "p": 0.043209876543209874
869
+ },
870
+ {
871
+ "name": "ETHICS-utilitarianism",
872
+ "path": "hendrycks/ethics",
873
+ "config": "utilitarianism",
874
+ "split": "train",
875
+ "weight": 0.35,
876
+ "dialect": [
877
+ 0.10000000149011612,
878
+ 0.30000001192092896,
879
+ 0.18000000715255737,
880
+ 0.23999999463558197,
881
+ 0.18000000715255737
882
+ ],
883
+ "class_id": 12,
884
+ "p": 0.043209876543209874
885
+ },
886
+ {
887
+ "name": "ETHICS-virtue",
888
+ "path": "hendrycks/ethics",
889
+ "config": "virtue",
890
+ "split": "train",
891
+ "weight": 0.35,
892
+ "dialect": [
893
+ 0.10000000149011612,
894
+ 0.30000001192092896,
895
+ 0.18000000715255737,
896
+ 0.23999999463558197,
897
+ 0.18000000715255737
898
+ ],
899
+ "class_id": 13,
900
+ "p": 0.043209876543209874
901
+ },
902
+ {
903
+ "name": "SocialChem101",
904
+ "path": "allenai/social-chemistry-101",
905
+ "split": "train",
906
+ "weight": 0.65,
907
+ "dialect": [
908
+ 0.15000000596046448,
909
+ 0.25,
910
+ 0.20000000298023224,
911
+ 0.20000000298023224,
912
+ 0.20000000298023224
913
+ ],
914
+ "class_id": 14,
915
+ "p": 0.08024691358024692
916
+ },
917
+ {
918
+ "name": "MoralStories",
919
+ "path": "demelin/moral_stories",
920
+ "split": "train",
921
+ "weight": 0.35,
922
+ "dialect": [
923
+ 0.20000000298023224,
924
+ 0.20000000298023224,
925
+ 0.20000000298023224,
926
+ 0.20000000298023224,
927
+ 0.20000000298023224
928
+ ],
929
+ "class_id": 15,
930
+ "p": 0.043209876543209874
931
+ },
932
+ {
933
+ "name": "ART-AbductiveNLI",
934
+ "path": "allenai/art",
935
+ "split": "train",
936
+ "weight": 0.3,
937
+ "dialect": [
938
+ 0.05000000074505806,
939
+ 0.20000000298023224,
940
+ 0.44999998807907104,
941
+ 0.20000000298023224,
942
+ 0.10000000149011612
943
+ ],
944
+ "class_id": 16,
945
+ "p": 0.037037037037037035
946
+ },
947
+ {
948
+ "name": "LogiQA2.0NLI",
949
+ "path": "tasksource/logiqa-2.0-nli",
950
+ "split": "train",
951
+ "weight": 0.65,
952
+ "dialect": [
953
+ 0.05000000074505806,
954
+ 0.25,
955
+ 0.44999998807907104,
956
+ 0.15000000596046448,
957
+ 0.10000000149011612
958
+ ],
959
+ "class_id": 18,
960
+ "p": 0.08024691358024692
961
+ },
962
+ {
963
+ "name": "TruthfulQA-MC",
964
+ "path": "EleutherAI/truthful_qa_mc",
965
+ "split": "validation",
966
+ "weight": 0.65,
967
+ "dialect": [
968
+ 0.05000000074505806,
969
+ 0.3499999940395355,
970
+ 0.25,
971
+ 0.25,
972
+ 0.10000000149011612
973
+ ],
974
+ "class_id": 19,
975
+ "p": 0.08024691358024692
976
+ },
977
+ {
978
+ "name": "VUA20-Metaphor",
979
+ "path": "CreativeLang/vua20_metaphor",
980
+ "split": "train[5%:15%]",
981
+ "weight": 0.4,
982
+ "dialect": [
983
+ 0.30000001192092896,
984
+ 0.10000000149011612,
985
+ 0.10000000149011612,
986
+ 0.15000000596046448,
987
+ 0.3499999940395355
988
+ ],
989
+ "class_id": 20,
990
+ "p": 0.04938271604938272
991
+ }
992
+ ]
993
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff