Upload folder using huggingface_hub
Browse files
outputs/assets/benchmarks/hellaswag_detail.csv
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
outputs/assets/benchmarks/hellaswag_overall.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0.21858195578570006
|
outputs/assets/benchmarks/hellaswag_task_scores.csv
ADDED
|
@@ -0,0 +1,193 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
,Task,Score
|
| 2 |
+
0,Applying sunscreen,0.25
|
| 3 |
+
1,Trimming branches or hedges,0.5
|
| 4 |
+
2,Disc dog,0.1875
|
| 5 |
+
3,Wakeboarding,0.23076923076923078
|
| 6 |
+
4,Skateboarding,0.2222222222222222
|
| 7 |
+
5,Waterskiing,0.3684210526315789
|
| 8 |
+
6,Washing hands,0.4
|
| 9 |
+
7,Sailing,0.25
|
| 10 |
+
8,Playing congas,0.125
|
| 11 |
+
9,Ballet,0.2857142857142857
|
| 12 |
+
10,Roof shingle removal,0.14285714285714285
|
| 13 |
+
11,Hand car wash,0.21739130434782608
|
| 14 |
+
12,Kite flying,0.0
|
| 15 |
+
13,Playing pool,0.3076923076923077
|
| 16 |
+
14,Playing lacrosse,0.06666666666666667
|
| 17 |
+
15,Layup drill in basketball,0.2072072072072072
|
| 18 |
+
16,Home and Garden,0.21025641025641026
|
| 19 |
+
17,Playing beach volleyball,0.23529411764705882
|
| 20 |
+
18,Calf roping,0.2962962962962963
|
| 21 |
+
19,Scuba diving,0.30434782608695654
|
| 22 |
+
20,Mixing drinks,0.38461538461538464
|
| 23 |
+
21,Putting on shoes,0.5
|
| 24 |
+
22,Making a lemonade,0.28125
|
| 25 |
+
23,Uncategorized,0.14285714285714285
|
| 26 |
+
24,Zumba,0.0
|
| 27 |
+
25,Playing badminton,0.06666666666666667
|
| 28 |
+
26,Playing bagpipes,0.1
|
| 29 |
+
27,Food and Entertaining,0.248
|
| 30 |
+
28,Personal Care and Style,0.22535211267605634
|
| 31 |
+
29,Cricket,0.0
|
| 32 |
+
30,Shoveling snow,0.1111111111111111
|
| 33 |
+
31,Ping-pong,0.0
|
| 34 |
+
32,Holidays and Traditions,0.10526315789473684
|
| 35 |
+
33,Ice fishing,0.1968503937007874
|
| 36 |
+
34,Beach soccer,0.25
|
| 37 |
+
35,Table soccer,0.14285714285714285
|
| 38 |
+
36,Swimming,0.21428571428571427
|
| 39 |
+
37,Baton twirling,0.35714285714285715
|
| 40 |
+
38,Javelin throw,0.4
|
| 41 |
+
39,Shot put,0.3333333333333333
|
| 42 |
+
40,Doing crunches,0.26666666666666666
|
| 43 |
+
41,Polishing shoes,0.07142857142857142
|
| 44 |
+
42,Travel,0.08333333333333333
|
| 45 |
+
43,Using uneven bars,0.0
|
| 46 |
+
44,Playing harmonica,0.18556701030927836
|
| 47 |
+
45,Relationships,0.18584070796460178
|
| 48 |
+
46,High jump,0.20512820512820512
|
| 49 |
+
47,Making a sandwich,0.25
|
| 50 |
+
48,Powerbocking,0.35714285714285715
|
| 51 |
+
49,Removing ice from car,0.0
|
| 52 |
+
50,Shaving,0.0
|
| 53 |
+
51,Sharpening knives,0.17391304347826086
|
| 54 |
+
52,Welding,0.25
|
| 55 |
+
53,Using parallel bars,0.19047619047619047
|
| 56 |
+
54,"Home,Categories",0.0
|
| 57 |
+
55,Rock climbing,0.4444444444444444
|
| 58 |
+
56,Snow tubing,0.3888888888888889
|
| 59 |
+
57,Washing face,0.19117647058823528
|
| 60 |
+
58,Assembling bicycle,0.125
|
| 61 |
+
59,Tennis serve with ball bouncing,0.07692307692307693
|
| 62 |
+
60,Shuffleboard,0.38461538461538464
|
| 63 |
+
61,Dodgeball,0.3333333333333333
|
| 64 |
+
62,Capoeira,0.14285714285714285
|
| 65 |
+
63,Paintball,0.2
|
| 66 |
+
64,Doing a powerbomb,0.0
|
| 67 |
+
65,Doing motocross,0.2857142857142857
|
| 68 |
+
66,Playing ice hockey,0.125
|
| 69 |
+
67,Philosophy and Religion,0.20833333333333334
|
| 70 |
+
68,Archery,0.2222222222222222
|
| 71 |
+
69,Cars & Other Vehicles,0.16923076923076924
|
| 72 |
+
70,Running a marathon,0.17857142857142858
|
| 73 |
+
71,Throwing darts,0.0
|
| 74 |
+
72,Painting furniture,0.21052631578947367
|
| 75 |
+
73,Having an ice cream,0.27586206896551724
|
| 76 |
+
74,Slacklining,0.16666666666666666
|
| 77 |
+
75,Camel ride,0.3125
|
| 78 |
+
76,Arm wrestling,0.2
|
| 79 |
+
77,Hula hoop,0.25
|
| 80 |
+
78,Surfing,0.25
|
| 81 |
+
79,Playing piano,0.3333333333333333
|
| 82 |
+
80,Gargling mouthwash,0.2222222222222222
|
| 83 |
+
81,Playing accordion,0.16666666666666666
|
| 84 |
+
82,Horseback riding,0.25
|
| 85 |
+
83,Putting in contact lenses,0.16666666666666666
|
| 86 |
+
84,Playing saxophone,0.4444444444444444
|
| 87 |
+
85,Futsal,0.08
|
| 88 |
+
86,Long jump,0.2857142857142857
|
| 89 |
+
87,Longboarding,0.5454545454545454
|
| 90 |
+
88,Pole vault,0.3333333333333333
|
| 91 |
+
89,Building sandcastles,0.6
|
| 92 |
+
90,Plataform diving,0.07692307692307693
|
| 93 |
+
91,Painting,1.0
|
| 94 |
+
92,Spinning,0.3333333333333333
|
| 95 |
+
93,Carving jack-o-lanterns,0.2222222222222222
|
| 96 |
+
94,Braiding hair,0.16666666666666666
|
| 97 |
+
95,Youth,0.20496894409937888
|
| 98 |
+
96,Playing violin,0.256198347107438
|
| 99 |
+
97,Canoeing,0.14285714285714285
|
| 100 |
+
98,Cheerleading,0.17391304347826086
|
| 101 |
+
99,Pets and Animals,0.24313725490196078
|
| 102 |
+
100,Kayaking,0.0
|
| 103 |
+
101,Cleaning shoes,0.2727272727272727
|
| 104 |
+
102,Knitting,0.0
|
| 105 |
+
103,Baking cookies,0.18181818181818182
|
| 106 |
+
104,Doing fencing,0.25
|
| 107 |
+
105,Playing guitarra,0.2222222222222222
|
| 108 |
+
106,Using the rowing machine,0.3
|
| 109 |
+
107,Getting a haircut,0.125
|
| 110 |
+
108,Mooping floor,0.1111111111111111
|
| 111 |
+
109,River tubing,0.09090909090909091
|
| 112 |
+
110,Cleaning sink,0.0
|
| 113 |
+
111,Grooming dog,0.2727272727272727
|
| 114 |
+
112,Discus throw,0.25
|
| 115 |
+
113,Cleaning windows,0.125
|
| 116 |
+
114,Finance and Business,0.24150943396226415
|
| 117 |
+
115,Hanging wallpaper,0.18181818181818182
|
| 118 |
+
116,Rope skipping,0.38095238095238093
|
| 119 |
+
117,Windsurfing,0.5714285714285714
|
| 120 |
+
118,Kneeling,0.2727272727272727
|
| 121 |
+
119,Getting a piercing,0.2222222222222222
|
| 122 |
+
120,Rock-paper-scissors,0.0
|
| 123 |
+
121,Sports and Fitness,0.2152777777777778
|
| 124 |
+
122,Breakdancing,1.0
|
| 125 |
+
123,Walking the dog,0.125
|
| 126 |
+
124,Playing drums,0.2
|
| 127 |
+
125,Playing water polo,0.26666666666666666
|
| 128 |
+
126,BMX,0.2222222222222222
|
| 129 |
+
127,Smoking a cigarette,0.42857142857142855
|
| 130 |
+
128,Blowing leaves,0.2727272727272727
|
| 131 |
+
129,Bullfighting,0.375
|
| 132 |
+
130,Drinking coffee,0.2
|
| 133 |
+
131,Bathing dog,0.36363636363636365
|
| 134 |
+
132,Tango,0.1111111111111111
|
| 135 |
+
133,Wrapping presents,0.058823529411764705
|
| 136 |
+
134,Plastering,0.14285714285714285
|
| 137 |
+
135,Playing blackjack,0.16666666666666666
|
| 138 |
+
136,Fun sliding down,0.0
|
| 139 |
+
137,Work World,0.20930232558139536
|
| 140 |
+
138,Triple jump,0.18181818181818182
|
| 141 |
+
139,Tumbling,0.2857142857142857
|
| 142 |
+
140,Skiing,0.29411764705882354
|
| 143 |
+
141,Doing kickboxing,0.25
|
| 144 |
+
142,Blow-drying hair,0.1111111111111111
|
| 145 |
+
143,Drum corps,0.25
|
| 146 |
+
144,Smoking hookah,0.2
|
| 147 |
+
145,Mowing the lawn,0.0
|
| 148 |
+
146,Volleyball,0.25
|
| 149 |
+
147,Laying tile,0.25
|
| 150 |
+
148,Starting a campfire,0.35714285714285715
|
| 151 |
+
149,Sumo,0.3333333333333333
|
| 152 |
+
150,Hurling,0.25
|
| 153 |
+
151,Playing kickball,0.0
|
| 154 |
+
152,Making a cake,0.16666666666666666
|
| 155 |
+
153,Fixing the roof,0.0
|
| 156 |
+
154,Playing polo,0.3333333333333333
|
| 157 |
+
155,Removing curlers,0.1111111111111111
|
| 158 |
+
156,Elliptical trainer,0.2
|
| 159 |
+
157,Health,0.20140515222482436
|
| 160 |
+
158,Spread mulch,0.0
|
| 161 |
+
159,Chopping wood,0.4444444444444444
|
| 162 |
+
160,Brushing teeth,0.3333333333333333
|
| 163 |
+
161,Using the pommel horse,0.2
|
| 164 |
+
162,Snatch,0.3333333333333333
|
| 165 |
+
163,Clipping cat claws,0.22727272727272727
|
| 166 |
+
164,Putting on makeup,0.0
|
| 167 |
+
165,Hand washing clothes,0.25
|
| 168 |
+
166,Hitting a pinata,0.25
|
| 169 |
+
167,Tai chi,0.1111111111111111
|
| 170 |
+
168,Getting a tattoo,0.5
|
| 171 |
+
169,Drinking beer,0.18181818181818182
|
| 172 |
+
170,Shaving legs,0.4
|
| 173 |
+
171,Doing karate,0.18181818181818182
|
| 174 |
+
172,Playing rubik cube,0.3333333333333333
|
| 175 |
+
173,Family Life,0.20918367346938777
|
| 176 |
+
174,Rollerblading,0.15384615384615385
|
| 177 |
+
175,Education and Communications,0.22388059701492538
|
| 178 |
+
176,Fixing bicycle,0.16666666666666666
|
| 179 |
+
177,Beer pong,0.3333333333333333
|
| 180 |
+
178,Ironing clothes,0.26666666666666666
|
| 181 |
+
179,Cutting the grass,0.16030534351145037
|
| 182 |
+
180,Raking leaves,0.18181818181818182
|
| 183 |
+
181,Playing squash,0.0
|
| 184 |
+
182,Hopscotch,0.0
|
| 185 |
+
183,Installing carpet,0.36363636363636365
|
| 186 |
+
184,Polishing forniture,0.3333333333333333
|
| 187 |
+
185,Decorating the Christmas tree,0.2857142857142857
|
| 188 |
+
186,Preparing salad,0.6
|
| 189 |
+
187,Preparing pasta,0.16666666666666666
|
| 190 |
+
188,Vacuuming floor,0.2222222222222222
|
| 191 |
+
189,Clean and jerk,0.27007299270072993
|
| 192 |
+
190,Computers and Electronics,0.22026431718061673
|
| 193 |
+
191,Croquet,0.2727272727272727
|
outputs/assets/benchmarks/piqa_overall.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0.23177366702937977
|
outputs/assets/benchmarks/winogrande_overall.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0.26677190213101815
|