test
#22
by
davidstap
- opened
- README.md +1 -1
- contamination_report.csv +0 -37
- postprocessing.py +0 -4
README.md
CHANGED
|
@@ -5,7 +5,7 @@ colorFrom: green
|
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
python_version: 3.11
|
| 8 |
-
sdk_version: 4.
|
| 9 |
app_file: app.py
|
| 10 |
app_port: 7860
|
| 11 |
fullWidth: true
|
|
|
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
python_version: 3.11
|
| 8 |
+
sdk_version: 4.19.1
|
| 9 |
app_file: app.py
|
| 10 |
app_port: 7860
|
| 11 |
fullWidth: true
|
contamination_report.csv
CHANGED
|
@@ -6,9 +6,6 @@ Anagrams 1;;GPT-3;;model;;3.0;;data-based;https://arxiv.org/abs/2005.14165;13
|
|
| 6 |
|
| 7 |
Anagrams 2;;GPT-3;;model;;7.0;;data-based;https://arxiv.org/abs/2005.14165;13
|
| 8 |
|
| 9 |
-
CodeForces2305;;GPT-3.5-turbo;0613;model;;;0.0;model-based;https://arxiv.org/abs/2402.15938;28
|
| 10 |
-
CodeForces2305;;GPT-3.5-turbo;1106;model;;;0.0;model-based;https://arxiv.org/abs/2402.15938;28
|
| 11 |
-
|
| 12 |
Cycled Letters;;GPT-3;;model;;1.0;;data-based;https://arxiv.org/abs/2005.14165;13
|
| 13 |
|
| 14 |
EdinburghNLP/xsum;;GPT-3.5;;model;0.0;;100.0;model-based;https://arxiv.org/abs/2308.08493;3
|
|
@@ -20,9 +17,6 @@ EdinburghNLP/xsum;;allenai/c4;;corpus;;;15.49;data-based;https://arxiv.org/abs/2
|
|
| 20 |
|
| 21 |
EleutherAI/hendrycks_math;;GPT-4;;model;100.0;;;data-based;https://arxiv.org/abs/2303.08774;11
|
| 22 |
|
| 23 |
-
HumanEval_R;;GPT-3.5-turbo;0613;model;;;9.76;model-based;https://arxiv.org/abs/2402.15938;28
|
| 24 |
-
HumanEval_R;;GPT-3.5-turbo;1106;model;;;10.97;model-based;https://arxiv.org/abs/2402.15938;28
|
| 25 |
-
|
| 26 |
RadNLI;;GPT-3.5;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
|
| 27 |
RadNLI;;GPT-4;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
|
| 28 |
|
|
@@ -149,20 +143,6 @@ facebook/anli;test_r2;GPT-3;;model;;;18.0;data-based;https://arxiv.org/abs/2005.
|
|
| 149 |
|
| 150 |
facebook/anli;test_r3;GPT-3;;model;;;16.0;data-based;https://arxiv.org/abs/2005.14165;13
|
| 151 |
|
| 152 |
-
facebook/flores;;Claude 3 Opus;;model;;100.0;;model-based;https://arxiv.org/abs/2404.13813;29
|
| 153 |
-
facebook/flores;;bigscience/bloomz;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 154 |
-
facebook/flores;;bigscience/bloomz-1b1;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 155 |
-
facebook/flores;;bigscience/bloomz-1b7;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 156 |
-
facebook/flores;;bigscience/bloomz-3b;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 157 |
-
facebook/flores;;bigscience/bloomz-560m;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 158 |
-
facebook/flores;;bigscience/bloomz-7b1;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 159 |
-
facebook/flores;;bigscience/mt0-base;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 160 |
-
facebook/flores;;bigscience/mt0-large;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 161 |
-
facebook/flores;;bigscience/mt0-small;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 162 |
-
facebook/flores;;bigscience/mt0-xl;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 163 |
-
facebook/flores;;bigscience/mt0-xxl;;model;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 164 |
-
facebook/flores;;bigscience/xP3;;corpus;;100.0;;data-based;https://aclanthology.org/2023.acl-long.891/;20
|
| 165 |
-
|
| 166 |
gigaword;;EleutherAI/pile;;corpus;;;1.18;data-based;https://arxiv.org/abs/2310.20707;2
|
| 167 |
gigaword;;allenai/c4;;corpus;;;0.15;data-based;https://arxiv.org/abs/2310.20707;2
|
| 168 |
gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
|
@@ -170,13 +150,8 @@ gigaword;;togethercomputer/RedPajama-Data-V2;;corpus;;;2.82;data-based;https://a
|
|
| 170 |
|
| 171 |
gsm8k;;BAAI/Aquila2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/Aquila2-34B/blob/main/README.md;21
|
| 172 |
gsm8k;;BAAI/AquilaChat2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/AquilaChat2-34B/blob/main/README.md;21
|
| 173 |
-
gsm8k;;EleutherAI/llemma_34b;;model;;;0.15;data-based;https://openreview.net/forum?id=4WnqRR915j;23
|
| 174 |
-
gsm8k;;EleutherAI/llemma_7b;;model;;;0.15;data-based;https://openreview.net/forum?id=4WnqRR915j;23
|
| 175 |
-
gsm8k;;EleutherAI/proof-pile-2;;corpus;;;0.15;data-based;https://openreview.net/forum?id=4WnqRR915j;23
|
| 176 |
gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
|
| 177 |
gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
|
| 178 |
-
gsm8k;;Qwen/Qwen-14B;;model;0.5;;;model-based;https://arxiv.org/abs/2404.18824;27
|
| 179 |
-
gsm8k;;Qwen/Qwen-1_8B;;model;12.8;;0.075;model-based;https://arxiv.org/abs/2404.18824;27
|
| 180 |
|
| 181 |
head_qa;en;EleutherAI/pile;;corpus;;;5.11;data-based;https://arxiv.org/abs/2310.20707;2
|
| 182 |
head_qa;en;allenai/c4;;corpus;;;5.22;data-based;https://arxiv.org/abs/2310.20707;2
|
|
@@ -188,18 +163,6 @@ health_fact;;allenai/c4;;corpus;;;7.53;data-based;https://arxiv.org/abs/2310.207
|
|
| 188 |
health_fact;;oscar-corpus/OSCAR-2301;;corpus;;;3.4;data-based;https://arxiv.org/abs/2310.20707;2
|
| 189 |
health_fact;;togethercomputer/RedPajama-Data-V2;;corpus;;;18.7;data-based;https://arxiv.org/abs/2310.20707;2
|
| 190 |
|
| 191 |
-
hendrycks/competition_math;;BAAI/Aquila2-34B;;model;3.366;;1.166;model-based;https://arxiv.org/abs/2404.18824;27
|
| 192 |
-
hendrycks/competition_math;;BAAI/Aquila2-7B;;model;1;;0.133;model-based;https://arxiv.org/abs/2404.18824;27
|
| 193 |
-
hendrycks/competition_math;;EleutherAI/llemma_34b;;model;;;7.72;data-based;https://openreview.net/forum?id=4WnqRR915j;23
|
| 194 |
-
hendrycks/competition_math;;EleutherAI/llemma_7b;;model;;;7.72;data-based;https://openreview.net/forum?id=4WnqRR915j;23
|
| 195 |
-
hendrycks/competition_math;;EleutherAI/proof-pile-2;;corpus;;;7.72;data-based;https://openreview.net/forum?id=4WnqRR915j;23
|
| 196 |
-
hendrycks/competition_math;;Qwen/Qwen-14B;;model;1.766;;1.6;model-based;https://arxiv.org/abs/2404.18824;27
|
| 197 |
-
hendrycks/competition_math;;Qwen/Qwen-1_8B;;model;4.533;;1.70;model-based;https://arxiv.org/abs/2404.18824;27
|
| 198 |
-
hendrycks/competition_math;;Qwen/Qwen-7B;;model;1.266;;0.766;model-based;https://arxiv.org/abs/2404.18824;27
|
| 199 |
-
hendrycks/competition_math;;THUDM/chatglm3-6b;;model;0.70;;0.4;model-based;https://arxiv.org/abs/2404.18824;27
|
| 200 |
-
hendrycks/competition_math;;internlm/internlm2-20b;;model;4.733;;0.666;model-based;https://arxiv.org/abs/2404.18824;27
|
| 201 |
-
hendrycks/competition_math;;internlm/internlm2-7b;;model;3.033;;0.433;model-based;https://arxiv.org/abs/2404.18824;27
|
| 202 |
-
|
| 203 |
hlgd;;EleutherAI/pile;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
|
| 204 |
hlgd;;allenai/c4;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
|
| 205 |
hlgd;;oscar-corpus/OSCAR-2301;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
|
|
|
|
| 6 |
|
| 7 |
Anagrams 2;;GPT-3;;model;;7.0;;data-based;https://arxiv.org/abs/2005.14165;13
|
| 8 |
|
|
|
|
|
|
|
|
|
|
| 9 |
Cycled Letters;;GPT-3;;model;;1.0;;data-based;https://arxiv.org/abs/2005.14165;13
|
| 10 |
|
| 11 |
EdinburghNLP/xsum;;GPT-3.5;;model;0.0;;100.0;model-based;https://arxiv.org/abs/2308.08493;3
|
|
|
|
| 17 |
|
| 18 |
EleutherAI/hendrycks_math;;GPT-4;;model;100.0;;;data-based;https://arxiv.org/abs/2303.08774;11
|
| 19 |
|
|
|
|
|
|
|
|
|
|
| 20 |
RadNLI;;GPT-3.5;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
|
| 21 |
RadNLI;;GPT-4;;model;0.0;0.0;0.0;model-based;https://arxiv.org/abs/2308.08493;8
|
| 22 |
|
|
|
|
| 143 |
|
| 144 |
facebook/anli;test_r3;GPT-3;;model;;;16.0;data-based;https://arxiv.org/abs/2005.14165;13
|
| 145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
gigaword;;EleutherAI/pile;;corpus;;;1.18;data-based;https://arxiv.org/abs/2310.20707;2
|
| 147 |
gigaword;;allenai/c4;;corpus;;;0.15;data-based;https://arxiv.org/abs/2310.20707;2
|
| 148 |
gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
|
|
|
| 150 |
|
| 151 |
gsm8k;;BAAI/Aquila2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/Aquila2-34B/blob/main/README.md;21
|
| 152 |
gsm8k;;BAAI/AquilaChat2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/AquilaChat2-34B/blob/main/README.md;21
|
|
|
|
|
|
|
|
|
|
| 153 |
gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
|
| 154 |
gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
|
|
|
|
|
|
|
| 155 |
|
| 156 |
head_qa;en;EleutherAI/pile;;corpus;;;5.11;data-based;https://arxiv.org/abs/2310.20707;2
|
| 157 |
head_qa;en;allenai/c4;;corpus;;;5.22;data-based;https://arxiv.org/abs/2310.20707;2
|
|
|
|
| 163 |
health_fact;;oscar-corpus/OSCAR-2301;;corpus;;;3.4;data-based;https://arxiv.org/abs/2310.20707;2
|
| 164 |
health_fact;;togethercomputer/RedPajama-Data-V2;;corpus;;;18.7;data-based;https://arxiv.org/abs/2310.20707;2
|
| 165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
hlgd;;EleutherAI/pile;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
|
| 167 |
hlgd;;allenai/c4;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
|
| 168 |
hlgd;;oscar-corpus/OSCAR-2301;;corpus;;;0.0;data-based;https://arxiv.org/abs/2310.20707;2
|
postprocessing.py
CHANGED
|
@@ -17,9 +17,6 @@ def remove_duplicates(data):
|
|
| 17 |
def fix_arxiv_links(data):
|
| 18 |
return [[*item[:-2], item[-2].replace("arxiv.org/pdf", "arxiv.org/abs"), item[-1]] for item in data]
|
| 19 |
|
| 20 |
-
def fix_openreview_links(data):
|
| 21 |
-
return [[*item[:-2], item[-2].replace("openreview.net/pdf", "openreview.net/forum"), item[-1]] for item in data]
|
| 22 |
-
|
| 23 |
def sort_data(data):
|
| 24 |
return sorted(data, key=lambda x: (x[0], x[1], x[2], x[3], x[-1]))
|
| 25 |
|
|
@@ -28,7 +25,6 @@ def main():
|
|
| 28 |
data = sort_data(data)
|
| 29 |
data = remove_duplicates(data)
|
| 30 |
data = fix_arxiv_links(data)
|
| 31 |
-
data = fix_openreview_links(data)
|
| 32 |
print("Total datapoints:", len(data))
|
| 33 |
|
| 34 |
with open("contamination_report.csv", 'w') as f:
|
|
|
|
| 17 |
def fix_arxiv_links(data):
|
| 18 |
return [[*item[:-2], item[-2].replace("arxiv.org/pdf", "arxiv.org/abs"), item[-1]] for item in data]
|
| 19 |
|
|
|
|
|
|
|
|
|
|
| 20 |
def sort_data(data):
|
| 21 |
return sorted(data, key=lambda x: (x[0], x[1], x[2], x[3], x[-1]))
|
| 22 |
|
|
|
|
| 25 |
data = sort_data(data)
|
| 26 |
data = remove_duplicates(data)
|
| 27 |
data = fix_arxiv_links(data)
|
|
|
|
| 28 |
print("Total datapoints:", len(data))
|
| 29 |
|
| 30 |
with open("contamination_report.csv", 'w') as f:
|