Add Aquila series models that have gsm8k test set contamination
#21
by
bpHigh
- opened
- README.md +1 -1
- contamination_report.csv +4 -0
README.md
CHANGED
|
@@ -4,7 +4,7 @@ emoji: π
|
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
-
python_version: 3.
|
| 8 |
sdk_version: 4.19.1
|
| 9 |
app_file: app.py
|
| 10 |
app_port: 7860
|
|
|
|
| 4 |
colorFrom: green
|
| 5 |
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
+
python_version: 3.11
|
| 8 |
sdk_version: 4.19.1
|
| 9 |
app_file: app.py
|
| 10 |
app_port: 7860
|
contamination_report.csv
CHANGED
|
@@ -148,6 +148,8 @@ gigaword;;allenai/c4;;corpus;;;0.15;data-based;https://arxiv.org/abs/2310.20707;
|
|
| 148 |
gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
| 149 |
gigaword;;togethercomputer/RedPajama-Data-V2;;corpus;;;2.82;data-based;https://arxiv.org/abs/2310.20707;2
|
| 150 |
|
|
|
|
|
|
|
| 151 |
gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
|
| 152 |
gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
|
| 153 |
|
|
@@ -664,6 +666,8 @@ wmt/wmt16;fr-en;GPT-3;;model;;;14.0;data-based;https://arxiv.org/abs/2005.14165;
|
|
| 664 |
wmt/wmt16;ro-en;FLAN;;model;;;12.4;data-based;https://arxiv.org/abs/2109.01652;13
|
| 665 |
wmt/wmt16;ro-en;GPT-3;;model;;;21.0;data-based;https://arxiv.org/abs/2005.14165;13
|
| 666 |
|
|
|
|
|
|
|
| 667 |
xnli;en;EleutherAI/pile;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
| 668 |
xnli;en;allenai/c4;;corpus;;;0.12;data-based;https://arxiv.org/abs/2310.20707;2
|
| 669 |
xnli;en;oscar-corpus/OSCAR-2301;;corpus;;;0.24;data-based;https://arxiv.org/abs/2310.20707;2
|
|
|
|
| 148 |
gigaword;;oscar-corpus/OSCAR-2301;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
| 149 |
gigaword;;togethercomputer/RedPajama-Data-V2;;corpus;;;2.82;data-based;https://arxiv.org/abs/2310.20707;2
|
| 150 |
|
| 151 |
+
gsm8k;;BAAI/Aquila2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/Aquila2-34B/blob/main/README.md;21
|
| 152 |
+
gsm8k;;BAAI/AquilaChat2-34B;;model;;;100.0;model-based;https://huggingface.co/BAAI/AquilaChat2-34B/blob/main/README.md;21
|
| 153 |
gsm8k;;GPT-4;;model;100.0;;1.0;data-based;https://arxiv.org/abs/2303.08774;11
|
| 154 |
gsm8k;;GPT-4;;model;79.00;;;model-based;https://arxiv.org/abs/2311.06233;8
|
| 155 |
|
|
|
|
| 666 |
wmt/wmt16;ro-en;FLAN;;model;;;12.4;data-based;https://arxiv.org/abs/2109.01652;13
|
| 667 |
wmt/wmt16;ro-en;GPT-3;;model;;;21.0;data-based;https://arxiv.org/abs/2005.14165;13
|
| 668 |
|
| 669 |
+
xlangai/spider;;GPT-3.5;;model;;11.3;;model-based;https://arxiv.org/abs/2402.08100;18
|
| 670 |
+
|
| 671 |
xnli;en;EleutherAI/pile;;corpus;;;0.36;data-based;https://arxiv.org/abs/2310.20707;2
|
| 672 |
xnli;en;allenai/c4;;corpus;;;0.12;data-based;https://arxiv.org/abs/2310.20707;2
|
| 673 |
xnli;en;oscar-corpus/OSCAR-2301;;corpus;;;0.24;data-based;https://arxiv.org/abs/2310.20707;2
|