Update app.py
Browse files
app.py
CHANGED
|
@@ -284,7 +284,7 @@ By [Princeton Language and Intelligence (PLI), Princeton University](https://pli
|
|
| 284 |
|
| 285 |
### This is a demonstration of the Skill-Mix evaluation.
|
| 286 |
|
| 287 |
-
Paper link: [
|
| 288 |
|
| 289 |
### Samples are generated using 10% of the full set of skills and topics. Click the second tab for comparison between two generations.
|
| 290 |
|
|
@@ -427,9 +427,32 @@ Coming soon: generation by more models; grading by LLaMA-2.
|
|
| 427 |
c.change(fn_list[0], input_list[0], output_list[0]).then(fn_list[1], input_list[1], output_list[1]).then(fn_list[2], input_list[2], output_list[2]).then(fn_list[3], input_list[3], output_list[3]).then(fn_list[4], input_list[4], output_list[4]).then(fn_list[5], input_list[5], output_list[5])
|
| 428 |
else:
|
| 429 |
raise NotImplementedError
|
| 430 |
-
gr.Markdown('''
|
| 431 |
```
|
| 432 |
-
|
| 433 |
```
|
| 434 |
''')
|
| 435 |
return demo
|
|
|
|
| 284 |
|
| 285 |
### This is a demonstration of the Skill-Mix evaluation.
|
| 286 |
|
| 287 |
+
Paper link: [https://arxiv.org/abs/2310.17567](https://arxiv.org/abs/2310.17567)
|
| 288 |
|
| 289 |
### Samples are generated using 10% of the full set of skills and topics. Click the second tab for comparison between two generations.
|
| 290 |
|
|
|
|
| 427 |
c.change(fn_list[0], input_list[0], output_list[0]).then(fn_list[1], input_list[1], output_list[1]).then(fn_list[2], input_list[2], output_list[2]).then(fn_list[3], input_list[3], output_list[3]).then(fn_list[4], input_list[4], output_list[4]).then(fn_list[5], input_list[5], output_list[5])
|
| 428 |
else:
|
| 429 |
raise NotImplementedError
|
| 430 |
+
gr.Markdown('''Please consider citing
|
| 431 |
```
|
| 432 |
+
@article{yu2023skillmix,
|
| 433 |
+
title={Skill-Mix: a Flexible and Expandable Family of Evaluations for AI models},
|
| 434 |
+
author={Yu, Dingli and Kaur, Simran and Gupta, Arushi and Brown-Cohen, Jonah and Goyal, Anirudh and Arora, Sanjeev},
|
| 435 |
+
journal={arXiv preprint arXiv:2310.17567},
|
| 436 |
+
year={2023}
|
| 437 |
+
}
|
| 438 |
+
```
|
| 439 |
+
```
|
| 440 |
+
@misc{openai2023gpt4,
|
| 441 |
+
title={GPT-4 Technical Report},
|
| 442 |
+
author={OpenAI},
|
| 443 |
+
year={2023},
|
| 444 |
+
eprint={2303.08774},
|
| 445 |
+
archivePrefix={arXiv},
|
| 446 |
+
primaryClass={cs.CL}
|
| 447 |
+
}
|
| 448 |
+
```
|
| 449 |
+
```
|
| 450 |
+
@article{touvron2023llama,
|
| 451 |
+
title={Llama 2: Open foundation and fine-tuned chat models},
|
| 452 |
+
author={Touvron, Hugo and Martin, Louis and Stone, Kevin and Albert, Peter and Almahairi, Amjad and Babaei, Yasmine and Bashlykov, Nikolay and Batra, Soumya and Bhargava, Prajjwal and Bhosale, Shruti and others},
|
| 453 |
+
journal={arXiv preprint arXiv:2307.09288},
|
| 454 |
+
year={2023}
|
| 455 |
+
}
|
| 456 |
```
|
| 457 |
''')
|
| 458 |
return demo
|