Update app.py
Browse files
app.py
CHANGED
|
@@ -280,10 +280,17 @@ def build_demo(df):
|
|
| 280 |
gr.Markdown(
|
| 281 |
"""
|
| 282 |
# Skill-Mix: a Flexible and Expandable Family of Evaluations for AI models
|
|
|
|
| 283 |
|
| 284 |
-
|
| 285 |
|
| 286 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 287 |
"""
|
| 288 |
)
|
| 289 |
|
|
|
|
| 280 |
gr.Markdown(
|
| 281 |
"""
|
| 282 |
# Skill-Mix: a Flexible and Expandable Family of Evaluations for AI models
|
| 283 |
+
Dingli Yu* Simran Kaur* Arushi Gupta* Jonah Brown-Cohen† Anirudh Goyal† Sanjeev Arora*
|
| 284 |
|
| 285 |
+
* Princeton Language and Intelligence (PLI), Princeton University
|
| 286 |
|
| 287 |
+
† Google DeepMind
|
| 288 |
+
|
| 289 |
+
## This is a demonstration of the Skill-Mix evaluation.
|
| 290 |
+
|
| 291 |
+
## Samples are generated using 10% of the full set of skills and topics.
|
| 292 |
+
|
| 293 |
+
## Coming soon: generation by models other than the GPT family and LLaMA-2 family; grading by LLaMA-2.
|
| 294 |
"""
|
| 295 |
)
|
| 296 |
|