Spaces:
Sleeping
Sleeping
vtrv.vls
committed on
Commit
·
dd9842f
1
Parent(s):
d4e0b1a
Added header and banner
Browse files- about.md +10 -0
- app.py +12 -8
- constants.py +2 -2
- header.md +4 -0
- resources/MERA.png +0 -0
- test.md +0 -1
about.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<br/>
|
| 2 |
+
|
| 3 |
+
## We propose a new methodology for the evaluation of fundamental models:
|
| 4 |
+
|
| 5 |
+
<br/>
|
| 6 |
+
21 challenging tasks for fundamental models covering issues related to world knowledge, logic, cause-and-effect relationships, AI ethics, and much more.
|
| 7 |
+
|
| 8 |
+
We have developed an open instructional benchmark for evaluating large language models for the Russian language. A unified leaderboard on the website includes fixed, verified expert tasks and standardized configurations of prompts and parameters.
|
| 9 |
+
|
| 10 |
+
The project has been supported by the AI Alliance, leading industrial players, and academic partners engaged in language model research.
|
app.py
CHANGED
|
@@ -8,13 +8,14 @@ from random import choice
|
|
| 8 |
|
| 9 |
import queue
|
| 10 |
|
| 11 |
-
from constants import css, js_code, js_light
|
| 12 |
from utils import model_response, clear_chat
|
| 13 |
from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama
|
| 14 |
|
| 15 |
INIT_MODELS = dict()
|
| 16 |
S3_SESSION = None
|
| 17 |
-
|
|
|
|
| 18 |
CURRENT_MODELS = queue.LifoQueue()
|
| 19 |
MODEL_LIB = {'TINYLLAMA': get_tinyllama, "QWEN2INS1B": get_qwen2ins1b, "RUBASE": GigaChat.get_giga}
|
| 20 |
GEN_LIB = {'TINYLLAMA': response_tinyllama, "QWEN2INS1B": response_qwen2ins1b, "RUBASE": response_gigachat}
|
|
@@ -258,10 +259,10 @@ def tab_offline_arena():
|
|
| 258 |
|
| 259 |
def build_demo():
|
| 260 |
with gradio.Blocks(theme=gradio.themes.Soft(text_size=gradio.themes.sizes.text_lg), css=css) as demo: # , css=css, js=js_light
|
| 261 |
-
|
|
|
|
| 262 |
with gradio.Tabs() as tabs:
|
| 263 |
with gradio.TabItem("πΌ MERA leaderboard", id=0):
|
| 264 |
-
gradio.Markdown(TEST_MD)
|
| 265 |
tab_leaderboard()
|
| 266 |
|
| 267 |
with gradio.TabItem("π SBS by categories and criteria", id=1):
|
|
@@ -272,7 +273,7 @@ def build_demo():
|
|
| 272 |
# _tab_explore()
|
| 273 |
|
| 274 |
with gradio.TabItem("πͺ About MERA", id=3):
|
| 275 |
-
gradio.Markdown(
|
| 276 |
return demo
|
| 277 |
|
| 278 |
if __name__ == "__main__":
|
|
@@ -283,8 +284,11 @@ if __name__ == "__main__":
|
|
| 283 |
# data_load(args.result_file)
|
| 284 |
# TYPES = ["number", "markdown", "number"]
|
| 285 |
|
| 286 |
-
with open("
|
| 287 |
-
|
|
|
|
|
|
|
|
|
|
| 288 |
|
| 289 |
try:
|
| 290 |
session = boto3.session.Session()
|
|
@@ -298,7 +302,7 @@ if __name__ == "__main__":
|
|
| 298 |
print('Failed to start s3 session')
|
| 299 |
|
| 300 |
app = build_demo()
|
| 301 |
-
app.launch(share=args.share, height=3000, width="110%") # share=args.share
|
| 302 |
|
| 303 |
# demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
|
| 304 |
# demo.launch()
|
|
|
|
| 8 |
|
| 9 |
import queue
|
| 10 |
|
| 11 |
+
from constants import css, js_code, js_light, BANNER
|
| 12 |
from utils import model_response, clear_chat
|
| 13 |
from models import get_tinyllama, get_qwen2ins1b, GigaChat, response_gigachat, response_qwen2ins1b, response_tinyllama
|
| 14 |
|
| 15 |
INIT_MODELS = dict()
|
| 16 |
S3_SESSION = None
|
| 17 |
+
HEADER_MD = None
|
| 18 |
+
ABOUT_MD = None
|
| 19 |
CURRENT_MODELS = queue.LifoQueue()
|
| 20 |
MODEL_LIB = {'TINYLLAMA': get_tinyllama, "QWEN2INS1B": get_qwen2ins1b, "RUBASE": GigaChat.get_giga}
|
| 21 |
GEN_LIB = {'TINYLLAMA': response_tinyllama, "QWEN2INS1B": response_qwen2ins1b, "RUBASE": response_gigachat}
|
|
|
|
| 259 |
|
| 260 |
def build_demo():
|
| 261 |
with gradio.Blocks(theme=gradio.themes.Soft(text_size=gradio.themes.sizes.text_lg), css=css) as demo: # , css=css, js=js_light
|
| 262 |
+
gradio.HTML(BANNER, elem_id="banner")
|
| 263 |
+
gradio.Markdown(HEADER_MD)
|
| 264 |
with gradio.Tabs() as tabs:
|
| 265 |
with gradio.TabItem("πΌ MERA leaderboard", id=0):
|
|
|
|
| 266 |
tab_leaderboard()
|
| 267 |
|
| 268 |
with gradio.TabItem("π SBS by categories and criteria", id=1):
|
|
|
|
| 273 |
# _tab_explore()
|
| 274 |
|
| 275 |
with gradio.TabItem("πͺ About MERA", id=3):
|
| 276 |
+
gradio.Markdown(ABOUT_MD)
|
| 277 |
return demo
|
| 278 |
|
| 279 |
if __name__ == "__main__":
|
|
|
|
| 284 |
# data_load(args.result_file)
|
| 285 |
# TYPES = ["number", "markdown", "number"]
|
| 286 |
|
| 287 |
+
with open("header.md", "r") as f:
|
| 288 |
+
HEADER_MD = f.read()
|
| 289 |
+
|
| 290 |
+
with open("about.md", "r") as f:
|
| 291 |
+
ABOUT_MD = f.read()
|
| 292 |
|
| 293 |
try:
|
| 294 |
session = boto3.session.Session()
|
|
|
|
| 302 |
print('Failed to start s3 session')
|
| 303 |
|
| 304 |
app = build_demo()
|
| 305 |
+
app.launch(share=args.share, height=3000, width="110%", allowed_paths=["."]) # share=args.share
|
| 306 |
|
| 307 |
# demo = gradio.Interface(fn=gen, inputs="text", outputs="text")
|
| 308 |
# demo.launch()
|
constants.py
CHANGED
|
@@ -4,8 +4,8 @@ from collections import OrderedDict
|
|
| 4 |
# DEFAULT_K = "∞"
|
| 5 |
DEFAULT_K = "1500"
|
| 6 |
|
| 7 |
-
|
| 8 |
-
BANNER = f'<div style="display: flex; justify-content: flex-start;"><img src="{
|
| 9 |
|
| 10 |
TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> 🦁 AI2 WildBench Leaderboard </b> </body> </html>"
|
| 11 |
|
|
|
|
| 4 |
# DEFAULT_K = "∞"
|
| 5 |
DEFAULT_K = "1500"
|
| 6 |
|
| 7 |
+
banner_path = "file/resources/MERA.png" # the same repo here.
|
| 8 |
+
BANNER = f'<div style="display: flex; justify-content: flex-start;"><img src="{banner_path}" alt="MERA" style="width: 20vw; min-width: 150px; max-width: 400px;"> </div>'
|
| 9 |
|
| 10 |
TITLE = "<html> <head> <style> h1 {text-align: center;} </style> </head> <body> <h1> 🦁 AI2 WildBench Leaderboard </b> </body> </html>"
|
| 11 |
|
header.md
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<br/>
|
| 2 |
+
|
| 3 |
+
# MERA: Multimodal Evaluation for Russian-language Architectures
|
| 4 |
+
[GitHub](https://github.com/ai-forever/MERA) [HFDatasets](https://huggingface.co/datasets/ai-forever/MERA) [Paper](https://arxiv.org/abs/2401.04531) [Info](https://mera.a-ai.ru/en) Version: V1
|
resources/MERA.png
ADDED
|
test.md
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
## TEST
|
|
|
|
|
|