Spaces:
Runtime error
Runtime error
File size: 6,310 Bytes
1bcaf5a 76a3bfd 0dda3bd 84010af 76a3bfd e38dcf1 76a3bfd e38dcf1 76a3bfd dd7ade0 76a3bfd fbd3675 0637d24 dbc1b2d 6992c96 f5894fd dbc1b2d cd73003 466c028 cd73003 76a3bfd cd73003 466c028 545a4a4 76a3bfd cd73003 1bcaf5a cd73003 76a3bfd 6992c96 76a3bfd 1301ce8 dd7ade0 c048789 dd7ade0 76a3bfd 1301ce8 abe371d 1bcaf5a 84010af abe371d 76a3bfd 24c2d4a 76a3bfd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import json
import gradio as gr
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from huggingface_hub import snapshot_download
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
EVALUATION_QUEUE_TEXT,
INTRODUCTION_TEXT,
TASK_TEXT,
SUBMIT_TEMPLATE,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
from src.display.utils import (
BENCHMARK_COLS,
COLS,
EVAL_COLS,
EVAL_TYPES,
AutoEvalColumn,
ModelType,
fields,
WeightType,
Precision
)
from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN
from src.populate import get_evaluation_queue_df, get_leaderboard_df
from src.submission.submit import add_new_eval
import pdb
def restart_space():
    """Restart the Space identified by ``REPO_ID`` through the shared ``API`` client."""
    API.restart_space(repo_id=REPO_ID)
### Space initialisation
# try:
# print(EVAL_REQUESTS_PATH)
# snapshot_download(
# repo_id=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
# )
# except Exception:
# restart_space()
# try:
# print(EVAL_RESULTS_PATH)
# snapshot_download(
# repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30, token=TOKEN
# )
# except Exception:
# restart_space()
# Names of the leaderboard views, in the order their tabs are created.
task = ['Overall', 'Acrostic', 'Crossword', 'Cryptogram', 'Logic_Puzzle', 'Sudoku', 'Drop_Quote']

# One leaderboard frame per entry in `task`, keyed by the task name.
leaderboard_dict = {
    t: get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS, BENCHMARK_COLS, task=t)
    for t in task
}

# Evaluation-queue frames, in the order returned: finished, running, pending.
finished_eval_queue_df, running_eval_queue_df, pending_eval_queue_df = get_evaluation_queue_df(
    EVAL_REQUESTS_PATH, EVAL_COLS
)
def init_leaderboard(dataframe):
    """Build the read-only leaderboard table for one task.

    Float columns are formatted with one decimal place, and the largest value
    of each float column is rendered in bold. Nothing is bolded in a column
    whose values are all equal.

    Args:
        dataframe: pandas DataFrame holding the leaderboard rows.

    Returns:
        A non-interactive ``gr.components.Dataframe`` showing the styled frame.

    Raises:
        ValueError: If ``dataframe`` is ``None`` or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    def highlight_max_bold(s):
        # Compute the column extremes once instead of once per cell.
        col_max = s.max()
        col_min = s.min()
        return ['font-weight: bold' if v == col_max and v != col_min else '' for v in s]

    num_cols = dataframe.select_dtypes(include=['float']).columns
    styler = dataframe.style.format({col: "{:.1f}" for col in num_cols})
    styler = styler.apply(highlight_max_bold, subset=num_cols)

    return gr.components.Dataframe(
        value=styler,
        headers=[c.name for c in fields(AutoEvalColumn)],
        datatype=[c.type for c in fields(AutoEvalColumn)],
        row_count=10,
        interactive=False,
        # NOTE(review): seven fixed widths — presumably one per displayed
        # column; confirm this matches the fields of AutoEvalColumn.
        column_widths=[180, 60, 80, 80, 80, 80, 60],
    )
# return Leaderboard(
# value=dataframe,
# datatype=[c.type for c in fields(AutoEvalColumn)],
# select_columns=SelectColumns(
# default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
# cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
# label="Select Columns to Display:",
# ),
# # search_columns=[AutoEvalColumn.model.name, AutoEvalColumn.license.name],
# # hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
# # filter_columns=[
# # ColumnFilter(AutoEvalColumn.model_type.name, type="checkboxgroup", label="Model types"),
# # ColumnFilter(AutoEvalColumn.precision.name, type="checkboxgroup", label="Precision"),
# # ColumnFilter(
# # AutoEvalColumn.params.name,
# # type="slider",
# # min=0.01,
# # max=150,
# # label="Select the number of parameters (B)",
# # ),
# # ColumnFilter(
# # AutoEvalColumn.still_on_hub.name, type="boolean", label="Deleted/incomplete", default=True
# # ),
# # ],
# # bool_checkboxgroup_label="Hide models",
# interactive=False,
# )
def process_json(file):
    """Read an uploaded JSON file and return its contents pretty-printed.

    Args:
        file: The uploaded file, either an object exposing its on-disk path
            as ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.

    Returns:
        A JSON string. On success it is the parsed document re-serialised
        with a 4-space indent and non-ASCII characters kept as-is. On failure
        it is an object of the form ``{"error": "<message>"}``.
    """
    if file is None:
        return json.dumps({"error": "No file uploaded."}, ensure_ascii=False)

    # Accept both file-like upload objects (path in `.name`) and bare paths.
    path = getattr(file, "name", file)
    try:
        with open(path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except (OSError, TypeError, ValueError) as e:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        # The result is displayed by a JSON output component, so the error is
        # wrapped in a JSON object rather than returned as bare text.
        return json.dumps({"error": str(e)}, ensure_ascii=False)

    # Pretty-print the parsed JSON for display.
    return json.dumps(data, indent=4, ensure_ascii=False)
# Assemble the page: title and intro, the benchmark tabs (one nested tab per
# task), the submission tab, and the citation box.
# NOTE(review): nesting below was reconstructed from the `with` structure —
# confirm it matches the intended layout.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_id="main-tabs", elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Tabs():
                for tab_index, task_name in enumerate(task):
                    tab_label = task_name.replace("_", " ")
                    with gr.TabItem(
                        tab_label,
                        elem_id=f"llm-benchmark-tab-table-{task_name}",
                        id=tab_index,
                    ):
                        # Show the task description only when one is defined.
                        task_blurb = TASK_TEXT.get(task_name)
                        if task_blurb:
                            gr.Markdown(task_blurb, elem_classes="markdown-text")
                        leaderboard = init_leaderboard(leaderboard_dict[task_name])

        # with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=2):
        #     gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit here! ", elem_id="llm-benchmark-tab-table", id=3):
            with gr.Row():
                gr.Markdown("# ✉️✨ Submit your results here!", elem_classes="markdown-text")

            gr.Markdown("## Submission Template", elem_classes="markdown-text")
            gr.Markdown(SUBMIT_TEMPLATE, elem_classes="markdown-text", height=250)

            file_input = gr.File(label="Upload JSON File", file_types=[".json"], height=150)
            json_output = gr.JSON(label="Parsed JSON Data")  # shows the parsed JSON
            submit_button = gr.Button("Submit")
            # Clicking Submit parses the uploaded file and displays the result.
            submit_button.click(fn=process_json, inputs=file_input, outputs=json_output)

    with gr.Row():
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            label=CITATION_BUTTON_LABEL,
            elem_id="citation-button",
            show_copy_button=True,
        )
# Schedule a Space restart every 30 minutes, then start serving the app.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=30 * 60)
scheduler.start()

demo.queue(default_concurrency_limit=40)
demo.launch()