DOoM-lb / app.py
Anonumous's picture
Update search
16a8b82
import json
import logging
import os
from io import BytesIO
import gradio as gr
import pandas as pd
from gradio_space_ci import enable_space_ci
from src.config import API, DEFAULT_SYSTEM_PROMPT, REPO_ID, RESULTS_REPO
from src.display import INTRODUCTION_TEXT, LEADERBOARD_CSS, LLM_BENCHMARKS_TEXT, THEME_DETECTION_JS
from src.leaderboard import build_leaderboard_df, download_results
from src.plots import create_plot, create_radar_plot
# Set the GRADIO_ANALYTICS_ENABLED flag to "false" (intended to switch Gradio analytics off).
os.environ["GRADIO_ANALYTICS_ENABLED"] = "false"
# Configure the root logger: INFO and above, formatted as "<time> - <level> - <message>".
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
# NOTE(review): presumably enables gradio_space_ci's CI integration for this Space — confirm against the package docs.
enable_space_ci()
def handle_file_upload(file) -> tuple[dict, str]:
    """Parse an uploaded JSON results file.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or a file
            wrapper that exposes the path through its ``name`` attribute.

    Returns:
        A ``(contents, file_name)`` tuple: the decoded JSON document and the
        base name of the uploaded file.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the file is not valid UTF-8 encoded JSON
            (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
    """
    # Plain paths are used as-is; anything else is expected to carry ``.name``.
    # (A pathlib.Path also has ``.name``, but that is only the base name, so
    # path-like objects must be checked first.)
    if isinstance(file, (str, os.PathLike)):
        full_path = os.fspath(file)
    else:
        full_path = file.name
    file_path = os.path.basename(full_path)
    logging.info("File uploaded: %s", file_path)
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(full_path, encoding="utf-8") as f:
        v = json.load(f)
    return v, file_path
def submit_file(v: dict, _file_path: str, mn: str, profile: gr.OAuthProfile | None) -> str:
    """Validate uploaded evaluation results and publish them to the results repo.

    On success the record is uploaded to ``RESULTS_REPO`` under ``model_data/``,
    written to the local ``./m_data/model_data/`` directory and, on a
    best-effort basis, the aggregated ``leaderboard.json`` backup is refreshed.

    Args:
        v: Parsed JSON results; must be a mapping containing ``score``,
            ``math_score`` and ``physics_score``.
        _file_path: Name of the uploaded file (not used).
        mn: Model name typed by the user; surrounding whitespace is ignored.
        profile: HuggingFace OAuth profile, ``None`` when the user is not logged in.

    Returns:
        Status message for the UI, prefixed with "Success!", "Error:" or
        "Hub Login Required:". Failures are reported through the message and
        never raised.
    """
    if profile is None:
        return "Hub Login Required: Войдите в HuggingFace, чтобы загрузить результаты"

    # The upload state is still None when submit is clicked before a file was uploaded.
    if v is None:
        return "Error: Сначала загрузите JSON файл с результатами"
    if not isinstance(v, dict):
        return "Error: JSON файл должен содержать объект с полями результатов"

    # A blank name would otherwise be stored as "<username>/".
    mn = (mn or "").strip()
    if not mn:
        return "Error: Укажите название модели"

    try:
        for field in ("score", "math_score", "physics_score"):
            if field not in v:
                return f"Error: Отсутствует обязательное поле '{field}' в JSON файле"

        new_file = {
            "model_name": profile.username + "/" + mn,
            "score": float(v["score"]),
            "math_score": float(v["math_score"]),
            "physics_score": float(v["physics_score"]),
            "total_tokens": int(v.get("total_tokens", 0)),
            "evaluation_time": float(v.get("evaluation_time", 0.0)),
            "system_prompt": v.get("system_prompt", DEFAULT_SYSTEM_PROMPT),
        }

        # NaN also fails this chained comparison, so it is rejected as out of range.
        scores_in_range = all(0 <= new_file[key] <= 1 for key in ("score", "math_score", "physics_score"))
        if not scores_in_range:
            return "Error: Все значения оценок должны быть в диапазоне от 0 до 1"

        safe_filename = profile.username + "_" + mn.replace("/", "_").replace(" ", "_") + ".json"

        buf = BytesIO()
        buf.write(json.dumps(new_file, ensure_ascii=False).encode("utf-8"))
        API.upload_file(
            path_or_fileobj=buf.getvalue(),
            path_in_repo="model_data/" + safe_filename,
            repo_id=RESULTS_REPO,
            repo_type="dataset",
        )

        # Write the record locally as well so the leaderboard can be rebuilt right away.
        local_path = f"./m_data/model_data/{safe_filename}"
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, "w", encoding="utf-8") as f:
            json.dump(new_file, f, ensure_ascii=False, indent=2)
        logging.info("Successfully uploaded and cached: %s", new_file["model_name"])

        # Best effort: a failed backup must not turn a successful submission into an error.
        try:
            leaderboard_df = build_leaderboard_df()
            leaderboard_data = leaderboard_df.to_dict("records")
            leaderboard_buf = BytesIO()
            leaderboard_buf.write(json.dumps(leaderboard_data, ensure_ascii=False, indent=2).encode("utf-8"))
            API.upload_file(
                path_or_fileobj=leaderboard_buf.getvalue(),
                path_in_repo="leaderboard.json",
                repo_id=RESULTS_REPO,
                repo_type="dataset",
            )
            logging.info("Successfully updated leaderboard.json backup")
        except Exception as e:
            logging.warning("Failed to update leaderboard.json backup: %s", e)

        return f"Success! Результаты модели '{mn}' успешно отправлены в лидерборд DeathMath."
    except Exception as e:
        # UI boundary: log with traceback and report the failure as a message.
        logging.exception("Error submitting file: %s", e)
        return f"Error: Произошла ошибка при отправке файла: {str(e)}"
def search_models(df: pd.DataFrame, search_text: str) -> pd.DataFrame:
    """Filter the leaderboard by a case-insensitive substring of the model name.

    Args:
        df: Leaderboard dataframe with a ``model`` column.
        search_text: Text typed by the user; surrounding whitespace is ignored.

    Returns:
        ``df`` itself when the query is empty or blank, otherwise the rows whose
        ``model`` value contains the query. Rows with a missing model name
        never match.
    """
    query = search_text.strip() if search_text else ""
    if not query:
        return df
    # regex=False: the query is raw user input, so characters such as "(", "+"
    # or "." must match literally instead of being compiled as a pattern
    # (an unbalanced "(" would otherwise raise re.error).
    mask = df["model"].str.contains(query, case=False, na=False, regex=False)
    return df[mask]
def update_leaderboard(search_text: str) -> pd.DataFrame:
    """Rebuild the leaderboard and narrow it down to the searched models.

    Args:
        search_text: Query taken from the search box.

    Returns:
        The freshly built leaderboard dataframe after ``search_models`` has
        been applied to it.
    """
    return search_models(build_leaderboard_df(), search_text)
def update_plot(selected_models: list[str]):
    """Redraw the bar chart for the chosen models.

    Args:
        selected_models: Model names handed over to ``create_plot``.

    Returns:
        Whatever ``create_plot`` produces for the selection.
    """
    figure = create_plot(selected_models)
    return figure
def restart_space() -> None:
    """Restart the HuggingFace Space and reload data.

    Requests a restart of the Space identified by ``REPO_ID`` through ``API``
    and then calls ``download_results()``.
    """
    # NOTE(review): the restart request presumably stops this process, in which
    # case the download below may never finish — confirm it is needed here.
    API.restart_space(repo_id=REPO_ID)
    download_results()
def build_demo() -> gr.Blocks:
    """Build the DeathMath leaderboard interface.

    Downloads the stored results, builds the leaderboard dataframe and lays out
    three tabs: the searchable leaderboard table, the result submission form
    and the analytics plots.

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` application.
    """
    download_results()
    demo = gr.Blocks(
        title="DeathMath Leaderboard",
        css=LEADERBOARD_CSS,
        theme=gr.themes.Soft(),
    )
    leaderboard_df = build_leaderboard_df()

    with demo:
        gr.HTML(THEME_DETECTION_JS)
        with gr.Column(elem_classes=["leaderboard-content"]):
            with gr.Column(elem_classes=["leaderboard-header"]):
                gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

            with gr.Tabs(elem_classes="tab-buttons"):
                with gr.TabItem("🏅 Лидерборд", elem_id="llm-benchmark-tab-table", id=0):
                    gr.Markdown("### Таблица результатов моделей DeathMath", elem_classes=["centered-title"])
                    search_box = gr.Textbox(
                        placeholder="🔍 Поиск по названию модели...",
                        show_label=False,
                        elem_classes=["search-box-centered"],
                        container=False,
                    )

                    all_cols = leaderboard_df.columns.tolist()
                    # The model column is rendered as markdown, the system prompt as
                    # plain text and every other column as a number.
                    special_datatypes = {"model": "markdown", "system_prompt": "str"}
                    column_datatypes = [special_datatypes.get(col, "number") for col in all_cols]
                    # Wide first and last columns, narrow ones in between. With fewer
                    # than two columns the first/last pattern would produce more widths
                    # than columns, so fall back to one width per existing column.
                    if len(all_cols) >= 2:
                        column_widths = ["240px"] + ["85px"] * (len(all_cols) - 2) + ["200px"]
                    else:
                        column_widths = ["240px"] * len(all_cols)

                    leaderboard_table = gr.Dataframe(
                        value=leaderboard_df,
                        headers=all_cols,
                        datatype=column_datatypes,
                        max_height="75vh",
                        wrap=True,
                        interactive=False,
                        column_widths=column_widths,
                        elem_id="leaderboard-table",
                        row_count=(len(leaderboard_df), "fixed"),
                        col_count=(len(all_cols), "fixed"),
                    )
                    # Re-filter the table on every change of the search text.
                    search_box.change(fn=update_leaderboard, inputs=[search_box], outputs=[leaderboard_table])

                with gr.TabItem("🚀 Отправить результаты", elem_id="submit-tab", id=1):
                    with gr.Row():
                        gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
                    with gr.Column():
                        model_name_textbox = gr.Textbox(label="Название модели")
                        file_output = gr.File(label="Перетащите JSON файл с результатами сюда", type="filepath")
                        # Hold the parsed upload between the upload and the submit events.
                        uploaded_file = gr.State()
                        file_path = gr.State()
                    with gr.Row():
                        with gr.Column():
                            out = gr.Textbox("Здесь будет показан статус отправки")
                        with gr.Column():
                            _login_button = gr.LoginButton(elem_id="oauth-button")
                        submit_button = gr.Button("Отправить результаты", elem_id="submit_button", variant="primary")
                        file_output.upload(handle_file_upload, file_output, [uploaded_file, file_path])
                        # The OAuth profile is not listed as an input; submit_file declares it
                        # through its ``gr.OAuthProfile`` type hint. After the submission the
                        # leaderboard table is rebuilt so the new entry shows up.
                        submit_button.click(submit_file, [uploaded_file, file_path, model_name_textbox], [out]).then(
                            fn=build_leaderboard_df, inputs=[], outputs=[leaderboard_table]
                        )

                with gr.TabItem("📊 Аналитика", elem_id="analytics-tab", id=2), gr.Column():
                    if len(leaderboard_df) > 0:
                        model_choices = leaderboard_df["model"].tolist()
                        # Preselect at most the first five models.
                        default_models = model_choices[:5]
                    else:
                        # Placeholder entries while there are no results yet.
                        model_choices = ["example/model-1", "example/model-2"]
                        default_models = ["example/model-1", "example/model-2"]
                    model_dropdown = gr.Dropdown(
                        choices=model_choices,
                        label="Модели",
                        value=default_models,
                        multiselect=True,
                        info="Выберите модели для сравнения",
                    )
                    with gr.Tabs():
                        with gr.TabItem("Столбчатая диаграмма"):
                            bar_plot = gr.Plot(create_plot(model_dropdown.value))
                        with gr.TabItem("Радарная диаграмма"):
                            radar_plot = gr.Plot(create_radar_plot(model_dropdown.value))
                    # Both plots follow the dropdown selection.
                    model_dropdown.change(fn=create_plot, inputs=[model_dropdown], outputs=[bar_plot])
                    model_dropdown.change(fn=create_radar_plot, inputs=[model_dropdown], outputs=[radar_plot])
    return demo
# Script entry point: build the interface and launch it.
if __name__ == "__main__":
    demo_app = build_demo()
    demo_app.launch()