Spaces:
Running
Running
| import json | |
| import logging | |
| import os | |
| from io import BytesIO | |
| import gradio as gr | |
| import pandas as pd | |
| from gradio_space_ci import enable_space_ci | |
| from src.config import API, DEFAULT_SYSTEM_PROMPT, REPO_ID, RESULTS_REPO | |
| from src.display import INTRODUCTION_TEXT, LEADERBOARD_CSS, LLM_BENCHMARKS_TEXT, THEME_DETECTION_JS | |
| from src.leaderboard import build_leaderboard_df, download_results | |
| from src.plots import create_plot, create_radar_plot | |
| os.environ["GRADIO_ANALYTICS_ENABLED"] = "false" | |
| logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") | |
| enable_space_ci() | |
def handle_file_upload(file) -> tuple[dict, str]:
    """Handle JSON file upload and parse its contents.

    Args:
        file: Uploaded file object exposing a ``.name`` attribute holding the
            local path of the upload (as provided by ``gr.File``).

    Returns:
        Tuple of (parsed JSON payload, base filename of the upload).
    """
    # os.path.basename replaces the original manual "/"-splitting and also
    # handles platform-specific separators correctly.
    file_path = os.path.basename(file.name)
    logging.info("File uploaded: %s", file_path)
    # Explicit encoding: submitted result files are expected to be UTF-8 JSON.
    with open(file.name, encoding="utf-8") as f:
        payload = json.load(f)
    return payload, file_path
def submit_file(v: dict, _file_path: str, mn: str, profile: gr.OAuthProfile | None) -> str:
    """
    Handle user model evaluation results submission.

    Args:
        v: Uploaded evaluation results parsed from JSON
        _file_path: Path to the uploaded file (unused; kept for signature compatibility)
        mn: Model name specified by user
        profile: HuggingFace user profile, or None when the user is not logged in

    Returns:
        Success or error message to display in the UI
    """
    if profile is None:
        return "Hub Login Required: Войдите в HuggingFace, чтобы загрузить результаты"
    try:
        # Validate the payload shape before touching any remote resources.
        required_fields = ["score", "math_score", "physics_score"]
        for field in required_fields:
            if field not in v:
                return f"Error: Отсутствует обязательное поле '{field}' в JSON файле"
        new_file = {
            "model_name": profile.username + "/" + mn,
            "score": float(v["score"]),
            "math_score": float(v["math_score"]),
            "physics_score": float(v["physics_score"]),
            "total_tokens": int(v.get("total_tokens", 0)),
            "evaluation_time": float(v.get("evaluation_time", 0.0)),
            "system_prompt": v.get("system_prompt", DEFAULT_SYSTEM_PROMPT),
        }
        # All scores are fractions and must lie in [0, 1]; iterate the same
        # field list instead of repeating each comparison by hand.
        if not all(0 <= new_file[field] <= 1 for field in required_fields):
            return "Error: Все значения оценок должны быть в диапазоне от 0 до 1"
        # Sanitize the model name so the filename is a single safe path component.
        safe_filename = profile.username + "_" + mn.replace("/", "_").replace(" ", "_") + ".json"
        # upload_file accepts raw bytes directly; no intermediate BytesIO needed.
        API.upload_file(
            path_or_fileobj=json.dumps(new_file, ensure_ascii=False).encode("utf-8"),
            path_in_repo="model_data/" + safe_filename,
            repo_id=RESULTS_REPO,
            repo_type="dataset",
        )
        # Keep a local copy so the leaderboard can be rebuilt without re-downloading.
        local_path = f"./m_data/model_data/{safe_filename}"
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, "w", encoding="utf-8") as f:
            json.dump(new_file, f, ensure_ascii=False, indent=2)
        logging.info("Successfully uploaded and cached: %s", new_file["model_name"])
        _update_leaderboard_backup()
        return f"Success! Результаты модели '{mn}' успешно отправлены в лидерборд DeathMath."
    except Exception as e:
        logging.error("Error submitting file: %s", e)
        return f"Error: Произошла ошибка при отправке файла: {str(e)}"


def _update_leaderboard_backup() -> None:
    """Best-effort: rebuild the leaderboard and push leaderboard.json to the results repo."""
    try:
        leaderboard_df = build_leaderboard_df()
        leaderboard_data = leaderboard_df.to_dict("records")
        API.upload_file(
            path_or_fileobj=json.dumps(leaderboard_data, ensure_ascii=False, indent=2).encode("utf-8"),
            path_in_repo="leaderboard.json",
            repo_id=RESULTS_REPO,
            repo_type="dataset",
        )
        logging.info("Successfully updated leaderboard.json backup")
    except Exception as e:
        # A failed backup must never fail the submission itself.
        logging.warning("Failed to update leaderboard.json backup: %s", e)
def search_models(df: pd.DataFrame, search_text: str) -> pd.DataFrame:
    """
    Filter the leaderboard by a case-insensitive model-name query.

    Args:
        df: Leaderboard dataframe with a "model" column
        search_text: Search query; blank/whitespace-only queries return df unchanged

    Returns:
        Filtered dataframe (the original df when the query is blank)
    """
    if search_text is None or not search_text.strip():
        return df
    matches = df["model"].str.contains(search_text, case=False, na=False)
    return df[matches]
def update_leaderboard(search_text: str) -> pd.DataFrame:
    """
    Rebuild the leaderboard and apply the current search query.

    Args:
        search_text: Search query typed into the search box

    Returns:
        Freshly built leaderboard dataframe, filtered by the query
    """
    current_df = build_leaderboard_df()
    return search_models(current_df, search_text)
def update_plot(selected_models: list[str]):
    """Update the bar chart for the currently selected models.

    Args:
        selected_models: Model names chosen in the analytics dropdown.

    Returns:
        The plot produced by create_plot for the selected models.
    """
    return create_plot(selected_models)
def restart_space() -> None:
    """Restart the HuggingFace Space and refresh the local results cache."""
    # Request a restart of this Space via the Hub API.
    API.restart_space(repo_id=REPO_ID)
    # NOTE(review): download_results() runs right after the restart request is
    # issued — confirm the refreshed cache is still useful once the restart lands.
    download_results()
def build_demo() -> gr.Blocks:
    """Build DeathMath leaderboard interface.

    Downloads cached results, builds the leaderboard dataframe once, and wires
    up three tabs: the results table (with live search), the submission form,
    and the analytics plots.

    Returns:
        The assembled gr.Blocks application (not yet launched).
    """
    # Fetch the latest results before building the UI.
    download_results()
    demo = gr.Blocks(
        title="DeathMath Leaderboard",
        css=LEADERBOARD_CSS,
        theme=gr.themes.Soft(),
    )
    # Built once at startup; event handlers rebuild it on demand.
    leaderboard_df = build_leaderboard_df()
    with demo:
        # Injected script block for theme handling (content defined in src.display).
        gr.HTML(THEME_DETECTION_JS)
        with gr.Column(elem_classes=["leaderboard-content"]):
            with gr.Column(elem_classes=["leaderboard-header"]):
                gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
            with gr.Tabs(elem_classes="tab-buttons"):
                # --- Tab 0: leaderboard table with live search ---
                with gr.TabItem("🏅 Лидерборд", elem_id="llm-benchmark-tab-table", id=0):
                    gr.Markdown("### Таблица результатов моделей DeathMath", elem_classes=["centered-title"])
                    search_box = gr.Textbox(
                        placeholder="🔍 Поиск по названию модели...",
                        show_label=False,
                        elem_classes=["search-box-centered"],
                        container=False,
                    )
                    all_cols = leaderboard_df.columns.tolist()
                    # Render model names as markdown (supports links), system
                    # prompts as plain text, everything else as numbers.
                    column_datatypes = []
                    for col in all_cols:
                        if col == "model":
                            column_datatypes.append("markdown")
                        elif col == "system_prompt":
                            column_datatypes.append("str")
                        else:
                            column_datatypes.append("number")
                    # Wide first (model) and last (system prompt) columns.
                    # NOTE(review): assumes the dataframe has at least 2 columns.
                    column_widths = ["240px"] + ["85px"] * (len(all_cols) - 2) + ["200px"]
                    leaderboard_table = gr.Dataframe(
                        value=leaderboard_df,
                        headers=all_cols,
                        datatype=column_datatypes,
                        max_height="75vh",
                        wrap=True,
                        interactive=False,
                        column_widths=column_widths,
                        elem_id="leaderboard-table",
                        row_count=(len(leaderboard_df), "fixed"),
                        col_count=(len(all_cols), "fixed"),
                    )
                    # Rebuild and filter the table as the user types.
                    search_box.change(fn=update_leaderboard, inputs=[search_box], outputs=[leaderboard_table])
                # --- Tab 1: results submission form ---
                with gr.TabItem("🚀 Отправить результаты", elem_id="submit-tab", id=1):
                    with gr.Row():
                        gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
                    with gr.Column():
                        model_name_textbox = gr.Textbox(label="Название модели")
                        file_output = gr.File(label="Перетащите JSON файл с результатами сюда", type="filepath")
                        # Parsed JSON payload and original filename, carried between events.
                        uploaded_file = gr.State()
                        file_path = gr.State()
                        with gr.Row():
                            with gr.Column():
                                out = gr.Textbox("Здесь будет показан статус отправки")
                            with gr.Column():
                                # OAuth login; submit_file receives the profile implicitly
                                # through its gr.OAuthProfile-typed parameter.
                                _login_button = gr.LoginButton(elem_id="oauth-button")
                                submit_button = gr.Button("Отправить результаты", elem_id="submit_button", variant="primary")
                        file_output.upload(handle_file_upload, file_output, [uploaded_file, file_path])
                        # After a submission attempt, refresh the leaderboard table in place.
                        submit_button.click(submit_file, [uploaded_file, file_path, model_name_textbox], [out]).then(
                            fn=lambda: build_leaderboard_df(), inputs=[], outputs=[leaderboard_table]
                        )
                # --- Tab 2: analytics plots ---
                with gr.TabItem("📊 Аналитика", elem_id="analytics-tab", id=2), gr.Column():
                    if len(leaderboard_df) > 0:
                        # Pre-select up to the first five models for comparison.
                        model_dropdown = gr.Dropdown(
                            choices=leaderboard_df["model"].tolist(),
                            label="Модели",
                            value=leaderboard_df["model"].tolist()[:5]
                            if len(leaderboard_df) >= 5
                            else leaderboard_df["model"].tolist(),
                            multiselect=True,
                            info="Выберите модели для сравнения",
                        )
                    else:
                        # Placeholder choices when no results exist yet.
                        model_dropdown = gr.Dropdown(
                            choices=["example/model-1", "example/model-2"],
                            label="Модели",
                            value=["example/model-1", "example/model-2"],
                            multiselect=True,
                            info="Выберите модели для сравнения",
                        )
                    with gr.Tabs():
                        with gr.TabItem("Столбчатая диаграмма"):
                            bar_plot = gr.Plot(create_plot(model_dropdown.value))
                        with gr.TabItem("Радарная диаграмма"):
                            radar_plot = gr.Plot(create_radar_plot(model_dropdown.value))
                    # Both plots track the dropdown selection.
                    model_dropdown.change(fn=create_plot, inputs=[model_dropdown], outputs=[bar_plot])
                    model_dropdown.change(fn=create_radar_plot, inputs=[model_dropdown], outputs=[radar_plot])
    return demo
| if __name__ == "__main__": | |
| demo_app = build_demo() | |
| demo_app.launch() | |