Spaces:
Sleeping
Sleeping
| import os | |
| import json | |
| import datetime | |
| import requests | |
| from email.utils import parseaddr | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from datasets import load_dataset, VerificationMode | |
| from apscheduler.schedulers.background import BackgroundScheduler | |
| from huggingface_hub import HfApi | |
| # InfoStrings | |
| from scorer import question_scorer | |
| from content import format_error, format_warning, format_log, TITLE, DATA_DATASET, INTRODUCTION_TEXT, SUBMISSION_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, model_hyperlink, SUBMIT_INTRODUCTION | |
# Runtime configuration.
# TOKEN is read from the environment and is None when the variable is unset.
TOKEN = os.getenv("TOKEN")
OWNER = "Online Mind2Web"
# NOTE: the HfApi client (``api = HfApi()``) is intentionally not created here.
YEAR_VERSION = "2024"
LOCAL_DEBUG = True
| # Display the results | |
def get_dataframe_from_results(eval_path):
    """Load a leaderboard CSV and prepare it for display.

    Rows are ordered by the "Average SR" column from highest to lowest, and
    the four score columns are then rendered as strings with one decimal.

    Args:
        eval_path: Location of the results CSV, passed to ``pandas.read_csv``.
            The file must contain the columns "Easy", "Medium", "Hard" and
            "Average SR".

    Returns:
        A ``pandas.DataFrame`` sorted by "Average SR" (descending) whose four
        score columns hold one-decimal strings.
    """
    score_columns = ('Easy', 'Medium', 'Hard', 'Average SR')

    # Sort while the scores are still numeric; after formatting they are
    # strings and would no longer compare by value.
    results = pd.read_csv(eval_path).sort_values(
        by=["Average SR"], ascending=False
    )

    for column in score_columns:
        results[column] = results[column].map(lambda score: f"{score:.1f}")

    return results
# Leaderboard tables, loaded once when the module is imported. Each CSV goes
# through get_dataframe_from_results so it is sorted and formatted for display.
auto_eval_dataframe_test = get_dataframe_from_results(
    "./auto_Mind2Web-Online - Leaderboard_data.csv"
)
human_eval_dataframe_test = get_dataframe_from_results(
    "./human_Mind2Web-Online - Leaderboard_data.csv"
)
| # def restart_space(): | |
| # api.restart_space(repo_id=LEADERBOARD_PATH, token=TOKEN) | |
# Per-column datatypes handed to the leaderboard Dataframe components:
# four text columns, four numeric columns, then one trailing text column.
TYPES = ["str"] * 4 + ["number"] * 4 + ["str"]
def refresh():
    """Re-read both leaderboard CSV files from disk.

    The freshly loaded tables are only returned; the module-level
    ``auto_eval_dataframe_test`` / ``human_eval_dataframe_test`` variables are
    not rebound by this function.

    Returns:
        A ``(auto, human)`` tuple of DataFrames, auto-evaluation table first.
    """
    csv_paths = (
        './auto_Mind2Web-Online - Leaderboard_data.csv',
        './human_Mind2Web-Online - Leaderboard_data.csv',
    )
    # Loaded in order: the auto table first, then the human table.
    auto_table, human_table = (
        get_dataframe_from_results(path) for path in csv_paths
    )
    return auto_table, human_table
def upload_file(files):
    """Collect the ``name`` attribute of every uploaded file object.

    Args:
        files: Iterable of objects exposing a ``name`` attribute.

    Returns:
        A list with each object's ``name``, in input order.
    """
    paths = []
    for uploaded in files:
        paths.append(uploaded.name)
    return paths
# Display width of each leaderboard column; nine entries, one per entry of TYPES.
leaderboard_column_widths = [
    "15%", "15%", "15%", "15%", "10%", "10%", "10%", "10%", "15%",
]

# Build the leaderboard page. Components created while the Blocks context is
# open are attached to `demo`.
with gr.Blocks() as demo:
    # Page header and introduction text.
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    # Citation box inside an accordion that starts closed.
    # NOTE(review): the first character of the accordion label looks like a
    # mis-encoded emoji; it is reproduced as found — confirm the intended glyph.
    with gr.Row():
        with gr.Accordion("๐ Citation", open=False):
            citation_button = gr.Textbox(
                label=CITATION_BUTTON_LABEL,
                value=CITATION_BUTTON_TEXT,
                lines=10,
                elem_id="citation-button",
            )

    # One read-only table per evaluation mode.
    with gr.Tab("Human Evaluation", elem_id="human-tab", id=1):
        human_leaderboard_table_test = gr.components.Dataframe(
            value=human_eval_dataframe_test,
            datatype=TYPES,
            column_widths=list(leaderboard_column_widths),
            interactive=False,
        )
    with gr.Tab("Auto Evaluation", elem_id="auto-tab", id=2):
        auto_leaderboard_table_test = gr.components.Dataframe(
            value=auto_eval_dataframe_test,
            datatype=TYPES,
            column_widths=list(leaderboard_column_widths),
            interactive=False,
        )
    with gr.Tab("Submission Guideline", elem_id="submit-tab", id=3):
        with gr.Row():
            gr.Markdown(SUBMIT_INTRODUCTION, elem_classes="markdown-text")

    # NOTE(review): the button is placed at the Blocks level, outside the tabs;
    # the original nesting could not be recovered — confirm the intended layout.
    refresh_button = gr.Button("Refresh")
    # refresh() returns (auto, human), so the outputs are listed in that order.
    refresh_button.click(
        refresh,
        inputs=[],
        outputs=[
            auto_leaderboard_table_test,
            human_leaderboard_table_test,
        ],
    )
| # gr.Markdown(DATA_DATASET, elem_classes="markdown-text") | |
| # with gr.Row(): | |
| # # gr.Image(value="./figure/distribution_reference_length.png", label="Distribution of reference length", show_label=True, scale=0.4) | |
| # gr.Image(value="./figure/Difficulty.png", label="Number of tasks by difficulty level", show_label=True, scale=0.4) | |
| # with gr.Row(): | |
| # gr.Image(value="./figure/distribution_website.jpg", label="Distribution of websites.",show_label=True, scale=0.4) | |
| # with gr.Row(): | |
| # gr.Image(value="./figure/popularity.jpg", label="Popularity of websites.", show_label=True, scale=0.4) | |
| # with gr.Accordion("Submit a new agent for evaluation"): | |
| # with gr.Row(): | |
| # gr.Markdown(SUBMISSION_TEXT, elem_classes="markdown-text") | |
| # with gr.Row(): | |
| # with gr.Column(): | |
| # model_name_textbox = gr.Textbox(label="Agent name") | |
| # model_family_textbox = gr.Textbox(label="Model family") | |
| # organisation = gr.Textbox(label="Organization") | |
| # mail = gr.Textbox(label="Contact email (will be stored privately, & used if there is an issue with your submission)") | |
| # file_output = gr.File() | |
| # with gr.Row(): | |
| # gr.LoginButton() | |
| # submit_button = gr.Button("Submit Eval") | |
| # submission_result = gr.Markdown() | |
| # submit_button.click( | |
| # [ | |
| # level_of_test, | |
| # model_name_textbox, | |
| # model_family_textbox, | |
| # system_prompt_textbox, | |
| # url_textbox, | |
| # file_output, | |
| # organisation, | |
| # ], | |
| # submission_result, | |
| # ) | |
# Start the background scheduler. No job is registered on it here: the hourly
# restart_space job is commented out.
scheduler = BackgroundScheduler()
scheduler.start()

# Serve the app with Gradio's debug mode enabled.
demo.launch(debug=True)