Spaces:
Running
Running
| import os | |
| import json | |
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| from pathlib import Path | |
| from apscheduler.schedulers.background import BackgroundScheduler | |
| from huggingface_hub import snapshot_download | |
| from src.about import ( | |
| CITATION_BUTTON_LABEL, | |
| CITATION_BUTTON_TEXT, | |
| EVALUATION_QUEUE_TEXT, | |
| INTRODUCTION_TEXT, | |
| LLM_BENCHMARKS_TEXT, | |
| TITLE, | |
| ABOUT_TEXT | |
| ) | |
| from src.display.css_html_js import custom_css | |
| # from src.display.utils import ( | |
| # BENCHMARK_COLS, | |
| # COLS, | |
| # EVAL_COLS, | |
| # EVAL_TYPES, | |
| # NUMERIC_INTERVALS, | |
| # TYPES, | |
| # AutoEvalColumn, | |
| # ModelType, | |
| # fields, | |
| # WeightType, | |
| # Precision | |
| # ) | |
| from src.envs import API, EVAL_REQUESTS_PATH, EVAL_RESULTS_PATH, QUEUE_REPO, REPO_ID, RESULTS_REPO, TOKEN | |
# Best-effort sync of the results dataset into the local results directory.
# A failed download must not stop the app from starting, so the error is
# reported and start-up continues (previously it was swallowed silently,
# which made a missing or stale results directory impossible to diagnose).
print(EVAL_RESULTS_PATH)
try:
    snapshot_download(
        repo_id=RESULTS_REPO,
        local_dir=EVAL_RESULTS_PATH,
        repo_type="dataset",
        tqdm_class=None,
        etag_timeout=30,
        token=TOKEN,
    )
except Exception as err:  # deliberately broad: this is a start-up boundary
    print(f"Could not download results from {RESULTS_REPO}: {err!r}")
    # restart_space()
# Count attached to every benchmark subset (presumably the number of samples
# in that subset -- confirm against the dataset).  Each key is
# "<Perspective>-<subset name>"; the text before the first "-" is the
# perspective the subset belongs to.
SUBSET_COUNTS = {
    # Alignment
    "Alignment-Object": 250,
    "Alignment-Attribute": 229,
    "Alignment-Action": 115,
    "Alignment-Count": 55,
    "Alignment-Location": 75,
    # Safety
    "Safety-Toxicity-Crime": 29,
    "Safety-Toxicity-Shocking": 31,
    "Safety-Toxicity-Disgust": 42,
    "Safety-Nsfw-Evident": 197,
    "Safety-Nsfw-Evasive": 177,
    "Safety-Nsfw-Subtle": 98,
    # Quality
    "Quality-Distortion-Human_face": 169,
    "Quality-Distortion-Human_limb": 152,
    "Quality-Distortion-Object": 100,
    "Quality-Blurry-Defocused": 350,
    "Quality-Blurry-Motion": 350,
    # Bias
    "Bias-Age": 80,
    "Bias-Gender": 140,
    "Bias-Race": 140,
    "Bias-Nationality": 120,
    "Bias-Religion": 60,
}


def _sum_counts_by_perspective(subset_counts):
    """Total the subset counts per perspective (the key prefix before the first '-')."""
    totals = {}
    for subset_name, count in subset_counts.items():
        perspective = subset_name.split("-", 1)[0]
        totals[perspective] = totals.get(perspective, 0) + count
    return totals


# Per-perspective totals of SUBSET_COUNTS:
# Alignment 724, Safety 574, Quality 1121, Bias 540 (in that order).
PERSPECTIVE_COUNTS = _sum_counts_by_perspective(SUBSET_COUNTS)

# Identifying (non-score) columns that are carried in front of the score columns.
META_DATA = ["Model"]
def restart_space():
    """Request a restart of the Space identified by ``REPO_ID`` through ``API``."""
    API.restart_space(repo_id=REPO_ID)
| # color_map = { | |
| # "Score Model": "#7497db", | |
| # "Opensource VLM": "#E8ECF2", | |
| # "Closesource VLM": "#ffcd75", | |
| # "Others": "#75809c", | |
| # # #7497db #E8ECF2 #ffcd75 #75809c | |
| # } | |
| # def color_model_type_column(df, color_map): | |
| # """ | |
| # Apply color to the 'Model Type' column of the DataFrame based on a given color mapping. | |
| # Parameters: | |
| # df (pd.DataFrame): The DataFrame containing the 'Model Type' column. | |
| # color_map (dict): A dictionary mapping model types to colors. | |
| # Returns: | |
| # pd.Styler: The styled DataFrame. | |
| # """ | |
| # # Function to apply color based on the model type | |
| # def apply_color(val): | |
| # color = color_map.get(val, "default") # Default color if not specified in color_map | |
| # return f'background-color: {color}' | |
| # # Format for different columns | |
| # format_dict = {col: "{:.1f}" for col in df.columns if col not in META_DATA} | |
| # format_dict['Overall Score'] = "{:.2f}" | |
| # format_dict[''] = "{:d}" | |
| # return df.style.applymap(apply_color, subset=['Model Type']).format(format_dict, na_rep='') | |
def regex_table(dataframe, regex, filter_button, style=True):
    """
    Filter the leaderboard by model type and by a comma-separated model search.

    Parameters:
        dataframe (pd.DataFrame): Table with at least the "Model" and
            "Model Type" columns.
        regex (str): Comma-separated, case-insensitive regular expressions. A row
            is kept when its "Model" value matches any of them. Blank entries
            (e.g. from a trailing comma) are ignored and an empty query keeps
            every row. A query that is not a valid regular expression is
            matched as literal text instead of raising.
        filter_button (list | str | Any): Selected model types. When it is a list
            or a string, rows whose "Model Type" is not selected are removed;
            any other value disables the model-type filter.
        style (bool): Unused; kept so existing callers keep working.
    Returns:
        pd.DataFrame: A new, re-indexed table whose first column, named '',
        holds the 1-based position of each remaining row. The input table is
        not modified.
    """
    import re  # local import: only needed to validate the user-supplied pattern

    # Split the query on commas, trim whitespace and drop blanks.  An empty
    # alternative ("gpt," -> "gpt|") would otherwise match every row.
    patterns = [part.strip() for part in (regex or "").split(",")]
    patterns = [part for part in patterns if part]
    # Join into a single pattern with '|' acting as OR.
    combined_regex = "|".join(patterns)
    try:
        re.compile(combined_regex)
    except re.error:
        # The search box fires while the user is still typing, so half-written
        # patterns such as "(" are expected: fall back to a literal match.
        combined_regex = "|".join(re.escape(part) for part in patterns)

    # Drop every model type that is not ticked in the checkbox group.
    if isinstance(filter_button, (list, str)):
        for model_type in ("Integrated LVLM", "Interleaved LVLM"):
            if model_type not in filter_button:
                is_type = dataframe["Model Type"].str.contains(model_type, case=False, na=False)
                dataframe = dataframe[~is_type]

    # Keep the rows whose model name matches any of the patterns.
    name_matches = dataframe["Model"].str.contains(combined_regex, case=False, na=False)
    data = dataframe[name_matches].reset_index(drop=True)

    # (Re)create the '' column with the row rank; an existing one is replaced
    # so that an already-ranked table can be filtered again.
    if "" in data.columns:
        data = data.drop(columns="")
    data.insert(0, "", range(1, 1 + len(data)))
    return data
def get_leaderboard_results(results_path):
    """
    Load every ``*.json`` file found directly inside a directory into one table.

    Parameters:
        results_path (str | os.PathLike): Directory holding the result files.
            Each file must contain JSON that ``pd.DataFrame`` accepts.
    Returns:
        pd.DataFrame: The rows of all files with a fresh 0..n-1 index, or an
        empty DataFrame when the directory contains no JSON file.
    """
    data_dir = Path(results_path)
    frames = []
    for file_name in os.listdir(data_dir):
        # Build the path from data_dir: results_path may be a plain string,
        # for which the "/" operator is not defined.
        file_path = data_dir / file_name
        # Skip other file types and directories that happen to end in ".json".
        if not file_name.endswith(".json") or not file_path.is_file():
            continue
        with open(file_path, encoding="utf-8") as rf:
            frames.append(pd.DataFrame(json.load(rf)))
    if not frames:
        return pd.DataFrame()
    # Concatenate once instead of on every iteration.  The frames are reversed
    # because earlier versions put each newly read file in front of the
    # previously read ones.
    return pd.concat(frames[::-1], ignore_index=True)
def avg_all_perspective(orig_df: pd.DataFrame, columns_name: list, meta_data=None, perspective_counts=None):
    """
    Add a count-weighted "Overall Score" column and sort by it, best first.

    Parameters:
        orig_df (pd.DataFrame): Table holding the ``meta_data`` columns and one
            score column per entry of ``columns_name``. It is not modified.
        columns_name (list): Score columns to average. Every name must be a key
            of ``perspective_counts``.
        meta_data (list | None): Identifying columns placed first in the result.
            Defaults to the module-level ``META_DATA``.
        perspective_counts (dict | None): Count per perspective, used as the
            weight of that perspective. Defaults to the module-level
            ``PERSPECTIVE_COUNTS``.
    Returns:
        pd.DataFrame: Columns ``meta_data + ["Overall Score"] + columns_name``,
        sorted by "Overall Score" in descending order with a fresh index.
    Raises:
        KeyError: If a name in ``columns_name`` has no entry in ``perspective_counts``.
        ValueError: If the counts of the selected columns sum to zero.
    """
    # Resolve the defaults at call time rather than binding the shared
    # module-level objects as default arguments.
    if meta_data is None:
        meta_data = META_DATA
    if perspective_counts is None:
        perspective_counts = PERSPECTIVE_COUNTS
    meta_cols = list(meta_data)
    score_cols = list(columns_name)

    # Normalise over the selected perspectives only, so the weights sum to 1
    # even when a subset of the perspectives is averaged.
    selected_counts = {col: perspective_counts[col] for col in score_cols}
    total_count = sum(selected_counts.values())
    if score_cols and total_count == 0:
        raise ValueError("perspective counts of the selected columns sum to zero")
    weights = {col: count / total_count for col, count in selected_counts.items()}

    # Work on a copy: adding a column to a slice of orig_df is ambiguous in pandas.
    new_df = orig_df[meta_cols + score_cols].copy()

    # Column-wise accumulation; also well defined for an empty table, and a
    # missing score still yields a missing overall score.
    overall = pd.Series(0.0, index=new_df.index)
    for col in score_cols:
        overall = overall + new_df[col] * weights[col]
    new_df["Overall Score"] = overall

    cols = meta_cols + ["Overall Score"] + score_cols
    new_df = new_df[cols].sort_values(by="Overall Score", ascending=False).reset_index(drop=True)
    return new_df
# Hard-coded leaderboard contents, written one model per row so that a model's
# scores can be read (and a new model added) on a single line.
_LEADERBOARD_COLUMNS = (
    "Model",
    "Model Type",
    "Situational analysis",
    "Project-based learning",
    "Multi-step reasoning",
    "AVG",
)
_LEADERBOARD_ROWS = (
    ("MiniGPT-5", "Interleaved LVLM", 47.63, 55.12, 42.17, 50.92),
    ("EMU-2", "Interleaved LVLM", 39.65, 46.12, 50.75, 45.33),
    ("GILL", "Interleaved LVLM", 46.72, 57.57, 39.33, 51.58),
    ("Anole", "Interleaved LVLM", 48.95, 59.05, 51.72, 55.22),
    ("GPT-4o | Openjourney", "Integrated LVLM", 53.05, 71.40, 53.67, 63.65),
    ("GPT-4o | SD-3", "Integrated LVLM", 53.00, 71.20, 53.67, 63.52),
    ("GPT-4o | SD-XL", "Integrated LVLM", 56.12, 73.25, 53.67, 65.47),
    ("GPT-4o | Flux", "Integrated LVLM", 54.97, 68.80, 53.67, 62.63),
    ("Gemini-1.5 | Openjourney", "Integrated LVLM", 48.08, 67.93, 60.05, 61.57),
    ("Gemini-1.5 | SD-3", "Integrated LVLM", 47.48, 68.70, 60.05, 61.87),
    ("Gemini-1.5 | SD-XL", "Integrated LVLM", 49.43, 71.85, 60.05, 64.15),
    ("Gemini-1.5 | Flux", "Integrated LVLM", 47.07, 68.33, 60.05, 61.55),
    ("LLAVA-34b | Openjourney", "Integrated LVLM", 54.12, 73.47, 47.28, 63.93),
    ("LLAVA-34b | SD-3", "Integrated LVLM", 54.72, 72.55, 47.28, 63.57),
    ("LLAVA-34b | SD-XL", "Integrated LVLM", 55.97, 74.60, 47.28, 65.05),
    ("LLAVA-34b | Flux", "Integrated LVLM", 54.23, 71.32, 47.28, 62.73),
    ("Qwen-VL-70b | Openjourney", "Integrated LVLM", 52.73, 71.63, 55.63, 64.05),
    ("Qwen-VL-70b | SD-3", "Integrated LVLM", 54.98, 71.87, 55.63, 64.75),
    ("Qwen-VL-70b | SD-XL", "Integrated LVLM", 52.58, 73.57, 55.63, 65.12),
    ("Qwen-VL-70b | Flux", "Integrated LVLM", 54.23, 69.47, 55.63, 63.18),
)

# Column-oriented view of the rows above: {column name: [one value per model]}.
data = {
    column: [row[position] for row in _LEADERBOARD_ROWS]
    for position, column in enumerate(_LEADERBOARD_COLUMNS)
}

# The table shown on the leaderboard and the number of models it lists.
df = pd.DataFrame(data)
total_models = len(df)
# Model types offered by the filter; all of them are ticked on first load.
model_type_choices = ["Interleaved LVLM", "Integrated LVLM"]

# Table shown on first load: every model, with the leading rank column that
# regex_table adds.  Its columns are also used as the headers of the visible
# table so that headers and content agree.
initial_table = regex_table(df.copy(), "", list(model_type_choices))

# NOTE(review): the nesting of the layout blocks below was reconstructed from a
# copy of this file that had lost its indentation -- confirm against the
# deployed layout (in particular the position of the citation accordion).
with gr.Blocks(css=custom_css) as app:
    # Header: introduction text on the left, an empty spacer column on the right.
    with gr.Row():
        with gr.Column(scale=6):
            gr.Markdown(INTRODUCTION_TEXT.format(str(total_models)))
        with gr.Column(scale=4):
            gr.Markdown("")
            # gr.HTML(BGB_LOGO, elem_classes="logo")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 MMIE Leaderboard"):
            with gr.Row():
                search_overall = gr.Textbox(
                    label="Model Search (delimit with , )",
                    # Queries are split on "," by regex_table, so say so here.
                    placeholder="🔍 Search model (separate multiple queries with `,`) and press ENTER...",
                    show_label=False,
                )
                model_type_overall = gr.CheckboxGroup(
                    choices=list(model_type_choices),
                    value=list(model_type_choices),
                    label="Model Type",
                    show_label=False,
                    interactive=True,
                )
            with gr.Row():
                # Invisible copy of the full table: it is the input of every
                # filter call, so filtering always starts from all rows.
                mmie_table_overall_hidden = gr.Dataframe(
                    df,
                    headers=df.columns.tolist(),
                    elem_id="mmie_leadboard_overall_hidden",
                    wrap=True,
                    visible=False,
                )
                # The table the user sees; replaced by the filter output.
                mmie_table_overall = gr.Dataframe(
                    initial_table,
                    headers=initial_table.columns.tolist(),
                    elem_id="mmie_leadboard_overall",
                    wrap=True,
                )
        with gr.TabItem("About"):
            with gr.Row():
                gr.Markdown(ABOUT_TEXT)

    with gr.Accordion("📚 Citation", open=False):
        citation_button = gr.Textbox(
            value=CITATION_BUTTON_TEXT,
            lines=7,
            label="Copy the following to cite these results.",
            elem_id="citation-button",
            show_copy_button=True,
        )

    # Re-filter the visible table whenever the search text or the selected
    # model types change.
    filter_inputs = [mmie_table_overall_hidden, search_overall, model_type_overall]
    search_overall.change(regex_table, inputs=filter_inputs, outputs=mmie_table_overall)
    model_type_overall.change(regex_table, inputs=filter_inputs, outputs=mmie_table_overall)
# Restart the Space periodically from a background scheduler thread.
# NOTE(review): the previous comment said "restarted every 3h", but the
# configured interval of 18000 seconds is 5 hours.  The value is kept as it
# was; confirm which of the two was intended.
RESTART_INTERVAL_SECONDS = 5 * 60 * 60  # 18000 seconds
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, "interval", seconds=RESTART_INTERVAL_SECONDS)
scheduler.start()

# app.queue(default_concurrency_limit=40).launch()
app.launch(allowed_paths=['./', "./src", "./evals"])