__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions'] import os import gradio as gr import pandas as pd import json import numpy as np from constants import * from huggingface_hub import Repository HF_TOKEN = os.environ.get("HF_TOKEN") global data_component, filter_component category_to_dimension = {} for key, value in DIM2CAT_T2V.items(): if value not in category_to_dimension: category_to_dimension[value] = [] category_to_dimension[value].append(key) def upload_file(files): file_paths = [file.name for file in files] return file_paths def get_normalized_i2v_df(df): normalize_df = df.copy().fillna(0.0) for column in normalize_df.columns[4:]: min_val = NORMALIZE_DIC[column]['Min'] max_val = NORMALIZE_DIC[column]['Max'] normalize_df[column] = (normalize_df[column] - min_val) / (max_val - min_val) return normalize_df def get_normalized_t2v_df(df): normalize_df = df.copy() for column in normalize_df.columns: min_val = NORMALIZE_DIC[column]['Min'] max_val = NORMALIZE_DIC[column]['Max'] normalize_df[column] = (normalize_df[column] - min_val) / (max_val - min_val) return normalize_df def calculate_selected_score_i2v(df, selected_columns): selected_QUALITY = [i for i in selected_columns if i in I2V_QUALITY_LIST] selected_I2V = [i for i in selected_columns if i in I2V_LIST] selected_quality_score = df[selected_QUALITY].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in selected_QUALITY]) selected_i2v_score = df[selected_I2V].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in selected_I2V ]) if selected_quality_score.isna().any().any() and selected_i2v_score.isna().any().any(): selected_score = (selected_quality_score * I2V_QUALITY_WEIGHT + selected_i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT) return selected_score.fillna(0.0) if selected_quality_score.isna().any().any(): return selected_i2v_score if selected_i2v_score.isna().any().any(): return selected_quality_score selected_score = (selected_quality_score * I2V_QUALITY_WEIGHT + selected_i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT) return selected_score.fillna(0.0) def get_final_score_t2i(df, selected_columns, select_subject_button=None): normalize_df = df.copy().fillna(0.0) for name in df.drop('Model Name', axis=1).drop('Resolution', axis=1).drop('Total Score', axis=1): if name in DIM_WEIGHT_T2I: normalize_df[name] = normalize_df[name]*DIM_WEIGHT_T2I[name] # 根据 Subject 和选中的维度计算 Selected Score # 如果 subject 是 text-and-typography/text-and-typography-cn,或者手动选中了 Text Rendering,使用包含 Text Rendering 的计算公式 use_text_rendering_formula = ( select_subject_button in ["text-and-typography", "text-and-typography-cn"] or "Text Rendering" in selected_columns ) if use_text_rendering_formula: # 当 Subject 为 text-and-typography/text-and-typography-cn,或手动选中了 Text Rendering 时 # select_score = ((Aesthetic + Image Quality)/2 + (Prompt Semantic Alignment + Text Rendering)/2)/2 aesthetic_quality_avg = (normalize_df["Aesthetic"] + normalize_df["Image Quality"]) / 2 semantic_text_avg = (normalize_df["Prompt Semantic Alignment"] + normalize_df["Text Rendering"]) / 2 selected_score = (aesthetic_quality_avg + semantic_text_avg) / 2 else: # 其他值时 # select_score = ((Aesthetic + Image Quality)/2 + Prompt Semantic Alignment)/2 aesthetic_quality_avg = (normalize_df["Aesthetic"] + normalize_df["Image Quality"]) / 2 selected_score = (aesthetic_quality_avg + normalize_df["Prompt Semantic Alignment"]) / 2 if 'Selected Score' in df: df['Selected Score'] = selected_score else: df.insert(4, 'Selected Score', selected_score) return df def get_final_score_i2i(df, selected_task_button=TASK_I2I): normalize_df = df.copy().fillna(0.0) selected_score = normalize_df[selected_task_button].sum(axis=1)/len(selected_task_button) total_score = normalize_df[TASK_I2I].sum(axis=1)/len(TASK_I2I) if 'Total Score' in df: df['Total Score'] = total_score else: df.insert(2, 'Total Score', total_score) if 'Selected Score' in df: df['Selected Score'] = selected_score else: df.insert(3, 'Selected Score', selected_score) return df def get_final_score_i2v(df, selected_columns): normalize_df = get_normalized_i2v_df(df) try: for name in normalize_df.drop('Model Name', axis=1).drop('Resolution', axis=1).drop('Duration', axis=1).drop('FPS', axis=1): normalize_df[name] = normalize_df[name]*DIM_WEIGHT_I2V[name] except: for name in normalize_df.drop('Model Name', axis=1).drop('Resolution', axis=1).drop('Duration', axis=1).drop('FPS', axis=1): normalize_df[name] = normalize_df[name]*DIM_WEIGHT_I2V[name] quality_score = normalize_df[I2V_QUALITY_LIST].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in I2V_QUALITY_LIST]) i2v_score = normalize_df[I2V_LIST].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in I2V_LIST ]) final_score = (quality_score * I2V_QUALITY_WEIGHT + i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT) if 'Total Score' in df: df['Total Score'] = final_score else: df.insert(1, 'Total Score', final_score) if 'I2V Score' in df: df['I2V Score'] = i2v_score else: df.insert(2, 'I2V Score', i2v_score) if 'Quality Score' in df: df['Quality Score'] = quality_score else: df.insert(3, 'Quality Score', quality_score) selected_score = calculate_selected_score_i2v(normalize_df, selected_columns) if 'Selected Score' in df: df['Selected Score'] = selected_score else: df.insert(1, 'Selected Score', selected_score) mask = df.iloc[:, 8:].isnull().any(axis=1) df.loc[mask, ['Total Score', 'I2V Score','Selected Score' ]] = np.nan return df def get_final_score_t2v(df, selected_columns): score_names = [] for cur_score in category_to_dimension: score_name = f"{cur_score} Score" score_names.append(score_name) filtered_columns = [col for col in category_to_dimension[cur_score] if col in selected_columns] if cur_score == "Quality": print(filtered_columns) normalize_df = get_normalized_t2v_df(df[filtered_columns]) for name in filtered_columns: normalize_df[name] = normalize_df[name]*DIM_WEIGHT_QUALITY_T2V[name] score = normalize_df[filtered_columns].sum(axis=1)/sum([DIM_WEIGHT_QUALITY_T2V[i] for i in filtered_columns]) else: score = df[filtered_columns].mean(axis=1) if score_name in df: df[score_name] = score else: df.insert(1, score_name, score) dim_cols = [c for c in selected_columns if c in DIM_KEY_TO_COL.values()] text_rendering_cols = [c for c in selected_columns if c in DIM_KEY_TO_COL_TEXT_RENDERING.values()] quality_cols = [c for c in selected_columns if c in DIM_KEY_TO_COL_QUALITY.values()] scores = pd.DataFrame() weighted_cols = [] if len(dim_cols) > 0: for col in dim_cols: scores[col] = df[col] weighted_cols.append(col) if len(text_rendering_cols) > 0: text_rendering_score = df[text_rendering_cols].mean(axis=1) scores['Text Rendering'] = text_rendering_score weighted_cols.append("Text Rendering") if len(quality_cols) > 0: quality_normalize_df = get_normalized_t2v_df(df[quality_cols]) for name in quality_cols: quality_normalize_df[name] = quality_normalize_df[name]*DIM_WEIGHT_QUALITY_T2V[name] quality_score = quality_normalize_df[quality_cols].sum(axis=1)/sum([DIM_WEIGHT_QUALITY_T2V[i] for i in quality_cols]) scores['Quality'] = quality_score weighted_cols.append("Quality") for col in weighted_cols: scores[col] = scores[col] * DIM_WEIGHT_T2V[col] selected_score = scores[weighted_cols].sum(axis=1)/sum([DIM_WEIGHT_T2V[i] for i in weighted_cols]) if 'Selected Score' in df: df['Selected Score'] = selected_score else: df.insert(1, 'Selected Score', selected_score) return df def get_df_from_json_t2i(file_dir, select_subject_button=None): if not os.path.isdir(file_dir): return pd.DataFrame(columns=COLUMN_NAMES_T2I) colname_to_key = { "Aesthetic": "aesthetic", "Image Quality": "imaging", "Prompt Semantic Alignment": "semantic_alignment", "Text Rendering": "text_rendering", "Total Score": "task_score", } image_generation_params = [ "Model Name", "Resolution" ] rows = [] for filename in os.listdir(file_dir): if not filename.endswith(".json") or not filename.startswith("eval_results_"): continue file_path = os.path.join(file_dir, filename) try: with open(file_path, "r") as f: data = json.load(f) except Exception: continue row = {} for col_name, json_key in colname_to_key.items(): if col_name == "Total Score": value = data.get("task_score", 0) row[col_name] = value continue dimension_results = data.get("dimension_results", {}) dim_info = dimension_results.get(json_key, {}) result_items = dim_info.get("result_items", []) if select_subject_button is None: value = dim_info.get("score", 0) else: score_list = [] target_categories = [select_subject_button] for item in result_items: item_categories = item.get("Category", []) # 检查是否有任何目标 category 在 item 的 Category 列表中 if any(cat in item_categories for cat in target_categories): value = item.get("image_results", 0) score_list.append(value) # print(select_subject_button, score_list) if len(score_list) > 0: value = sum(score_list) / len(score_list) else: value = 0 if col_name in SHOW_DIM_WEIGHT_IMAGE: value = value * SHOW_DIM_WEIGHT_IMAGE[col_name] row[col_name] = value for key in image_generation_params: value = data.get("image_generation_params").get(key) row[key] = value rows.append(row) if len(rows) == 0: return pd.DataFrame(columns=COLUMN_NAMES_T2I) df = pd.DataFrame(rows)[["Model Name", "Resolution", "Total Score"] + T2I_TAB] return df def get_df_from_json_i2i(file_dir): select_task_button = TASK_I2I if not os.path.isdir(file_dir): return pd.DataFrame(columns=MODEL_INFO_TAB_I2I+select_task_button) rows = [] image_generation_params = ["Model Name"] task_dims_list = [f"{task}_{dim}" for task in select_task_button for dim in I2I_TAB_DICT[task]] for filename in os.listdir(file_dir): if not filename.endswith(".json") or not filename.startswith("eval_results_"): continue file_path = os.path.join(file_dir, filename) try: with open(file_path, "r") as f: data = json.load(f) except Exception: continue row = {} model_image_generation_params = data.get("image_generation_params", {}) for key in image_generation_params: value = model_image_generation_params.get(key, "None") row[key] = value for task in select_task_button: value = 0 task_info = data.get(task) value = task_info.get("task_score", 0) row[task] = value for dim, dim_info in task_info.get("dimension_results").items(): dim_score = dim_info.get("score", 0) if dim in SHOW_DIM_WEIGHT_IMAGE: dim_score = dim_score * SHOW_DIM_WEIGHT_IMAGE[dim] row[f"{task}_{DIM_KEY_TO_COL_I2I[dim]}"] = dim_score rows.append(row) if len(rows) == 0: return pd.DataFrame(columns=MODEL_INFO_TAB_I2I+select_task_button) df = pd.DataFrame(rows)[["Model Name"] + select_task_button + task_dims_list] return df def get_df_from_json_i2v(file_dir): if not os.path.isdir(file_dir): return pd.DataFrame(columns=COLUMN_NAMES_I2V) colname_to_key = { # "Video-Text Camera Motion": "camera_motion", "Video-Image Subject Consistency": "i2v_subject", "Video-Image Background Consistency": "i2v_background", "Subject Consistency": "subject_consistency", "Background Consistency": "background_consistency", "Motion Smoothness": "motion_smoothness", "Dynamic Degree": "dynamic_degree", "Aesthetic Quality": "aesthetic_quality", "Imaging Quality": "imaging_quality", "Temporal Flickering": "temporal_flickering", } rows = [] for filename in os.listdir(file_dir): if not filename.endswith(".json") or not filename.startswith("eval_results_"): continue file_path = os.path.join(file_dir, filename) try: with open(file_path, "r") as f: data = json.load(f) except Exception: continue row = {} for col_name, json_key in colname_to_key.items(): value = 0 dim_info = data.get(json_key) if isinstance(dim_info, dict): score_list = dim_info.get("score", []) if isinstance(score_list, list) and len(score_list) >= 1: value = score_list[0] row[col_name] = value video_generation_params = data.get("video_generation_params", {}) for key, value in video_generation_params.items(): row[key] = value rows.append(row) if len(rows) == 0: return pd.DataFrame(columns=COLUMN_NAMES_I2V) df = pd.DataFrame(rows)[["Model Name", "Resolution", "Duration", "FPS"] + I2V_TAB] return df def get_df_from_json_t2v(file_dir): if not os.path.isdir(file_dir): return pd.DataFrame(columns=COLUMN_NAMES_T2V) rows = [] for filename in os.listdir(file_dir): if not filename.endswith(".json") or not filename.startswith("eval_results_"): continue file_path = os.path.join(T2V_DIR, filename) try: with open(file_path, "r") as f: data = json.load(f) except Exception as e: continue row = {} for json_key, col_name in DIM_KEY_TO_COL.items(): dim_info = data.get(json_key, {}) score_list = dim_info.get("score", []) row[col_name] = score_list[1] if isinstance(score_list, list) and len(score_list) == 3 else 0 for json_key, col_name in DIM_KEY_TO_COL_TEXT_RENDERING.items(): dim_info = data.get(json_key, {}) score_list = dim_info.get("score", []) row[col_name] = score_list[1] if isinstance(score_list, list) and len(score_list) == 3 else 0 for json_key, col_name in DIM_KEY_TO_COL_QUALITY.items(): dim_info = data.get(json_key, {}) score_list = dim_info.get("score", []) row[col_name] = score_list[0] if isinstance(score_list, list) and len(score_list) == 1 else 0 final_fixed = data.get("final_score_fixed", {}) for json_key, col_name in FINAL_FIXED_KEY_TO_COL.items(): row[col_name] = final_fixed.get(json_key) video_generation_params = data.get("video_generation_params") for key, value in video_generation_params.items(): row[key] = value rows.append(row) if len(rows) == 0: return pd.DataFrame(columns=COLUMN_NAMES_T2V) df = pd.DataFrame(rows) return df def try_sync_repo(): try: submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset") submission_repo.git_pull() return True except Exception as e: print(f"[警告] 无法同步远程仓库,将使用本地数据: {e}") return False def get_baseline_df_i2v(): try_sync_repo() df = get_df_from_json_i2v(I2V_DIR) df = get_final_score_i2v(df, checkbox_group_i2v.value) df = df.sort_values(by="Selected Score", ascending=False) present_columns = MODEL_INFO_TAB_I2V + checkbox_group_i2v.value df = df[present_columns] df = convert_scores_to_percentage(df) return df def get_baseline_df_t2i(): try_sync_repo() # 兼容"显示值 -> 内部值"的映射;🌐 All 映射为 None(表示不过滤) _subject_value = select_subject_button.value if _subject_value is not None and _subject_value in T2I_SUBJECT_REVERSE_MAP: _subject_value = T2I_SUBJECT_REVERSE_MAP[_subject_value] df = get_df_from_json_t2i(T2I_DIR, _subject_value) # 传递 _subject_value 给 get_final_score_t2i,用于正确计算 Selected Score df = get_final_score_t2i(df, checkbox_group_t2i.value, _subject_value) df = df.sort_values(by="Selected Score", ascending=False) present_columns = MODEL_INFO_TAB_T2I + checkbox_group_t2i.value df = df[present_columns] df = convert_scores_to_percentage(df) return df def get_baseline_df_i2i(): try_sync_repo() df = get_df_from_json_i2i(I2I_DIR) df = get_final_score_i2i(df) df = df.sort_values(by="Selected Score", ascending=False) present_columns = MODEL_INFO_TAB_I2I + TASK_I2I df = df[present_columns] df = convert_scores_to_percentage(df) return df def get_baseline_df_t2v(): try_sync_repo() df = get_all_df_t2v(TASK_INFO_T2V) df = df[COLUMN_NAMES_T2V] df = df.sort_values(by="Total Score", ascending=False) df = convert_scores_to_percentage(df) return df def get_all_df_i2v(selected_columns, dir=I2V_DIR): try_sync_repo() df = get_df_from_json_i2v(dir) df = get_final_score_i2v(df, selected_columns) df = df.sort_values(by="Selected Score", ascending=False) return df def get_all_df_t2i(selected_columns, select_subject_button=None, dir=T2I_DIR): try_sync_repo() df = get_df_from_json_t2i(dir, select_subject_button) df = get_final_score_t2i(df, selected_columns, select_subject_button) df = df.sort_values(by="Selected Score", ascending=False) return df def get_all_df_t2v(selected_columns, dir=T2V_DIR): try_sync_repo() df = get_df_from_json_t2v(dir) df = get_final_score_t2v(df, selected_columns) df = df.sort_values(by="Total Score", ascending=False) return df def convert_scores_to_percentage(df): df_copy = df.copy() NON_SCORE_COLS = ['Model Name', 'Resolution', 'Duration', 'FPS'] for col in df_copy.columns: if col not in NON_SCORE_COLS: numeric_series = pd.to_numeric(df_copy[col], errors='coerce') if numeric_series.notna().any(): processed_series = round(numeric_series * 100, 2) formatted_series = processed_series.apply(lambda x: f"{x:05.2f}%" if pd.notna(x) else x) df_copy[col] = df_copy[col].astype(str).where(formatted_series.isna(), formatted_series) return df_copy def on_filter_model_size_method_change_t2i(selected_columns, select_subject_button=None): updated_data = get_all_df_t2i(selected_columns, select_subject_button, T2I_DIR) selected_columns = [item for item in T2I_TAB if item in selected_columns] present_columns = MODEL_INFO_TAB_T2I+ selected_columns updated_data = updated_data[present_columns] updated_data = updated_data.sort_values(by="Selected Score", ascending=False) updated_headers = present_columns update_datatype = [T2I_TITLE_TYPE[COLUMN_NAMES_T2I.index(x)] for x in updated_headers] updated_data = convert_scores_to_percentage(updated_data) filter_component = gr.components.Dataframe( value=updated_data, headers=updated_headers, type="pandas", datatype=update_datatype, interactive=False, visible=True, ) return filter_component def on_filter_model_size_method_change_i2i(selected_categories): selected_tasks = TASK_INFO_I2I[selected_categories] updated_data = get_df_from_json_i2i(I2I_DIR) updated_data = get_final_score_i2i(updated_data, selected_tasks) present_columns = MODEL_INFO_TAB_I2I + selected_tasks updated_data = updated_data[present_columns] updated_data = updated_data.sort_values(by="Selected Score", ascending=False) updated_headers = present_columns update_datatype = ["markdown"] + ["number"] * (len(updated_headers)-1) updated_data = convert_scores_to_percentage(updated_data) filter_component = gr.components.Dataframe( value=updated_data, headers=updated_headers, type="pandas", datatype=update_datatype, interactive=False, visible=True, ) return filter_component, gr.update(value=selected_tasks) def on_filter_task_method_change_i2i(selected_tasks): updated_data = get_df_from_json_i2i(I2I_DIR) updated_data = get_final_score_i2i(updated_data, selected_tasks) present_columns = MODEL_INFO_TAB_I2I + selected_tasks present_dim = [] # if len(selected_tasks) == 1: # present_dim = I2I_TAB_DICT[selected_tasks[0]] # all_present_columns = present_columns + [f"{selected_tasks[0]}_{dim}" for dim in present_dim] # else: # all_present_columns = present_columns all_present_columns = present_columns updated_data = updated_data[all_present_columns] # if len(selected_tasks) == 1: # rename_dict = {} # for dim in present_dim: # old_col = f"{selected_tasks[0]}_{dim}" # if old_col in updated_data.columns: # rename_dict[old_col] = dim # updated_data = updated_data.rename(columns=rename_dict) updated_data = updated_data.sort_values(by="Selected Score", ascending=False) updated_headers = present_columns + present_dim update_datatype = ["markdown"] + ["number"] * (len(updated_headers)-1) updated_data = convert_scores_to_percentage(updated_data) filter_component = gr.components.Dataframe( value=updated_data, headers=updated_headers, type="pandas", datatype=update_datatype, interactive=False, visible=True, ) return filter_component # select function def on_filter_model_size_method_change_i2v(selected_columns): updated_data = get_all_df_i2v(selected_columns, I2V_DIR) selected_columns = [item for item in I2V_TAB if item in selected_columns] present_columns = MODEL_INFO_TAB_I2V + selected_columns updated_data = updated_data[present_columns] updated_data = updated_data.sort_values(by="Selected Score", ascending=False) updated_data = convert_scores_to_percentage(updated_data) updated_headers = present_columns update_datatype = [I2V_TITLE_TYPE[COLUMN_NAMES_I2V.index(x)] for x in updated_headers] filter_component = gr.components.Dataframe( value=updated_data, headers=updated_headers, type="pandas", datatype=update_datatype, interactive=False, visible=True, ) return filter_component def on_filter_model_size_method_change_t2v(selected_columns=TASK_INFO_T2V): updated_data = get_all_df_t2v(selected_columns, T2V_DIR) present_columns = MODEL_INFO_TAB_T2V + selected_columns updated_headers = present_columns update_datatype = [T2V_TITLE_TYPE[COLUMN_NAMES_T2V.index(x)] for x in updated_headers] updated_data = updated_data[present_columns] updated_data = updated_data.sort_values(by="Selected Score", ascending=False) updated_data = convert_scores_to_percentage(updated_data) filter_component = gr.components.Dataframe( value=updated_data, headers=updated_headers, type="pandas", datatype=update_datatype, interactive=False, visible=True, ) return filter_component def on_filter_model_size_method_score_change_t2v(select_score): selected_columns = category_to_dimension[select_score] updated_data = get_all_df_t2v(selected_columns, T2V_DIR) present_columns = MODEL_INFO_TAB_T2V + [f"{select_score} Score"] + selected_columns updated_headers = present_columns updated_data = updated_data[present_columns] updated_data = updated_data.sort_values(by=f"{select_score} Score", ascending=False) updated_data = convert_scores_to_percentage(updated_data) update_datatype = [T2V_TITLE_TYPE[COLUMN_NAMES_T2V.index(x)] for x in updated_headers] filter_component = gr.components.Dataframe( value=updated_data, headers=updated_headers, type="pandas", datatype=update_datatype, interactive=False, visible=True, ) return filter_component, gr.update(value=selected_columns) def refresh_and_switch_tab(t2v_selected_columns, i2i_selected_tasks): t2v_data, t2v_file = on_filter_dim_and_update_download_t2v(t2v_selected_columns) i2i_data, i2i_file = on_filter_task_and_update_download_i2i(i2i_selected_tasks) return t2v_data, i2i_data, t2v_file, i2i_file def download_t2v_xlsx(selected_columns, vbench_t2v_download_file=None): import tempfile updated_data = get_all_df_t2v(selected_columns, T2V_DIR) present_columns = MODEL_INFO_T2V + selected_columns updated_data = updated_data[present_columns] updated_data = convert_scores_to_percentage(updated_data) temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", prefix="VBench_T2V_Leaderboard_") updated_data.to_excel(temp_file.name, index=False, engine='openpyxl') if vbench_t2v_download_file is not None: return gr.update(value=temp_file.name, visible=True) else: return gr.update(value=temp_file.name) def download_t2i_xlsx(selected_columns, select_subject_button=None,t2i_download_file=None): import tempfile if select_subject_button is not None and select_subject_button in T2I_SUBJECT_REVERSE_MAP: select_subject_button = T2I_SUBJECT_REVERSE_MAP[select_subject_button] updated_data = get_all_df_t2i(selected_columns,select_subject_button, T2I_DIR) selected_columns = [item for item in T2I_TAB if item in selected_columns] present_columns = MODEL_INFO_TAB_T2I + selected_columns updated_data = updated_data[present_columns] updated_data = convert_scores_to_percentage(updated_data) temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", prefix="IBench_T2I_Leaderboard_") updated_data.to_excel(temp_file.name, index=False, engine='openpyxl') if t2i_download_file is not None: return gr.update(value=temp_file.name, visible=True) else: return gr.update(value=temp_file.name) def download_i2i_xlsx(selected_tasks, i2i_download_file=None): import tempfile updated_data = get_df_from_json_i2i(I2I_DIR) updated_data = get_final_score_i2i(updated_data, selected_tasks) present_columns = MODEL_INFO_TAB_I2I + selected_tasks updated_data = updated_data[present_columns] updated_data = convert_scores_to_percentage(updated_data) temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", prefix="IBench_I2I_Leaderboard_") updated_data.to_excel(temp_file.name, index=False, engine='openpyxl') if i2i_download_file is not None: return gr.update(value=temp_file.name, visible=True) else: return gr.update(value=temp_file.name) def download_i2v_xlsx(selected_columns, i2v_download_file=None): import tempfile updated_data = get_all_df_i2v(selected_columns, I2V_DIR) selected_columns = [item for item in I2V_TAB if item in selected_columns] present_columns = MODEL_INFO_TAB_I2V + selected_columns updated_data = updated_data[present_columns] updated_data = convert_scores_to_percentage(updated_data) temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", prefix="VBench_I2V_Leaderboard_") updated_data.to_excel(temp_file.name, index=False, engine='openpyxl') if i2v_download_file is not None: return gr.update(value=temp_file.name, visible=True) else: return gr.update(value=temp_file.name) def on_filter_and_update_download_t2v(select_score, selected_columns): data, checkgroup = on_filter_model_size_method_score_change_t2v(select_score) file = download_t2v_xlsx(selected_columns) return data, checkgroup, file def on_filter_dim_and_update_download_t2v(selected_columns): data = on_filter_model_size_method_change_t2v(selected_columns) file = download_t2v_xlsx(selected_columns) return data, file def on_filter_task_and_update_download_i2i(selected_tasks): data = on_filter_task_method_change_i2i(selected_tasks) file = download_i2i_xlsx(selected_tasks) return data, file def on_filter_and_update_download_i2v(selected_columns): data = on_filter_model_size_method_change_i2v(selected_columns) file = download_i2v_xlsx(selected_columns) return data, file def update_t2i_checkbox_choices(select_subject_button_value, current_selected_columns): """根据 subject 更新 checkbox 选项 - 🌐 All、🔤 Text & Typography (EN)、🔤 Text & Typography (CN) 显示 Text Rendering - 其他 subject 不显示 Text Rendering 返回: (gr.update对象, 更新后的选中值列表) """ # 检查是否应该显示 Text Rendering # 🌐 All、🔤 Text & Typography (EN)、🔤 Text & Typography (CN) 显示 Text Rendering show_text_rendering = ( select_subject_button_value == "🌐 All" or select_subject_button_value == "🔤 Text & Typography (EN)" or select_subject_button_value == "🔤 Text & Typography (CN)" ) # 检查是否是 text-and-typography 或 text-and-typography-cn(用于自动选中) is_text_typography = False if select_subject_button_value is not None: # 先转换为内部值 internal_subject = T2I_SUBJECT_REVERSE_MAP.get(select_subject_button_value, select_subject_button_value) is_text_typography = internal_subject in ["text-and-typography", "text-and-typography-cn"] # 根据是否显示 Text Rendering 构建可用的 choices if show_text_rendering: # 包含 Text Rendering available_choices = T2I_TAB.copy() else: # 不包含 Text Rendering available_choices = [col for col in T2I_TAB if col != "Text Rendering"] # 更新当前选中的值,移除不可用的选项 updated_selected = [col for col in current_selected_columns if col in available_choices] # 根据 subject 处理 Text Rendering 的选中状态 if is_text_typography: # 当 subject 是 text-and-typography 或 text-and-typography-cn 时,确保 Text Rendering 被选中 if "Text Rendering" not in updated_selected: updated_selected.append("Text Rendering") else: # 当 subject 不是 text-and-typography 或 text-and-typography-cn 时,移除 Text Rendering 的选中状态 if "Text Rendering" in updated_selected: updated_selected.remove("Text Rendering") # 如果没有任何选中项,至少选中第一个可用项 if len(updated_selected) == 0 and len(available_choices) > 0: updated_selected = [available_choices[0]] checkbox_update = gr.update(choices=available_choices, value=updated_selected) return checkbox_update, updated_selected def on_filter_and_update_download_t2i(selected_columns, select_subject_button=None): if select_subject_button is not None and select_subject_button in T2I_SUBJECT_REVERSE_MAP: select_subject_button = T2I_SUBJECT_REVERSE_MAP[select_subject_button] data = on_filter_model_size_method_change_t2i(selected_columns, select_subject_button) file = download_t2i_xlsx(selected_columns, select_subject_button) return data, file def on_filter_and_update_download_i2i(selected_categories, selected_tasks): data, checkgroup = on_filter_model_size_method_change_i2i(selected_categories) file = download_i2i_xlsx(selected_tasks) return data, checkgroup, file css = """ table { text-align: center; } thead th { text-align: center !important; } tbody td { text-align: center !important; } /* 让 Text to Video 的 7 个评分按钮宽度一致:固定为 5 列网格布局(第二行自动补空位) */ #vbench_t2v_score_buttons { display: grid !important; grid-template-columns: repeat(7, minmax(0, 1fr)); gap: 8px; align-items: stretch; } /* 防止内部容器撑开导致网格溢出 */ #vbench_t2v_score_buttons > div { min-width: 0; } /* 让按钮填满各自网格单元格 */ #vbench_t2v_score_buttons button { width: 100%; } """ block = gr.Blocks(css=css) with block: gr.Markdown( LEADERBORAD_INTRODUCTION ) with gr.Tabs(elem_classes="tab-buttons") as tabs: # VBench 2.0 with gr.TabItem("Text to Video", elem_id="vbench-tab-table", id=1): gr.Markdown( TABLE_INTRODUCTION ) with gr.Row(elem_id="vbench_t2v_score_buttons"): vbench_t2v_creativity_button = gr.Button("Show Creativity Score") vbench_t2v_commonsense_button = gr.Button("Show Commonsense Score") vbench_t2v_control_button = gr.Button("Show Controllability Score") vbench_t2v_human_button = gr.Button("Show Human Fidelity Score") vbench_t2v_physics_button = gr.Button("Show Physics Score") vbench_t2v_text_rendering_button = gr.Button("Show Text Rendering Score") vbench_t2v_quality_button = gr.Button("Show Quality Score") with gr.Row(): vbench_t2v_checkgroup = gr.CheckboxGroup( choices=TASK_INFO_T2V, value=TASK_INFO_T2V, label="Evaluation Dimension", interactive=True, ) with gr.Row(): vbench_t2v_download_btn = gr.Button("📥 Download", variant="secondary") with gr.Row(): vbench_t2v_download_file = gr.File(label="下载文件", visible=False) data_component_t2v = gr.components.Dataframe( value=get_baseline_df_t2v, headers=COLUMN_NAMES_T2V, type="pandas", datatype=T2V_TITLE_TYPE, interactive=False, visible=True, # height=700, ) # vbench_t2v_checkgroup.change(fn=on_filter_model_size_method_change_t2v, inputs=[vbench_t2v_checkgroup], outputs=data_component_t2v) vbench_t2v_creativity_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Creativity"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False) vbench_t2v_commonsense_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Commonsense"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False) vbench_t2v_control_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Controllability"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False) vbench_t2v_human_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Human Fidelity"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False) vbench_t2v_physics_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Physics"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False) vbench_t2v_text_rendering_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Text Rendering"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False) vbench_t2v_quality_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Quality"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False) vbench_t2v_download_btn.click(fn=download_t2v_xlsx, inputs=[vbench_t2v_checkgroup, vbench_t2v_download_file], outputs=vbench_t2v_download_file, api_name=False) # Table i2v with gr.TabItem("Image to Video", elem_id="vbench-tab-table", id=2): with gr.Row(): with gr.Column(scale=1.0): # selection for column part: checkbox_group_i2v = gr.CheckboxGroup( choices=I2V_TAB, value=I2V_TAB, label="Evaluation Quality Dimension", interactive=True, ) with gr.Row(): i2v_download_btn = gr.Button("📥 Download", variant="secondary") with gr.Row(): i2v_download_file = gr.File(label="下载文件", visible=False) data_component_i2v = gr.components.Dataframe( value=get_baseline_df_i2v, headers=COLUMN_NAMES_I2V, type="pandas", datatype=I2V_TITLE_TYPE, interactive=False, visible=True, ) checkbox_group_i2v.change(fn=on_filter_and_update_download_i2v, inputs=[checkbox_group_i2v], outputs=[data_component_i2v, i2v_download_file], api_name=False) i2v_download_btn.click(fn=download_i2v_xlsx, inputs=[checkbox_group_i2v, i2v_download_file], outputs=i2v_download_file, api_name=False) # Table T2I Leaderboard with gr.TabItem("Text to Image", elem_id="ibench-tab-table", id=3): with gr.Row(): with gr.Column(scale=1.0): # selection for column part: # Text Rendering 始终显示在选项中,但默认不选中(因为默认 subject 是 "🌐 All") initial_t2i_choices = T2I_TAB.copy() initial_t2i_value = [col for col in T2I_TAB if col != "Text Rendering"] checkbox_group_t2i = gr.CheckboxGroup( choices=initial_t2i_choices, value=initial_t2i_value, label="Evaluation Dimension", interactive=True, ) with gr.Row(): with gr.Column(scale=1.0): select_subject_button = gr.Radio( choices=T2I_SHOW_SUBJECT_TAB, value="🌐 All", label="Subject", interactive=True ) with gr.Row(): t2i_download_btn = gr.Button("📥 Download", variant="secondary") with gr.Row(): t2i_download_file = gr.File(label="下载文件", visible=False) data_component_t2i = gr.components.Dataframe( value=get_baseline_df_t2i, headers=COLUMN_NAMES_T2I, type="pandas", datatype=T2I_TITLE_TYPE, interactive=False, visible=True, ) def on_subject_change(select_subject_value, current_selected): # 先更新 checkbox 选项 checkbox_update, updated_selected = update_t2i_checkbox_choices(select_subject_value, current_selected) # 然后更新数据和文件 data, file = on_filter_and_update_download_t2i(updated_selected, select_subject_value) return checkbox_update, data, file checkbox_group_t2i.change(fn=on_filter_and_update_download_t2i, inputs=[checkbox_group_t2i, select_subject_button], outputs=[data_component_t2i, t2i_download_file], api_name=False) select_subject_button.change(fn=on_subject_change, inputs=[select_subject_button, checkbox_group_t2i], outputs=[checkbox_group_t2i, data_component_t2i, t2i_download_file], api_name=False) t2i_download_btn.click(fn=download_t2i_xlsx, inputs=[checkbox_group_t2i, select_subject_button, t2i_download_file], outputs=t2i_download_file, api_name=False) # Image to Image Leaderboard with gr.TabItem("Image to Image", elem_id="ibench-tab-table", id=4): with gr.Row(): i2i_controllable_generation_button = gr.Button("Controllable Generation") i2i_global_editing_button = gr.Button("Global Editing") i2i_local_editing_button = gr.Button("Local Editing") i2i_reference_generation_button = gr.Button("Reference Generation") i2i_reference_editing_button = gr.Button("Reference Editing") with gr.Row(): i2i_checkgroup_tasks = gr.CheckboxGroup( choices=TASK_I2I, value=TASK_I2I, label="Specific Tasks", interactive=True, ) with gr.Row(): i2i_download_btn = gr.Button("📥 Download", variant="secondary") with gr.Row(): i2i_download_file = gr.File(label="下载文件", visible=False) data_component_i2i = gr.components.Dataframe( value=get_baseline_df_i2i, headers=MODEL_INFO_TAB_I2I+TASK_I2I, type="pandas", datatype=I2I_TITLE_TYPE, interactive=False, visible=True, ) i2i_controllable_generation_button.click(fn=on_filter_and_update_download_i2i, inputs=[gr.State("Controllable Generation"), i2i_checkgroup_tasks], outputs=[data_component_i2i, i2i_checkgroup_tasks, i2i_download_file], api_name=False) i2i_global_editing_button.click(fn=on_filter_and_update_download_i2i, inputs=[gr.State("Global Editing"), i2i_checkgroup_tasks], outputs=[data_component_i2i, i2i_checkgroup_tasks, i2i_download_file], api_name=False) i2i_local_editing_button.click(fn=on_filter_and_update_download_i2i, inputs=[gr.State("Local Editing"), i2i_checkgroup_tasks], outputs=[data_component_i2i, i2i_checkgroup_tasks, i2i_download_file], api_name=False) i2i_reference_generation_button.click(fn=on_filter_and_update_download_i2i, inputs=[gr.State("Reference Generation"), i2i_checkgroup_tasks], outputs=[data_component_i2i, i2i_checkgroup_tasks, i2i_download_file], api_name=False) i2i_reference_editing_button.click(fn=on_filter_and_update_download_i2i, inputs=[gr.State("Reference Editing"), i2i_checkgroup_tasks], outputs=[data_component_i2i, i2i_checkgroup_tasks, i2i_download_file], api_name=False) i2i_download_btn.click(fn=download_i2i_xlsx, inputs=[i2i_checkgroup_tasks, i2i_download_file], outputs=i2i_download_file, api_name=False) # About with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=5): gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text") with gr.Row(): data_run = gr.Button("Refresh") data_run.click(refresh_and_switch_tab, inputs=[vbench_t2v_checkgroup, i2i_checkgroup_tasks], outputs=[data_component_t2v, data_component_i2i, vbench_t2v_download_file, i2i_download_file], api_name=False) block.launch(share=True)