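# NOTE: the next two lines are web-page header residue captured with this file; they are not program code.
# xiaoxiaoshu's picture
# Update app.py - e82a702 verified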
# Names exported by `from <this module> import *`.
# NOTE(review): only `block` is defined in the part of the file reviewed here;
# confirm the remaining names are defined elsewhere in this module.
__all__ = ['block', 'make_clickable_model', 'make_clickable_user', 'get_submissions']
import os
import gradio as gr
import pandas as pd
import json
import numpy as np
from constants import *
from huggingface_hub import Repository
# Access token read from the environment; passed to the Hub repository client below.
HF_TOKEN = os.environ.get("HF_TOKEN")

# NOTE: a `global` statement at module scope has no effect; retained unchanged.
global data_component, filter_component

# Invert DIM2CAT_T2V (dimension -> category) into category -> [dimensions],
# keeping the dimensions in DIM2CAT_T2V iteration order.
category_to_dimension = {}
for key, value in DIM2CAT_T2V.items():
    category_to_dimension.setdefault(value, []).append(key)
def upload_file(files):
    """Return the ``name`` attribute of every item in ``files`` as a list."""
    file_paths = []
    for uploaded in files:
        file_paths.append(uploaded.name)
    return file_paths
def get_normalized_i2v_df(df):
    """Return a NaN-filled copy of ``df`` with score columns min-max normalised.

    Missing values are replaced by 0.0 first.  Every column from the fifth one
    onwards is rescaled with the ``'Min'`` / ``'Max'`` bounds stored for that
    column name in ``NORMALIZE_DIC``; the first four columns are left as-is.
    The input frame is not modified.
    """
    normalized = df.copy().fillna(0.0)
    for column in normalized.columns[4:]:
        low = NORMALIZE_DIC[column]['Min']
        high = NORMALIZE_DIC[column]['Max']
        normalized[column] = (normalized[column] - low) / (high - low)
    return normalized
def get_normalized_t2v_df(df):
    """Return a copy of ``df`` with every column min-max normalised.

    Each column is rescaled with the ``'Min'`` / ``'Max'`` bounds stored under
    its name in ``NORMALIZE_DIC``.  Unlike the I2V variant, NaNs are not filled
    and no leading columns are skipped.
    """
    normalized = df.copy()
    for column in normalized.columns:
        low = NORMALIZE_DIC[column]['Min']
        high = NORMALIZE_DIC[column]['Max']
        normalized[column] = (normalized[column] - low) / (high - low)
    return normalized
def calculate_selected_score_i2v(df, selected_columns):
    """Combine the selected I2V dimensions into one score per row.

    ``df`` is expected to hold already weighted dimension columns.  The
    selected columns are split into the quality group (``I2V_QUALITY_LIST``)
    and the I2V group (``I2V_LIST``); each group score is the row sum divided
    by the sum of the group's ``DIM_WEIGHT_I2V`` weights.

    If exactly one group score contains NaN, the other group's score is
    returned unchanged.  Otherwise the two are blended with
    ``I2V_QUALITY_WEIGHT`` / ``I2V_WEIGHT`` and NaNs are replaced by 0.0.
    """
    quality_dims = [c for c in selected_columns if c in I2V_QUALITY_LIST]
    i2v_dims = [c for c in selected_columns if c in I2V_LIST]

    quality_part = df[quality_dims].sum(axis=1) / sum([DIM_WEIGHT_I2V[c] for c in quality_dims])
    i2v_part = df[i2v_dims].sum(axis=1) / sum([DIM_WEIGHT_I2V[c] for c in i2v_dims])

    quality_has_nan = bool(quality_part.isna().any())
    i2v_has_nan = bool(i2v_part.isna().any())

    # Only one side unusable: fall back to the other side as-is.
    if quality_has_nan and not i2v_has_nan:
        return i2v_part
    if i2v_has_nan and not quality_has_nan:
        return quality_part

    blended = (quality_part * I2V_QUALITY_WEIGHT + i2v_part * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
    return blended.fillna(0.0)
def get_final_score_t2i(df, selected_columns, select_subject_button=None):
    """Add or refresh the 'Selected Score' column of a T2I results frame.

    Dimension columns listed in ``DIM_WEIGHT_T2I`` are weighted on a NaN-filled
    copy, then combined as::

        ((Aesthetic + Image Quality) / 2 + alignment) / 2

    where ``alignment`` is the mean of 'Prompt Semantic Alignment' and
    'Text Rendering' when the subject is one of the text-and-typography
    subjects or 'Text Rendering' is among ``selected_columns``, and plain
    'Prompt Semantic Alignment' otherwise.

    ``df`` is modified in place (column inserted at position 4 if absent) and
    returned.
    """
    weighted = df.copy().fillna(0.0)
    for name in df.drop(columns=['Model Name', 'Resolution', 'Total Score']):
        if name in DIM_WEIGHT_T2I:
            weighted[name] = weighted[name] * DIM_WEIGHT_T2I[name]

    # The text-rendering formula applies for the typography subjects or when
    # the user ticked 'Text Rendering' manually.
    with_text_rendering = (
        select_subject_button in ["text-and-typography", "text-and-typography-cn"] or
        "Text Rendering" in selected_columns
    )

    visual_avg = (weighted["Aesthetic"] + weighted["Image Quality"]) / 2
    if with_text_rendering:
        alignment = (weighted["Prompt Semantic Alignment"] + weighted["Text Rendering"]) / 2
    else:
        alignment = weighted["Prompt Semantic Alignment"]
    selected_score = (visual_avg + alignment) / 2

    if 'Selected Score' not in df:
        df.insert(4, 'Selected Score', selected_score)
    else:
        df['Selected Score'] = selected_score
    return df
def get_final_score_i2i(df, selected_task_button=TASK_I2I):
    """Add or refresh 'Total Score' and 'Selected Score' on an I2I results frame.

    'Total Score' is the mean over all ``TASK_I2I`` columns and 'Selected
    Score' the mean over ``selected_task_button``; NaNs count as 0.0.
    ``df`` is modified in place (columns inserted at positions 2 and 3 when
    absent) and returned.
    """
    filled = df.copy().fillna(0.0)
    selected_score = filled[selected_task_button].sum(axis=1) / len(selected_task_button)
    total_score = filled[TASK_I2I].sum(axis=1) / len(TASK_I2I)

    placements = (
        (2, 'Total Score', total_score),
        (3, 'Selected Score', selected_score),
    )
    for position, label, series in placements:
        if label in df:
            df[label] = series
        else:
            df.insert(position, label, series)
    return df
def get_final_score_i2v(df, selected_columns):
    """Add or refresh the aggregate score columns of an I2V results frame.

    The dimension columns are min-max normalised and multiplied by their
    ``DIM_WEIGHT_I2V`` weight.  From those, 'Quality Score', 'I2V Score',
    'Total Score' (weighted blend of the two) and 'Selected Score' (restricted
    to ``selected_columns``) are computed.

    ``df`` is modified in place and returned.  Rows that have a null in any
    column from position 8 onwards get NaN for 'Total Score', 'I2V Score' and
    'Selected Score'.

    Raises:
        KeyError: if a dimension column has no entry in ``DIM_WEIGHT_I2V`` or
            an expected info column is missing.
    """
    normalize_df = get_normalized_i2v_df(df)

    # The original wrapped this loop in a bare ``except`` that re-ran the very
    # same loop; that could only re-raise after weighting some columns twice,
    # so the loop now runs once and errors propagate directly.
    info_columns = ['Model Name', 'Resolution', 'Duration', 'FPS']
    for name in normalize_df.drop(columns=info_columns):
        normalize_df[name] = normalize_df[name] * DIM_WEIGHT_I2V[name]

    quality_score = normalize_df[I2V_QUALITY_LIST].sum(axis=1) / sum(DIM_WEIGHT_I2V[i] for i in I2V_QUALITY_LIST)
    i2v_score = normalize_df[I2V_LIST].sum(axis=1) / sum(DIM_WEIGHT_I2V[i] for i in I2V_LIST)
    final_score = (quality_score * I2V_QUALITY_WEIGHT + i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)

    # Insertion order and positions determine the final column layout, which
    # the positional mask below relies on.
    aggregates = (
        (1, 'Total Score', final_score),
        (2, 'I2V Score', i2v_score),
        (3, 'Quality Score', quality_score),
    )
    for position, label, series in aggregates:
        if label in df:
            df[label] = series
        else:
            df.insert(position, label, series)

    selected_score = calculate_selected_score_i2v(normalize_df, selected_columns)
    if 'Selected Score' in df:
        df['Selected Score'] = selected_score
    else:
        df.insert(1, 'Selected Score', selected_score)

    # Blank the aggregates for rows with an incomplete set of raw scores.
    mask = df.iloc[:, 8:].isnull().any(axis=1)
    df.loc[mask, ['Total Score', 'I2V Score', 'Selected Score']] = np.nan
    return df
def _weighted_quality_score_t2v(df, quality_columns):
    """Return the weight-normalised sum of the min-max normalised quality columns."""
    normalized = get_normalized_t2v_df(df[quality_columns])
    for name in quality_columns:
        normalized[name] = normalized[name] * DIM_WEIGHT_QUALITY_T2V[name]
    weight_total = sum(DIM_WEIGHT_QUALITY_T2V[name] for name in quality_columns)
    return normalized[quality_columns].sum(axis=1) / weight_total


def get_final_score_t2v(df, selected_columns):
    """Add or refresh the per-category scores and 'Selected Score' of a T2V frame.

    For every category in ``category_to_dimension`` a '<category> Score'
    column is written: the weighted, normalised quality score for "Quality",
    and the plain mean of the selected dimensions for every other category.

    'Selected Score' is a ``DIM_WEIGHT_T2V``-weighted average over the selected
    plain dimensions, plus one pooled 'Text Rendering' entry (mean of the
    selected text-rendering columns) and one pooled 'Quality' entry, when any
    columns of those groups are selected.

    ``df`` is modified in place (new columns are inserted at position 1) and
    returned.
    """
    for category, dimensions in category_to_dimension.items():
        score_name = f"{category} Score"
        filtered_columns = [col for col in dimensions if col in selected_columns]
        if category == "Quality":
            score = _weighted_quality_score_t2v(df, filtered_columns)
        else:
            score = df[filtered_columns].mean(axis=1)
        if score_name in df:
            df[score_name] = score
        else:
            df.insert(1, score_name, score)

    dim_cols = [c for c in selected_columns if c in DIM_KEY_TO_COL.values()]
    text_rendering_cols = [c for c in selected_columns if c in DIM_KEY_TO_COL_TEXT_RENDERING.values()]
    quality_cols = [c for c in selected_columns if c in DIM_KEY_TO_COL_QUALITY.values()]

    scores = pd.DataFrame()
    weighted_cols = []
    for col in dim_cols:
        scores[col] = df[col]
        weighted_cols.append(col)
    if len(text_rendering_cols) > 0:
        scores['Text Rendering'] = df[text_rendering_cols].mean(axis=1)
        weighted_cols.append("Text Rendering")
    if len(quality_cols) > 0:
        scores['Quality'] = _weighted_quality_score_t2v(df, quality_cols)
        weighted_cols.append("Quality")

    for col in weighted_cols:
        scores[col] = scores[col] * DIM_WEIGHT_T2V[col]
    selected_score = scores[weighted_cols].sum(axis=1) / sum(DIM_WEIGHT_T2V[i] for i in weighted_cols)

    if 'Selected Score' in df:
        df['Selected Score'] = selected_score
    else:
        df.insert(1, 'Selected Score', selected_score)
    return df
def get_df_from_json_t2i(file_dir, select_subject_button=None):
    """Load every ``eval_results_*.json`` in ``file_dir`` into a T2I results frame.

    Args:
        file_dir: directory holding the result files.
        select_subject_button: internal subject key.  ``None`` uses each
            dimension's overall ``score``; otherwise the dimension value is the
            mean ``image_results`` of the result items whose ``Category``
            contains the subject (0 when none match).

    Returns:
        A DataFrame with 'Model Name', 'Resolution', 'Total Score' and the
        ``T2I_TAB`` columns; an empty frame with ``COLUMN_NAMES_T2I`` columns
        when the directory is missing or holds no usable file.

    Files that cannot be read or parsed are skipped.  A file without an
    ``image_generation_params`` section yields ``None`` for the model info
    instead of raising.
    """
    if not os.path.isdir(file_dir):
        return pd.DataFrame(columns=COLUMN_NAMES_T2I)

    colname_to_key = {
        "Aesthetic": "aesthetic",
        "Image Quality": "imaging",
        "Prompt Semantic Alignment": "semantic_alignment",
        "Text Rendering": "text_rendering",
    }
    image_generation_params = [
        "Model Name",
        "Resolution",
    ]

    rows = []
    for filename in os.listdir(file_dir):
        if not filename.endswith(".json") or not filename.startswith("eval_results_"):
            continue
        file_path = os.path.join(file_dir, filename)
        try:
            with open(file_path, "r") as f:
                data = json.load(f)
        except Exception:
            # Best effort: one unreadable file must not take the table down.
            continue
        if not isinstance(data, dict):
            continue

        # 'Total Score' comes straight from the file and is never re-weighted.
        row = {"Total Score": data.get("task_score", 0)}
        dimension_results = data.get("dimension_results") or {}
        for col_name, json_key in colname_to_key.items():
            dim_info = dimension_results.get(json_key) or {}
            if select_subject_button is None:
                value = dim_info.get("score", 0)
            else:
                # Keep only items tagged with the requested subject.
                subject_scores = [
                    item.get("image_results", 0)
                    for item in dim_info.get("result_items") or []
                    if select_subject_button in item.get("Category", [])
                ]
                value = sum(subject_scores) / len(subject_scores) if subject_scores else 0
            if col_name in SHOW_DIM_WEIGHT_IMAGE:
                value = value * SHOW_DIM_WEIGHT_IMAGE[col_name]
            row[col_name] = value

        generation_params = data.get("image_generation_params") or {}
        for key in image_generation_params:
            row[key] = generation_params.get(key)
        rows.append(row)

    if not rows:
        return pd.DataFrame(columns=COLUMN_NAMES_T2I)
    df = pd.DataFrame(rows)[["Model Name", "Resolution", "Total Score"] + T2I_TAB]
    return df
def get_df_from_json_i2i(file_dir):
    """Load every ``eval_results_*.json`` in ``file_dir`` into an I2I results frame.

    Returns:
        A DataFrame with 'Model Name', one column per task in ``TASK_I2I`` and
        one '<task>_<dimension>' column per entry of ``I2I_TAB_DICT``; an empty
        frame with ``MODEL_INFO_TAB_I2I + TASK_I2I`` columns when the directory
        is missing or holds no usable file.

    Files that cannot be read or parsed are skipped.  A task missing from a
    file scores 0 and its dimension columns are left as NaN, so one incomplete
    submission no longer crashes the whole table.
    """
    select_task_button = TASK_I2I
    if not os.path.isdir(file_dir):
        return pd.DataFrame(columns=MODEL_INFO_TAB_I2I + select_task_button)

    rows = []
    image_generation_params = ["Model Name"]
    task_dims_list = [f"{task}_{dim}" for task in select_task_button for dim in I2I_TAB_DICT[task]]

    for filename in os.listdir(file_dir):
        if not filename.endswith(".json") or not filename.startswith("eval_results_"):
            continue
        file_path = os.path.join(file_dir, filename)
        try:
            with open(file_path, "r") as f:
                data = json.load(f)
        except Exception:
            # Best effort: skip unreadable or invalid files.
            continue
        if not isinstance(data, dict):
            continue

        row = {}
        model_image_generation_params = data.get("image_generation_params") or {}
        for key in image_generation_params:
            row[key] = model_image_generation_params.get(key, "None")

        for task in select_task_button:
            task_info = data.get(task) or {}
            row[task] = task_info.get("task_score", 0)
            for dim, dim_info in (task_info.get("dimension_results") or {}).items():
                dim_score = dim_info.get("score", 0)
                if dim in SHOW_DIM_WEIGHT_IMAGE:
                    dim_score = dim_score * SHOW_DIM_WEIGHT_IMAGE[dim]
                row[f"{task}_{DIM_KEY_TO_COL_I2I[dim]}"] = dim_score
        rows.append(row)

    if not rows:
        return pd.DataFrame(columns=MODEL_INFO_TAB_I2I + select_task_button)
    # reindex (rather than plain selection) tolerates dimension columns that no
    # file provided; they come out as NaN columns.
    df = pd.DataFrame(rows).reindex(columns=["Model Name"] + select_task_button + task_dims_list)
    return df
def get_df_from_json_i2v(file_dir):
    """Load every ``eval_results_*.json`` in ``file_dir`` into an I2V results frame.

    For each dimension the first element of its ``score`` list is used (0 when
    the entry is absent or malformed).  All key/value pairs found under
    ``video_generation_params`` are copied into the row.  Unreadable files are
    skipped.  Returns an empty frame with ``COLUMN_NAMES_I2V`` columns when the
    directory is missing or no file could be loaded.
    """
    if not os.path.isdir(file_dir):
        return pd.DataFrame(columns=COLUMN_NAMES_I2V)

    # Display column -> key in the result JSON.
    colname_to_key = {
        # "Video-Text Camera Motion": "camera_motion",
        "Video-Image Subject Consistency": "i2v_subject",
        "Video-Image Background Consistency": "i2v_background",
        "Subject Consistency": "subject_consistency",
        "Background Consistency": "background_consistency",
        "Motion Smoothness": "motion_smoothness",
        "Dynamic Degree": "dynamic_degree",
        "Aesthetic Quality": "aesthetic_quality",
        "Imaging Quality": "imaging_quality",
        "Temporal Flickering": "temporal_flickering",
    }

    rows = []
    for filename in os.listdir(file_dir):
        is_result_file = filename.endswith(".json") and filename.startswith("eval_results_")
        if not is_result_file:
            continue
        file_path = os.path.join(file_dir, filename)
        try:
            with open(file_path, "r") as f:
                data = json.load(f)
        except Exception:
            continue

        row = {}
        for col_name, json_key in colname_to_key.items():
            dim_info = data.get(json_key)
            scores = dim_info.get("score", []) if isinstance(dim_info, dict) else None
            has_score = isinstance(scores, list) and len(scores) >= 1
            row[col_name] = scores[0] if has_score else 0
        row.update(data.get("video_generation_params", {}).items())
        rows.append(row)

    if len(rows) == 0:
        return pd.DataFrame(columns=COLUMN_NAMES_I2V)
    return pd.DataFrame(rows)[["Model Name", "Resolution", "Duration", "FPS"] + I2V_TAB]
def _t2v_score_at(data, json_key, index, expected_len):
    """Return ``score[index]`` of ``data[json_key]`` if its score list has ``expected_len`` items, else 0."""
    dim_info = data.get(json_key, {})
    score_list = dim_info.get("score", []) if isinstance(dim_info, dict) else []
    if isinstance(score_list, list) and len(score_list) == expected_len:
        return score_list[index]
    return 0


def get_df_from_json_t2v(file_dir):
    """Load every ``eval_results_*.json`` in ``file_dir`` into a T2V results frame.

    Per file, one row is built from:
      * ``DIM_KEY_TO_COL`` and ``DIM_KEY_TO_COL_TEXT_RENDERING`` dimensions:
        the middle element of a three-element ``score`` list, else 0;
      * ``DIM_KEY_TO_COL_QUALITY`` dimensions: the only element of a
        one-element ``score`` list, else 0;
      * ``FINAL_FIXED_KEY_TO_COL`` values read from ``final_score_fixed``;
      * every key/value pair under ``video_generation_params``.

    Files are opened from ``file_dir`` (the original joined the names listed
    from ``file_dir`` onto ``T2V_DIR``, which broke any other directory).
    Unreadable files are skipped.  Returns an empty frame with
    ``COLUMN_NAMES_T2V`` columns when the directory is missing or no file
    could be loaded.
    """
    if not os.path.isdir(file_dir):
        return pd.DataFrame(columns=COLUMN_NAMES_T2V)

    rows = []
    for filename in os.listdir(file_dir):
        if not filename.endswith(".json") or not filename.startswith("eval_results_"):
            continue
        file_path = os.path.join(file_dir, filename)
        try:
            with open(file_path, "r") as f:
                data = json.load(f)
        except Exception:
            # Best effort: skip unreadable or invalid files.
            continue
        if not isinstance(data, dict):
            continue

        row = {}
        for json_key, col_name in DIM_KEY_TO_COL.items():
            row[col_name] = _t2v_score_at(data, json_key, 1, 3)
        for json_key, col_name in DIM_KEY_TO_COL_TEXT_RENDERING.items():
            row[col_name] = _t2v_score_at(data, json_key, 1, 3)
        for json_key, col_name in DIM_KEY_TO_COL_QUALITY.items():
            row[col_name] = _t2v_score_at(data, json_key, 0, 1)

        final_fixed = data.get("final_score_fixed") or {}
        for json_key, col_name in FINAL_FIXED_KEY_TO_COL.items():
            row[col_name] = final_fixed.get(json_key)

        # A missing section is treated as empty instead of raising.
        video_generation_params = data.get("video_generation_params") or {}
        for key, value in video_generation_params.items():
            row[key] = value
        rows.append(row)

    if not rows:
        return pd.DataFrame(columns=COLUMN_NAMES_T2V)
    return pd.DataFrame(rows)
def try_sync_repo():
    """Clone/pull the submission dataset repository.

    Returns True when the pull succeeded and False (after printing a warning)
    on any exception, so callers can continue with local data.
    """
    try:
        repo = Repository(
            local_dir=SUBMISSION_NAME,
            clone_from=SUBMISSION_URL,
            use_auth_token=HF_TOKEN,
            repo_type="dataset",
        )
        repo.git_pull()
    except Exception as e:
        print(f"[警告] 无法同步远程仓库,将使用本地数据: {e}")
        return False
    return True
def get_baseline_df_i2v():
    """Build the initial I2V table from the checkbox group's current value.

    Syncs the repository, loads and scores the results, sorts by 'Selected
    Score' (descending), keeps the model-info plus selected columns and formats
    scores as percentages.
    """
    try_sync_repo()
    scored = get_final_score_i2v(get_df_from_json_i2v(I2V_DIR), checkbox_group_i2v.value)
    ranked = scored.sort_values(by="Selected Score", ascending=False)
    visible_columns = MODEL_INFO_TAB_I2V + checkbox_group_i2v.value
    return convert_scores_to_percentage(ranked[visible_columns])
def get_baseline_df_t2i():
    """Build the initial T2I table from the current subject and checkbox values.

    The subject radio holds a display label; it is translated to the internal
    key through ``T2I_SUBJECT_REVERSE_MAP`` when present there.  The same
    internal value drives both loading and the 'Selected Score' formula.
    """
    try_sync_repo()

    # Display value -> internal value, when the label is known to the map.
    subject = select_subject_button.value
    if subject is not None and subject in T2I_SUBJECT_REVERSE_MAP:
        subject = T2I_SUBJECT_REVERSE_MAP[subject]

    scored = get_final_score_t2i(
        get_df_from_json_t2i(T2I_DIR, subject),
        checkbox_group_t2i.value,
        subject,
    )
    ranked = scored.sort_values(by="Selected Score", ascending=False)
    visible_columns = MODEL_INFO_TAB_T2I + checkbox_group_t2i.value
    return convert_scores_to_percentage(ranked[visible_columns])
def get_baseline_df_i2i():
    """Build the initial I2I table over all tasks in ``TASK_I2I``.

    Syncs the repository, loads and scores the results, sorts by 'Selected
    Score' (descending), keeps the model-info plus task columns and formats
    scores as percentages.
    """
    try_sync_repo()
    scored = get_final_score_i2i(get_df_from_json_i2i(I2I_DIR))
    ranked = scored.sort_values(by="Selected Score", ascending=False)
    visible_columns = MODEL_INFO_TAB_I2I + TASK_I2I
    return convert_scores_to_percentage(ranked[visible_columns])
def get_baseline_df_t2v():
    """Build the initial T2V table with all ``TASK_INFO_T2V`` dimensions.

    The repository sync is performed by ``get_all_df_t2v`` itself, so it is
    not repeated here (the original pulled the repository twice per call).
    The result is restricted to ``COLUMN_NAMES_T2V``, sorted by 'Total Score'
    (descending) and formatted as percentages.
    """
    df = get_all_df_t2v(TASK_INFO_T2V)
    df = df[COLUMN_NAMES_T2V]
    df = df.sort_values(by="Total Score", ascending=False)
    return convert_scores_to_percentage(df)
def get_all_df_i2v(selected_columns, dir=I2V_DIR):
    """Sync the repo, load and score the I2V results in ``dir``, sorted by 'Selected Score' (descending)."""
    try_sync_repo()
    scored = get_final_score_i2v(get_df_from_json_i2v(dir), selected_columns)
    return scored.sort_values(by="Selected Score", ascending=False)
def get_all_df_t2i(selected_columns, select_subject_button=None, dir=T2I_DIR):
    """Sync the repo, load and score the T2I results in ``dir`` for the given subject, sorted by 'Selected Score' (descending)."""
    try_sync_repo()
    loaded = get_df_from_json_t2i(dir, select_subject_button)
    scored = get_final_score_t2i(loaded, selected_columns, select_subject_button)
    return scored.sort_values(by="Selected Score", ascending=False)
def get_all_df_t2v(selected_columns, dir=T2V_DIR):
    """Sync the repo, load and score the T2V results in ``dir``, sorted by 'Total Score' (descending)."""
    try_sync_repo()
    scored = get_final_score_t2v(get_df_from_json_t2v(dir), selected_columns)
    return scored.sort_values(by="Total Score", ascending=False)
def convert_scores_to_percentage(df):
    """Return a copy of ``df`` with numeric score cells rendered as percentages.

    The columns 'Model Name', 'Resolution', 'Duration' and 'FPS' are left
    untouched.  In every other column that contains at least one numeric
    value, numeric cells become strings such as ``"06.25%"`` (value * 100,
    two decimals, zero-padded to width 5) and the remaining cells are
    converted with ``str``.  Columns without any numeric value are unchanged.
    """
    result = df.copy()
    info_columns = ['Model Name', 'Resolution', 'Duration', 'FPS']
    for col in result.columns:
        if col in info_columns:
            continue
        numeric = pd.to_numeric(result[col], errors='coerce')
        if not numeric.notna().any():
            continue
        percent = (numeric * 100).round(2)
        formatted = percent.map(lambda v: v if pd.isna(v) else f"{v:05.2f}%")
        # Keep the stringified original wherever no percentage could be built.
        result[col] = result[col].astype(str).where(formatted.isna(), formatted)
    return result
def on_filter_model_size_method_change_t2i(selected_columns, select_subject_button=None):
    """Rebuild the T2I table for the selected dimensions and subject.

    The selected dimensions are shown in ``T2I_TAB`` order after the
    model-info columns, sorted by 'Selected Score' (descending).  Returns a new
    Dataframe component.
    """
    table = get_all_df_t2i(selected_columns, select_subject_button, T2I_DIR)
    ordered_dims = [dim for dim in T2I_TAB if dim in selected_columns]
    headers = MODEL_INFO_TAB_T2I + ordered_dims
    table = table[headers].sort_values(by="Selected Score", ascending=False)
    # Column types are looked up by each header's position in the full column list.
    column_types = [T2I_TITLE_TYPE[COLUMN_NAMES_T2I.index(header)] for header in headers]
    return gr.components.Dataframe(
        value=convert_scores_to_percentage(table),
        headers=headers,
        type="pandas",
        datatype=column_types,
        interactive=False,
        visible=True,
    )
def on_filter_model_size_method_change_i2i(selected_categories):
    """Rebuild the I2I table for the tasks of one category.

    ``selected_categories`` is used as a key into ``TASK_INFO_I2I``.  Returns
    the new Dataframe component and an update that sets the task checkbox
    group to that category's tasks.
    """
    tasks = TASK_INFO_I2I[selected_categories]
    table = get_final_score_i2i(get_df_from_json_i2i(I2I_DIR), tasks)
    headers = MODEL_INFO_TAB_I2I + tasks
    table = table[headers].sort_values(by="Selected Score", ascending=False)
    # First column is rendered as markdown, all others as numbers.
    column_types = ["markdown"] + ["number"] * (len(headers) - 1)
    component = gr.components.Dataframe(
        value=convert_scores_to_percentage(table),
        headers=headers,
        type="pandas",
        datatype=column_types,
        interactive=False,
        visible=True,
    )
    return component, gr.update(value=tasks)
def on_filter_task_method_change_i2i(selected_tasks):
    """Rebuild the I2I table for an explicit list of tasks.

    Shows the model-info columns followed by ``selected_tasks``, sorted by
    'Selected Score' (descending).  Returns a new Dataframe component.

    NOTE: an earlier variant that also showed per-dimension columns when a
    single task was selected is disabled; only task columns are displayed.
    """
    table = get_final_score_i2i(get_df_from_json_i2i(I2I_DIR), selected_tasks)
    headers = MODEL_INFO_TAB_I2I + selected_tasks
    table = table[headers].sort_values(by="Selected Score", ascending=False)
    # First column is rendered as markdown, all others as numbers.
    column_types = ["markdown"] + ["number"] * (len(headers) - 1)
    return gr.components.Dataframe(
        value=convert_scores_to_percentage(table),
        headers=headers,
        type="pandas",
        datatype=column_types,
        interactive=False,
        visible=True,
    )
# select function
def on_filter_model_size_method_change_i2v(selected_columns):
    """Rebuild the I2V table for the selected dimensions.

    The selected dimensions are shown in ``I2V_TAB`` order after the
    model-info columns, sorted by 'Selected Score' (descending).  Returns a new
    Dataframe component.
    """
    table = get_all_df_i2v(selected_columns, I2V_DIR)
    ordered_dims = [dim for dim in I2V_TAB if dim in selected_columns]
    headers = MODEL_INFO_TAB_I2V + ordered_dims
    table = table[headers].sort_values(by="Selected Score", ascending=False)
    table = convert_scores_to_percentage(table)
    # Column types are looked up by each header's position in the full column list.
    column_types = [I2V_TITLE_TYPE[COLUMN_NAMES_I2V.index(header)] for header in headers]
    return gr.components.Dataframe(
        value=table,
        headers=headers,
        type="pandas",
        datatype=column_types,
        interactive=False,
        visible=True,
    )
def on_filter_model_size_method_change_t2v(selected_columns=TASK_INFO_T2V):
    """Rebuild the T2V table for the selected dimensions.

    Shows the model-info columns followed by ``selected_columns``, sorted by
    'Selected Score' (descending).  Returns a new Dataframe component.
    """
    table = get_all_df_t2v(selected_columns, T2V_DIR)
    headers = MODEL_INFO_TAB_T2V + selected_columns
    # Column types are looked up by each header's position in the full column list.
    column_types = [T2V_TITLE_TYPE[COLUMN_NAMES_T2V.index(header)] for header in headers]
    table = table[headers].sort_values(by="Selected Score", ascending=False)
    return gr.components.Dataframe(
        value=convert_scores_to_percentage(table),
        headers=headers,
        type="pandas",
        datatype=column_types,
        interactive=False,
        visible=True,
    )
def on_filter_model_size_method_score_change_t2v(select_score):
    """Rebuild the T2V table for a single score category.

    The dimensions of ``select_score`` are taken from
    ``category_to_dimension``; the table shows the model-info columns, the
    '<category> Score' column and those dimensions, sorted by that score
    (descending).  Returns the Dataframe component and an update that sets the
    dimension checkbox group to the category's dimensions.
    """
    dimensions = category_to_dimension[select_score]
    score_column = f"{select_score} Score"
    table = get_all_df_t2v(dimensions, T2V_DIR)
    headers = MODEL_INFO_TAB_T2V + [score_column] + dimensions
    table = table[headers].sort_values(by=score_column, ascending=False)
    table = convert_scores_to_percentage(table)
    # Column types are looked up by each header's position in the full column list.
    column_types = [T2V_TITLE_TYPE[COLUMN_NAMES_T2V.index(header)] for header in headers]
    component = gr.components.Dataframe(
        value=table,
        headers=headers,
        type="pandas",
        datatype=column_types,
        interactive=False,
        visible=True,
    )
    return component, gr.update(value=dimensions)
def refresh_and_switch_tab(t2v_selected_columns, i2i_selected_tasks):
    """Refresh the T2V and I2I tables and download files; return (t2v table, i2i table, t2v file, i2i file)."""
    t2v_table, t2v_download = on_filter_dim_and_update_download_t2v(t2v_selected_columns)
    i2i_table, i2i_download = on_filter_task_and_update_download_i2i(i2i_selected_tasks)
    return t2v_table, i2i_table, t2v_download, i2i_download
def download_t2v_xlsx(selected_columns, vbench_t2v_download_file=None):
    """Export the T2V table for ``selected_columns`` to a temporary .xlsx file.

    Args:
        selected_columns: dimension columns to include after the model info.
        vbench_t2v_download_file: current value of the download File
            component; when it is not ``None`` the returned update also makes
            the component visible.

    Returns:
        A ``gr.update`` pointing the File component at the generated file.
    """
    import tempfile
    updated_data = get_all_df_t2v(selected_columns, T2V_DIR)
    # NOTE(review): the table handlers use MODEL_INFO_TAB_T2V; confirm that
    # MODEL_INFO_T2V is the intended constant for the export.
    present_columns = MODEL_INFO_T2V + selected_columns
    updated_data = convert_scores_to_percentage(updated_data[present_columns])

    # Only a unique path is needed: close the handle right away so the
    # descriptor is not leaked and the file is not written while still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", prefix="VBench_T2V_Leaderboard_") as temp_file:
        xlsx_path = temp_file.name
    updated_data.to_excel(xlsx_path, index=False, engine='openpyxl')

    if vbench_t2v_download_file is not None:
        return gr.update(value=xlsx_path, visible=True)
    return gr.update(value=xlsx_path)
def download_t2i_xlsx(selected_columns, select_subject_button=None,t2i_download_file=None):
    """Export the T2I table for the given dimensions and subject to a temporary .xlsx file.

    Args:
        selected_columns: dimension columns to include (emitted in
            ``T2I_TAB`` order after the model info).
        select_subject_button: subject as display label or internal key; a
            label found in ``T2I_SUBJECT_REVERSE_MAP`` is translated first.
        t2i_download_file: current value of the download File component; when
            it is not ``None`` the returned update also makes it visible.

    Returns:
        A ``gr.update`` pointing the File component at the generated file.
    """
    import tempfile
    if select_subject_button is not None and select_subject_button in T2I_SUBJECT_REVERSE_MAP:
        select_subject_button = T2I_SUBJECT_REVERSE_MAP[select_subject_button]
    updated_data = get_all_df_t2i(selected_columns, select_subject_button, T2I_DIR)
    ordered_columns = [item for item in T2I_TAB if item in selected_columns]
    present_columns = MODEL_INFO_TAB_T2I + ordered_columns
    updated_data = convert_scores_to_percentage(updated_data[present_columns])

    # Only a unique path is needed: close the handle right away so the
    # descriptor is not leaked and the file is not written while still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", prefix="IBench_T2I_Leaderboard_") as temp_file:
        xlsx_path = temp_file.name
    updated_data.to_excel(xlsx_path, index=False, engine='openpyxl')

    if t2i_download_file is not None:
        return gr.update(value=xlsx_path, visible=True)
    return gr.update(value=xlsx_path)
def download_i2i_xlsx(selected_tasks, i2i_download_file=None):
    """Export the I2I table for ``selected_tasks`` to a temporary .xlsx file.

    Args:
        selected_tasks: task columns to include after the model info.
        i2i_download_file: current value of the download File component; when
            it is not ``None`` the returned update also makes it visible.

    Returns:
        A ``gr.update`` pointing the File component at the generated file.
    """
    import tempfile
    updated_data = get_df_from_json_i2i(I2I_DIR)
    updated_data = get_final_score_i2i(updated_data, selected_tasks)
    present_columns = MODEL_INFO_TAB_I2I + selected_tasks
    updated_data = convert_scores_to_percentage(updated_data[present_columns])

    # Only a unique path is needed: close the handle right away so the
    # descriptor is not leaked and the file is not written while still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", prefix="IBench_I2I_Leaderboard_") as temp_file:
        xlsx_path = temp_file.name
    updated_data.to_excel(xlsx_path, index=False, engine='openpyxl')

    if i2i_download_file is not None:
        return gr.update(value=xlsx_path, visible=True)
    return gr.update(value=xlsx_path)
def download_i2v_xlsx(selected_columns, i2v_download_file=None):
    """Export the I2V table for ``selected_columns`` to a temporary .xlsx file.

    Args:
        selected_columns: dimension columns to include (emitted in
            ``I2V_TAB`` order after the model info).
        i2v_download_file: current value of the download File component; when
            it is not ``None`` the returned update also makes it visible.

    Returns:
        A ``gr.update`` pointing the File component at the generated file.
    """
    import tempfile
    updated_data = get_all_df_i2v(selected_columns, I2V_DIR)
    ordered_columns = [item for item in I2V_TAB if item in selected_columns]
    present_columns = MODEL_INFO_TAB_I2V + ordered_columns
    updated_data = convert_scores_to_percentage(updated_data[present_columns])

    # Only a unique path is needed: close the handle right away so the
    # descriptor is not leaked and the file is not written while still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".xlsx", prefix="VBench_I2V_Leaderboard_") as temp_file:
        xlsx_path = temp_file.name
    updated_data.to_excel(xlsx_path, index=False, engine='openpyxl')

    if i2v_download_file is not None:
        return gr.update(value=xlsx_path, visible=True)
    return gr.update(value=xlsx_path)
def on_filter_and_update_download_t2v(select_score, selected_columns):
    """Show one T2V score category and regenerate the download file.

    Returns (table, checkbox-group update, file update).

    NOTE(review): the file is built from ``selected_columns`` as passed in,
    not from the dimensions of ``select_score`` that the table and checkbox
    update use - confirm this is intended.
    """
    table, checkgroup_update = on_filter_model_size_method_score_change_t2v(select_score)
    file_update = download_t2v_xlsx(selected_columns)
    return table, checkgroup_update, file_update
def on_filter_dim_and_update_download_t2v(selected_columns):
    """Rebuild the T2V table and its download file for ``selected_columns``; return (table, file update)."""
    table = on_filter_model_size_method_change_t2v(selected_columns)
    return table, download_t2v_xlsx(selected_columns)
def on_filter_task_and_update_download_i2i(selected_tasks):
    """Rebuild the I2I table and its download file for ``selected_tasks``; return (table, file update)."""
    table = on_filter_task_method_change_i2i(selected_tasks)
    return table, download_i2i_xlsx(selected_tasks)
def on_filter_and_update_download_i2v(selected_columns):
    """Rebuild the I2V table and its download file for ``selected_columns``; return (table, file update)."""
    table = on_filter_model_size_method_change_i2v(selected_columns)
    return table, download_i2v_xlsx(selected_columns)
def update_t2i_checkbox_choices(select_subject_button_value, current_selected_columns):
    """Adapt the T2I dimension checkboxes to the chosen subject.

    - 'Text Rendering' is offered as a choice only for the "All" subject and
      the two "Text & Typography" subjects (EN / CN).
    - It is force-selected for the two text-and-typography subjects and
      deselected for every other subject (including "All").
    - If nothing remains selected, the first available choice is selected.

    Returns:
        (gr.update for the checkbox group, list of selected values)
    """
    show_text_rendering = select_subject_button_value in (
        "🌐 All",
        "🔤 Text & Typography (EN)",
        "🔤 Text & Typography (CN)",
    )

    # Translate the display label to the internal key before testing for the
    # typography subjects (unknown labels are used as-is).
    if select_subject_button_value is None:
        is_text_typography = False
    else:
        internal_subject = T2I_SUBJECT_REVERSE_MAP.get(select_subject_button_value, select_subject_button_value)
        is_text_typography = internal_subject in ["text-and-typography", "text-and-typography-cn"]

    if show_text_rendering:
        available_choices = T2I_TAB.copy()
    else:
        available_choices = [col for col in T2I_TAB if col != "Text Rendering"]

    # Drop selections that are no longer offered.
    updated_selected = [col for col in current_selected_columns if col in available_choices]

    text_rendering_selected = "Text Rendering" in updated_selected
    if is_text_typography and not text_rendering_selected:
        updated_selected.append("Text Rendering")
    elif not is_text_typography and text_rendering_selected:
        updated_selected.remove("Text Rendering")

    # Never leave the group empty when there is something to choose from.
    if len(updated_selected) == 0 and len(available_choices) > 0:
        updated_selected = [available_choices[0]]

    return gr.update(choices=available_choices, value=updated_selected), updated_selected
def on_filter_and_update_download_t2i(selected_columns, select_subject_button=None):
    """Rebuild the T2I table and its download file; return (table, file update).

    A subject given as a display label found in ``T2I_SUBJECT_REVERSE_MAP`` is
    translated to its internal key first.
    """
    subject = select_subject_button
    if subject is not None and subject in T2I_SUBJECT_REVERSE_MAP:
        subject = T2I_SUBJECT_REVERSE_MAP[subject]
    table = on_filter_model_size_method_change_t2i(selected_columns, subject)
    return table, download_t2i_xlsx(selected_columns, subject)
def on_filter_and_update_download_i2i(selected_categories, selected_tasks):
    """Show one I2I category and regenerate the download file.

    Returns (table, checkbox-group update, file update).

    NOTE(review): the file is built from ``selected_tasks`` as passed in, not
    from the tasks of ``selected_categories`` that the table uses - confirm
    this is intended.
    """
    table, checkgroup_update = on_filter_model_size_method_change_i2i(selected_categories)
    file_update = download_i2i_xlsx(selected_tasks)
    return table, checkgroup_update, file_update
# Custom stylesheet passed to gr.Blocks: centres table cells and lays out the
# element with id "vbench_t2v_score_buttons" as an equal-width grid.
# NOTE(review): the comment inside the CSS mentions a 5-column grid while the
# rule itself uses repeat(7, ...); the string is runtime text and is left as-is.
css = """
table {
text-align: center;
}
thead th {
text-align: center !important;
}
tbody td {
text-align: center !important;
}
/* 让 Text to Video 的 7 个评分按钮宽度一致:固定为 5 列网格布局(第二行自动补空位) */
#vbench_t2v_score_buttons {
display: grid !important;
grid-template-columns: repeat(7, minmax(0, 1fr));
gap: 8px;
align-items: stretch;
}
/* 防止内部容器撑开导致网格溢出 */
#vbench_t2v_score_buttons > div {
min-width: 0;
}
/* 让按钮填满各自网格单元格 */
#vbench_t2v_score_buttons button {
width: 100%;
}
"""
# Top-level Gradio container; the UI is assembled inside `with block:` below.
block = gr.Blocks(css=css)
with block:
gr.Markdown(
LEADERBORAD_INTRODUCTION
)
with gr.Tabs(elem_classes="tab-buttons") as tabs:
# VBench 2.0
with gr.TabItem("Text to Video", elem_id="vbench-tab-table", id=1):
gr.Markdown(
TABLE_INTRODUCTION
)
with gr.Row(elem_id="vbench_t2v_score_buttons"):
vbench_t2v_creativity_button = gr.Button("Show Creativity Score")
vbench_t2v_commonsense_button = gr.Button("Show Commonsense Score")
vbench_t2v_control_button = gr.Button("Show Controllability Score")
vbench_t2v_human_button = gr.Button("Show Human Fidelity Score")
vbench_t2v_physics_button = gr.Button("Show Physics Score")
vbench_t2v_text_rendering_button = gr.Button("Show Text Rendering Score")
vbench_t2v_quality_button = gr.Button("Show Quality Score")
with gr.Row():
vbench_t2v_checkgroup = gr.CheckboxGroup(
choices=TASK_INFO_T2V,
value=TASK_INFO_T2V,
label="Evaluation Dimension",
interactive=True,
)
with gr.Row():
vbench_t2v_download_btn = gr.Button("📥 Download", variant="secondary")
with gr.Row():
vbench_t2v_download_file = gr.File(label="下载文件", visible=False)
data_component_t2v = gr.components.Dataframe(
value=get_baseline_df_t2v,
headers=COLUMN_NAMES_T2V,
type="pandas",
datatype=T2V_TITLE_TYPE,
interactive=False,
visible=True,
# height=700,
)
# vbench_t2v_checkgroup.change(fn=on_filter_model_size_method_change_t2v, inputs=[vbench_t2v_checkgroup], outputs=data_component_t2v)
vbench_t2v_creativity_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Creativity"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False)
vbench_t2v_commonsense_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Commonsense"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False)
vbench_t2v_control_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Controllability"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False)
vbench_t2v_human_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Human Fidelity"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False)
vbench_t2v_physics_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Physics"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False)
vbench_t2v_text_rendering_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Text Rendering"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False)
vbench_t2v_quality_button.click(fn=on_filter_and_update_download_t2v, inputs=[gr.State("Quality"), vbench_t2v_checkgroup], outputs=[data_component_t2v, vbench_t2v_checkgroup, vbench_t2v_download_file], api_name=False)
vbench_t2v_download_btn.click(fn=download_t2v_xlsx, inputs=[vbench_t2v_checkgroup, vbench_t2v_download_file], outputs=vbench_t2v_download_file, api_name=False)
# Table i2v
with gr.TabItem("Image to Video", elem_id="vbench-tab-table", id=2):
with gr.Row():
with gr.Column(scale=1.0):
# selection for column part:
checkbox_group_i2v = gr.CheckboxGroup(
choices=I2V_TAB,
value=I2V_TAB,
label="Evaluation Quality Dimension",
interactive=True,
)
with gr.Row():
i2v_download_btn = gr.Button("📥 Download", variant="secondary")
with gr.Row():
i2v_download_file = gr.File(label="下载文件", visible=False)
data_component_i2v = gr.components.Dataframe(
value=get_baseline_df_i2v,
headers=COLUMN_NAMES_I2V,
type="pandas",
datatype=I2V_TITLE_TYPE,
interactive=False,
visible=True,
)
checkbox_group_i2v.change(fn=on_filter_and_update_download_i2v, inputs=[checkbox_group_i2v], outputs=[data_component_i2v, i2v_download_file], api_name=False)
i2v_download_btn.click(fn=download_i2v_xlsx, inputs=[checkbox_group_i2v, i2v_download_file], outputs=i2v_download_file, api_name=False)
# Table T2I Leaderboard
with gr.TabItem("Text to Image", elem_id="ibench-tab-table", id=3):
with gr.Row():
with gr.Column(scale=1.0):
# selection for column part:
# Text Rendering 始终显示在选项中,但默认不选中(因为默认 subject 是 "🌐 All")
initial_t2i_choices = T2I_TAB.copy()
initial_t2i_value = [col for col in T2I_TAB if col != "Text Rendering"]
checkbox_group_t2i = gr.CheckboxGroup(
choices=initial_t2i_choices,
value=initial_t2i_value,
label="Evaluation Dimension",
interactive=True,
)
with gr.Row():
with gr.Column(scale=1.0):
select_subject_button = gr.Radio(
choices=T2I_SHOW_SUBJECT_TAB,
value="🌐 All",
label="Subject",
interactive=True
)
with gr.Row():
t2i_download_btn = gr.Button("📥 Download", variant="secondary")
with gr.Row():
t2i_download_file = gr.File(label="下载文件", visible=False)
data_component_t2i = gr.components.Dataframe(
value=get_baseline_df_t2i,
headers=COLUMN_NAMES_T2I,
type="pandas",
datatype=T2I_TITLE_TYPE,
interactive=False,
visible=True,
)
def on_subject_change(select_subject_value, current_selected):
    """Handle a change of the "Subject" radio in the Text to Image tab.

    Args:
        select_subject_value: The newly selected subject.
        current_selected: The dimensions currently ticked in the checkbox group.

    Returns:
        A 3-tuple ``(checkbox update, table data, download file)`` matching
        the three outputs this handler is wired to.
    """
    # Step 1: refresh the checkbox choices; this also yields the adjusted selection.
    choices_update, adjusted_selection = update_t2i_checkbox_choices(
        select_subject_value, current_selected
    )
    # Step 2: recompute the table data and the download file from that selection.
    table_data, download_file = on_filter_and_update_download_t2i(
        adjusted_selection, select_subject_value
    )
    return choices_update, table_data, download_file
# A change of the dimension selection calls on_filter_and_update_download_t2i
# with the selection and the current subject; its two results go to the table
# and the download-file component.
# NOTE(review): on_subject_change (registered just below) also writes to
# checkbox_group_t2i, and per the Gradio docs `.change` fires on programmatic
# updates as well as user input, so a subject change may run this handler a
# second time - confirm whether `.input` was intended.
checkbox_group_t2i.change(
    fn=on_filter_and_update_download_t2i,
    inputs=[checkbox_group_t2i, select_subject_button],
    outputs=[data_component_t2i, t2i_download_file],
    api_name=False,
)
# A change of the subject updates the checkbox group, the table and the file.
select_subject_button.change(
    fn=on_subject_change,
    inputs=[select_subject_button, checkbox_group_t2i],
    outputs=[checkbox_group_t2i, data_component_t2i, t2i_download_file],
    api_name=False,
)
# The download button passes the selection, the subject and the current file
# value to download_t2i_xlsx and writes the result back to the file component.
t2i_download_btn.click(
    fn=download_t2i_xlsx,
    inputs=[checkbox_group_t2i, select_subject_button, t2i_download_file],
    outputs=t2i_download_file,
    api_name=False,
)
# Image-to-Image (I2I) leaderboard tab.
# Components created here: five labelled buttons, a "Specific Tasks" checkbox
# group, a download button, a file component that starts hidden, and a
# read-only Dataframe.
# NOTE(review): elem_id "ibench-tab-table" is also used by the "Text to Image"
# tab; HTML ids are normally expected to be unique - confirm whether any CSS
# depends on the shared id before changing it.
with gr.TabItem("Image to Image", elem_id="ibench-tab-table", id=4):
with gr.Row():
# Each of these buttons is later wired to on_filter_and_update_download_i2i
# together with its own label string.
i2i_controllable_generation_button = gr.Button("Controllable Generation")
i2i_global_editing_button = gr.Button("Global Editing")
i2i_local_editing_button = gr.Button("Local Editing")
i2i_reference_generation_button = gr.Button("Reference Generation")
i2i_reference_editing_button = gr.Button("Reference Editing")
with gr.Row():
# Task selector: every entry of TASK_I2I is offered and pre-selected.
i2i_checkgroup_tasks = gr.CheckboxGroup(
choices=TASK_I2I,
value=TASK_I2I,
label="Specific Tasks",
interactive=True,
)
with gr.Row():
i2i_download_btn = gr.Button("📥 Download", variant="secondary")
with gr.Row():
# Created with visible=False; the label text is Chinese for "Download file".
i2i_download_file = gr.File(label="下载文件", visible=False)
# `value` receives the function object get_baseline_df_i2i itself (it is not
# called here); per the Gradio docs a callable value is evaluated when the
# app loads. The headers are MODEL_INFO_TAB_I2I followed by TASK_I2I.
data_component_i2i = gr.components.Dataframe(
value=get_baseline_df_i2i,
headers=MODEL_INFO_TAB_I2I+TASK_I2I,
type="pandas",
datatype=I2I_TITLE_TYPE,
interactive=False,
visible=True,
)
# All five buttons share one handler. Each click passes a constant gr.State
# holding that button's label plus the current task selection, and the handler's
# three results go to the table, the task checkbox group and the file component.
# In the lines visible here the checkbox group has no listener of its own; its
# value is only read when one of these buttons (or Download) is clicked.
for _i2i_button, _i2i_label in (
    (i2i_controllable_generation_button, "Controllable Generation"),
    (i2i_global_editing_button, "Global Editing"),
    (i2i_local_editing_button, "Local Editing"),
    (i2i_reference_generation_button, "Reference Generation"),
    (i2i_reference_editing_button, "Reference Editing"),
):
    _i2i_button.click(
        fn=on_filter_and_update_download_i2i,
        inputs=[gr.State(_i2i_label), i2i_checkgroup_tasks],
        outputs=[data_component_i2i, i2i_checkgroup_tasks, i2i_download_file],
        api_name=False,
    )
# The download button passes the task selection and the current file value to
# download_i2i_xlsx and writes the result back to the file component.
i2i_download_btn.click(
    fn=download_i2i_xlsx,
    inputs=[i2i_checkgroup_tasks, i2i_download_file],
    outputs=i2i_download_file,
    api_name=False,
)
# About tab: renders LEADERBORAD_INFO as Markdown (the name is spelled this
# way where it is defined; presumably in `constants` - verify).
with gr.TabItem("📝 About", elem_id="mvbench-tab-table", id=5):
gr.Markdown(LEADERBORAD_INFO, elem_classes="markdown-text")
# "Refresh" button.
with gr.Row():
data_run = gr.Button("Refresh")
# A click calls refresh_and_switch_tab with the current T2V and I2I
# selections; its four results go to the T2V table, the I2I table, and the
# T2V / I2I download-file components, in that order.
# NOTE(review): the I2V and T2I tables and file components are not among the
# outputs - confirm whether they are intentionally left out of the refresh.
data_run.click(refresh_and_switch_tab, inputs=[vbench_t2v_checkgroup, i2i_checkgroup_tasks], outputs=[data_component_t2v, data_component_i2i, vbench_t2v_download_file, i2i_download_file], api_name=False)
# Start the Gradio app. share=True additionally requests a public share link
# (see the Gradio `launch()` docs).
# NOTE(review): no `if __name__ == "__main__":` guard is visible here, so this
# appears to run whenever the file is executed or imported - confirm that is
# intended.
block.launch(share=True)