Spaces:
Running
Running
root
committed on
Commit
·
0f2b9f8
1
Parent(s):
3ab3a6b
add final score of vbench2
Browse files
- app.py +31 -46
- constants.py +31 -3
app.py
CHANGED
|
@@ -43,7 +43,6 @@ def add_new_eval(
|
|
| 43 |
return "Error! Empty file!"
|
| 44 |
if model_link == '' or model_name_textbox == '' or contact_email == '':
|
| 45 |
return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
|
| 46 |
-
# upload_data=json.loads(input_file)
|
| 47 |
upload_content = input_file
|
| 48 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
| 49 |
submission_repo.git_pull()
|
|
@@ -53,7 +52,6 @@ def add_new_eval(
|
|
| 53 |
update_time = now.strftime("%Y-%m-%d") # Capture update time
|
| 54 |
with open(f'{SUBMISSION_NAME}/{filename}.zip','wb') as f:
|
| 55 |
f.write(input_file)
|
| 56 |
-
# shutil.copyfile(CSV_DIR, os.path.join(SUBMISSION_NAME, f"{input_file}"))
|
| 57 |
|
| 58 |
csv_data = pd.read_csv(CSV_DIR)
|
| 59 |
|
|
@@ -172,7 +170,6 @@ def add_new_eval_i2v(
|
|
| 172 |
update_time = now.strftime("%Y-%m-%d") # Capture update time
|
| 173 |
with open(f'{SUBMISSION_NAME}/{filename}.zip','wb') as f:
|
| 174 |
f.write(input_file)
|
| 175 |
-
# shutil.copyfile(CSV_DIR, os.path.join(SUBMISSION_NAME, f"{input_file}"))
|
| 176 |
|
| 177 |
csv_data = pd.read_csv(I2V_DIR)
|
| 178 |
|
|
@@ -260,8 +257,6 @@ def add_new_eval_i2v(
|
|
| 260 |
return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
|
| 261 |
|
| 262 |
def get_normalized_df(df):
|
| 263 |
-
# final_score = df.drop('name', axis=1).sum(axis=1)
|
| 264 |
-
# df.insert(1, 'Overall Score', final_score)
|
| 265 |
normalize_df = df.copy().fillna(0.0)
|
| 266 |
for column in normalize_df.columns[1:-5]:
|
| 267 |
min_val = NORMALIZE_DIC[column]['Min']
|
|
@@ -279,7 +274,6 @@ def get_normalized_i2v_df(df):
|
|
| 279 |
|
| 280 |
|
| 281 |
def calculate_selected_score(df, selected_columns):
|
| 282 |
-
# selected_score = df[selected_columns].sum(axis=1)
|
| 283 |
selected_QUALITY = [i for i in selected_columns if i in QUALITY_LIST]
|
| 284 |
selected_SEMANTIC = [i for i in selected_columns if i in SEMANTIC_LIST]
|
| 285 |
selected_quality_score = df[selected_QUALITY].sum(axis=1)/sum([DIM_WEIGHT[i] for i in selected_QUALITY])
|
|
@@ -291,12 +285,10 @@ def calculate_selected_score(df, selected_columns):
|
|
| 291 |
return selected_semantic_score
|
| 292 |
if selected_semantic_score.isna().any().any():
|
| 293 |
return selected_quality_score
|
| 294 |
-
# print(selected_semantic_score,selected_quality_score )
|
| 295 |
selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
|
| 296 |
return selected_score.fillna(0.0)
|
| 297 |
|
| 298 |
def calculate_selected_score_i2v(df, selected_columns):
|
| 299 |
-
# selected_score = df[selected_columns].sum(axis=1)
|
| 300 |
selected_QUALITY = [i for i in selected_columns if i in I2V_QUALITY_LIST]
|
| 301 |
selected_I2V = [i for i in selected_columns if i in I2V_LIST]
|
| 302 |
selected_quality_score = df[selected_QUALITY].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in selected_QUALITY])
|
|
@@ -308,7 +300,6 @@ def calculate_selected_score_i2v(df, selected_columns):
|
|
| 308 |
return selected_i2v_score
|
| 309 |
if selected_i2v_score.isna().any().any():
|
| 310 |
return selected_quality_score
|
| 311 |
-
# print(selected_i2v_score,selected_quality_score )
|
| 312 |
selected_score = (selected_quality_score * I2V_QUALITY_WEIGHT + selected_i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
|
| 313 |
return selected_score.fillna(0.0)
|
| 314 |
|
|
@@ -371,14 +362,10 @@ def get_final_score_i2v(df, selected_columns):
|
|
| 371 |
df['Selected Score'] = selected_score
|
| 372 |
else:
|
| 373 |
df.insert(1, 'Selected Score', selected_score)
|
| 374 |
-
# df.loc[df[9:].isnull().any(axis=1), ['Total Score', 'I2V Score']] = 'N.A.'
|
| 375 |
mask = df.iloc[:, 5:-5].isnull().any(axis=1)
|
| 376 |
df.loc[mask, ['Total Score', 'I2V Score','Selected Score' ]] = np.nan
|
| 377 |
-
# df.fillna('N.A.', inplace=True)
|
| 378 |
return df
|
| 379 |
|
| 380 |
-
|
| 381 |
-
|
| 382 |
def get_final_score_quality(df, selected_columns):
|
| 383 |
normalize_df = get_normalized_df(df)
|
| 384 |
for name in normalize_df.drop('Model Name (clickable)', axis=1):
|
|
@@ -389,7 +376,6 @@ def get_final_score_quality(df, selected_columns):
|
|
| 389 |
df['Quality Score'] = quality_score
|
| 390 |
else:
|
| 391 |
df.insert(1, 'Quality Score', quality_score)
|
| 392 |
-
# selected_score = normalize_df[selected_columns].sum(axis=1) / len(selected_columns)
|
| 393 |
selected_score = normalize_df[selected_columns].sum(axis=1)/sum([DIM_WEIGHT[i] for i in selected_columns])
|
| 394 |
if 'Selected Score' in df:
|
| 395 |
df['Selected Score'] = selected_score
|
|
@@ -397,7 +383,28 @@ def get_final_score_quality(df, selected_columns):
|
|
| 397 |
df.insert(1, 'Selected Score', selected_score)
|
| 398 |
return df
|
| 399 |
|
| 400 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 401 |
|
| 402 |
def get_baseline_df():
|
| 403 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
|
@@ -452,12 +459,9 @@ def get_baseline_df_2():
|
|
| 452 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
| 453 |
submission_repo.git_pull()
|
| 454 |
df = pd.read_csv(VBENCH2_DIR)
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
# present_columns = MODEL_INFO + checkbox_group.value
|
| 458 |
-
# print(present_columns)
|
| 459 |
df = df[COLUMN_NAMES_2]
|
| 460 |
-
# Add this line to display the results evaluated by VBench by default
|
| 461 |
df = convert_scores_to_percentage(df)
|
| 462 |
return df
|
| 463 |
|
|
@@ -497,27 +501,22 @@ def get_all_df2(dir=VBENCH2_DIR):
|
|
| 497 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
| 498 |
submission_repo.git_pull()
|
| 499 |
df = pd.read_csv(dir)
|
| 500 |
-
|
| 501 |
-
|
| 502 |
return df
|
| 503 |
|
| 504 |
|
| 505 |
def convert_scores_to_percentage(df):
|
| 506 |
-
# Operate on every column in the DataFrame (except the'name 'column)
|
| 507 |
if "Sampled by" in df.columns:
|
| 508 |
skip_col =3
|
| 509 |
else:
|
| 510 |
skip_col =1
|
| 511 |
print(df)
|
| 512 |
for column in df.columns[skip_col:]: # 假设第一列是'name'
|
| 513 |
-
# if df[column].isdigit():
|
| 514 |
-
# print(df[column])
|
| 515 |
-
# is_numeric = pd.to_numeric(df[column], errors='coerce').notna().all()
|
| 516 |
valid_numeric_count = pd.to_numeric(df[column], errors='coerce').notna().sum()
|
| 517 |
if valid_numeric_count > 0:
|
| 518 |
df[column] = round(df[column] * 100,2)
|
| 519 |
df[column] = df[column].apply(lambda x: f"{x:05.2f}%" if pd.notna(pd.to_numeric(x, errors='coerce')) else x)
|
| 520 |
-
# df[column] = df[column].apply(lambda x: f"{x:05.2f}") + '%'
|
| 521 |
return df
|
| 522 |
|
| 523 |
def choose_all_quailty():
|
|
@@ -562,8 +561,6 @@ def on_filter_model_size_method_change(selected_columns, vbench_team_sample, vbe
|
|
| 562 |
|
| 563 |
def on_filter_model_size_method_change_quality(selected_columns):
|
| 564 |
updated_data = get_all_df_quality(selected_columns, QUALITY_DIR)
|
| 565 |
-
#print(updated_data)
|
| 566 |
-
# columns:
|
| 567 |
selected_columns = [item for item in QUALITY_TAB if item in selected_columns]
|
| 568 |
present_columns = MODEL_INFO_TAB_QUALITY + selected_columns
|
| 569 |
updated_data = updated_data[present_columns]
|
|
@@ -571,7 +568,6 @@ def on_filter_model_size_method_change_quality(selected_columns):
|
|
| 571 |
updated_data = convert_scores_to_percentage(updated_data)
|
| 572 |
updated_headers = present_columns
|
| 573 |
update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
|
| 574 |
-
# print(updated_data,present_columns,update_datatype)
|
| 575 |
filter_component = gr.components.Dataframe(
|
| 576 |
value=updated_data,
|
| 577 |
headers=updated_headers,
|
|
@@ -586,8 +582,6 @@ def on_filter_model_size_method_change_i2v(selected_columns,vbench_team_sample,
|
|
| 586 |
updated_data = get_all_df_i2v(selected_columns, I2V_DIR)
|
| 587 |
if vbench_team_sample:
|
| 588 |
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
| 589 |
-
# if vbench_team_eval:
|
| 590 |
-
# updated_data = updated_data[updated_data['Eval'] == 'VBench Team']
|
| 591 |
selected_columns = [item for item in I2V_TAB if item in selected_columns]
|
| 592 |
present_columns = MODEL_INFO_TAB_I2V + selected_columns
|
| 593 |
updated_data = updated_data[present_columns]
|
|
@@ -595,7 +589,6 @@ def on_filter_model_size_method_change_i2v(selected_columns,vbench_team_sample,
|
|
| 595 |
updated_data = convert_scores_to_percentage(updated_data)
|
| 596 |
updated_headers = present_columns
|
| 597 |
update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES_I2V.index(x)] for x in updated_headers]
|
| 598 |
-
# print(updated_data,present_columns,update_datatype)
|
| 599 |
filter_component = gr.components.Dataframe(
|
| 600 |
value=updated_data,
|
| 601 |
headers=updated_headers,
|
|
@@ -631,22 +624,14 @@ def on_filter_model_size_method_change_long(selected_columns, vbench_team_sample
|
|
| 631 |
|
| 632 |
|
| 633 |
def on_filter_model_size_method_change_2(vbench_team_sample, vbench_team_eval=False):
|
| 634 |
-
updated_data =
|
| 635 |
if vbench_team_sample:
|
| 636 |
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
| 637 |
if vbench_team_eval:
|
| 638 |
updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
# selected_columns = [item for item in TASK_INFO if item in selected_columns]
|
| 642 |
-
# present_columns = MODEL_INFO + selected_columns
|
| 643 |
-
# updated_data = updated_data[present_columns]
|
| 644 |
-
# updated_data = updated_data.sort_values(by="Selected Score", ascending=False)
|
| 645 |
-
# updated_data = convert_scores_to_percentage(updated_data)
|
| 646 |
-
updated_headers = COLUMN_NAMES_2
|
| 647 |
-
# print(COLUMN_NAMES,updated_headers,DATA_TITILE_TYPE )
|
| 648 |
update_datatype = VBENCH2_TITLE_TYPE
|
| 649 |
-
# print(updated_data,present_columns,update_datatype)
|
| 650 |
filter_component = gr.components.Dataframe(
|
| 651 |
value=updated_data,
|
| 652 |
headers=updated_headers,
|
|
@@ -759,8 +744,8 @@ with block:
|
|
| 759 |
visible=True,
|
| 760 |
height=700,
|
| 761 |
)
|
| 762 |
-
|
| 763 |
-
|
| 764 |
|
| 765 |
with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
|
| 766 |
with gr.Accordion("INSTRUCTION", open=False):
|
|
|
|
| 43 |
return "Error! Empty file!"
|
| 44 |
if model_link == '' or model_name_textbox == '' or contact_email == '':
|
| 45 |
return gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)
|
|
|
|
| 46 |
upload_content = input_file
|
| 47 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
| 48 |
submission_repo.git_pull()
|
|
|
|
| 52 |
update_time = now.strftime("%Y-%m-%d") # Capture update time
|
| 53 |
with open(f'{SUBMISSION_NAME}/{filename}.zip','wb') as f:
|
| 54 |
f.write(input_file)
|
|
|
|
| 55 |
|
| 56 |
csv_data = pd.read_csv(CSV_DIR)
|
| 57 |
|
|
|
|
| 170 |
update_time = now.strftime("%Y-%m-%d") # Capture update time
|
| 171 |
with open(f'{SUBMISSION_NAME}/{filename}.zip','wb') as f:
|
| 172 |
f.write(input_file)
|
|
|
|
| 173 |
|
| 174 |
csv_data = pd.read_csv(I2V_DIR)
|
| 175 |
|
|
|
|
| 257 |
return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)
|
| 258 |
|
| 259 |
def get_normalized_df(df):
|
|
|
|
|
|
|
| 260 |
normalize_df = df.copy().fillna(0.0)
|
| 261 |
for column in normalize_df.columns[1:-5]:
|
| 262 |
min_val = NORMALIZE_DIC[column]['Min']
|
|
|
|
| 274 |
|
| 275 |
|
| 276 |
def calculate_selected_score(df, selected_columns):
|
|
|
|
| 277 |
selected_QUALITY = [i for i in selected_columns if i in QUALITY_LIST]
|
| 278 |
selected_SEMANTIC = [i for i in selected_columns if i in SEMANTIC_LIST]
|
| 279 |
selected_quality_score = df[selected_QUALITY].sum(axis=1)/sum([DIM_WEIGHT[i] for i in selected_QUALITY])
|
|
|
|
| 285 |
return selected_semantic_score
|
| 286 |
if selected_semantic_score.isna().any().any():
|
| 287 |
return selected_quality_score
|
|
|
|
| 288 |
selected_score = (selected_quality_score * QUALITY_WEIGHT + selected_semantic_score * SEMANTIC_WEIGHT) / (QUALITY_WEIGHT + SEMANTIC_WEIGHT)
|
| 289 |
return selected_score.fillna(0.0)
|
| 290 |
|
| 291 |
def calculate_selected_score_i2v(df, selected_columns):
|
|
|
|
| 292 |
selected_QUALITY = [i for i in selected_columns if i in I2V_QUALITY_LIST]
|
| 293 |
selected_I2V = [i for i in selected_columns if i in I2V_LIST]
|
| 294 |
selected_quality_score = df[selected_QUALITY].sum(axis=1)/sum([DIM_WEIGHT_I2V[i] for i in selected_QUALITY])
|
|
|
|
| 300 |
return selected_i2v_score
|
| 301 |
if selected_i2v_score.isna().any().any():
|
| 302 |
return selected_quality_score
|
|
|
|
| 303 |
selected_score = (selected_quality_score * I2V_QUALITY_WEIGHT + selected_i2v_score * I2V_WEIGHT) / (I2V_QUALITY_WEIGHT + I2V_WEIGHT)
|
| 304 |
return selected_score.fillna(0.0)
|
| 305 |
|
|
|
|
| 362 |
df['Selected Score'] = selected_score
|
| 363 |
else:
|
| 364 |
df.insert(1, 'Selected Score', selected_score)
|
|
|
|
| 365 |
mask = df.iloc[:, 5:-5].isnull().any(axis=1)
|
| 366 |
df.loc[mask, ['Total Score', 'I2V Score','Selected Score' ]] = np.nan
|
|
|
|
| 367 |
return df
|
| 368 |
|
|
|
|
|
|
|
| 369 |
def get_final_score_quality(df, selected_columns):
|
| 370 |
normalize_df = get_normalized_df(df)
|
| 371 |
for name in normalize_df.drop('Model Name (clickable)', axis=1):
|
|
|
|
| 376 |
df['Quality Score'] = quality_score
|
| 377 |
else:
|
| 378 |
df.insert(1, 'Quality Score', quality_score)
|
|
|
|
| 379 |
selected_score = normalize_df[selected_columns].sum(axis=1)/sum([DIM_WEIGHT[i] for i in selected_columns])
|
| 380 |
if 'Selected Score' in df:
|
| 381 |
df['Selected Score'] = selected_score
|
|
|
|
| 383 |
df.insert(1, 'Selected Score', selected_score)
|
| 384 |
return df
|
| 385 |
|
| 386 |
+
def get_final_score2(df, selected_columns):
|
| 387 |
+
category_to_dimension = {}
|
| 388 |
+
|
| 389 |
+
for key, value in VBENCH2_DIM2CAT.items():
|
| 390 |
+
if value not in category_to_dimension:
|
| 391 |
+
category_to_dimension[value] = []
|
| 392 |
+
category_to_dimension[value].append(key)
|
| 393 |
+
score_names = []
|
| 394 |
+
for cur_score in category_to_dimension:
|
| 395 |
+
score_name = f"{cur_score} Score"
|
| 396 |
+
score_names.append(score_name)
|
| 397 |
+
score = df[category_to_dimension[cur_score]].mean(axis=1)
|
| 398 |
+
if score_name in df:
|
| 399 |
+
df[score_name] = score
|
| 400 |
+
else:
|
| 401 |
+
df.insert(1, score_name, score)
|
| 402 |
+
avg_score = df[score_names].mean(axis=1)
|
| 403 |
+
if 'Total Score' in df:
|
| 404 |
+
df['Total Score'] = avg_score
|
| 405 |
+
else:
|
| 406 |
+
df.insert(1, 'Total Score', avg_score)
|
| 407 |
+
return df
|
| 408 |
|
| 409 |
def get_baseline_df():
|
| 410 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
|
|
|
| 459 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
| 460 |
submission_repo.git_pull()
|
| 461 |
df = pd.read_csv(VBENCH2_DIR)
|
| 462 |
+
df = get_final_score2(df, TASK_INFO_2)
|
| 463 |
+
df = df.sort_values(by="Total Score", ascending=False)
|
|
|
|
|
|
|
| 464 |
df = df[COLUMN_NAMES_2]
|
|
|
|
| 465 |
df = convert_scores_to_percentage(df)
|
| 466 |
return df
|
| 467 |
|
|
|
|
| 501 |
submission_repo = Repository(local_dir=SUBMISSION_NAME, clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN, repo_type="dataset")
|
| 502 |
submission_repo.git_pull()
|
| 503 |
df = pd.read_csv(dir)
|
| 504 |
+
df = get_final_score2(df, selected_columns)
|
| 505 |
+
df = df.sort_values(by="Selected Score", ascending=False)
|
| 506 |
return df
|
| 507 |
|
| 508 |
|
| 509 |
def convert_scores_to_percentage(df):
|
|
|
|
| 510 |
if "Sampled by" in df.columns:
|
| 511 |
skip_col =3
|
| 512 |
else:
|
| 513 |
skip_col =1
|
| 514 |
print(df)
|
| 515 |
for column in df.columns[skip_col:]: # 假设第一列是'name'
|
|
|
|
|
|
|
|
|
|
| 516 |
valid_numeric_count = pd.to_numeric(df[column], errors='coerce').notna().sum()
|
| 517 |
if valid_numeric_count > 0:
|
| 518 |
df[column] = round(df[column] * 100,2)
|
| 519 |
df[column] = df[column].apply(lambda x: f"{x:05.2f}%" if pd.notna(pd.to_numeric(x, errors='coerce')) else x)
|
|
|
|
| 520 |
return df
|
| 521 |
|
| 522 |
def choose_all_quailty():
|
|
|
|
| 561 |
|
| 562 |
def on_filter_model_size_method_change_quality(selected_columns):
|
| 563 |
updated_data = get_all_df_quality(selected_columns, QUALITY_DIR)
|
|
|
|
|
|
|
| 564 |
selected_columns = [item for item in QUALITY_TAB if item in selected_columns]
|
| 565 |
present_columns = MODEL_INFO_TAB_QUALITY + selected_columns
|
| 566 |
updated_data = updated_data[present_columns]
|
|
|
|
| 568 |
updated_data = convert_scores_to_percentage(updated_data)
|
| 569 |
updated_headers = present_columns
|
| 570 |
update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES.index(x)] for x in updated_headers]
|
|
|
|
| 571 |
filter_component = gr.components.Dataframe(
|
| 572 |
value=updated_data,
|
| 573 |
headers=updated_headers,
|
|
|
|
| 582 |
updated_data = get_all_df_i2v(selected_columns, I2V_DIR)
|
| 583 |
if vbench_team_sample:
|
| 584 |
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
|
|
|
|
|
|
| 585 |
selected_columns = [item for item in I2V_TAB if item in selected_columns]
|
| 586 |
present_columns = MODEL_INFO_TAB_I2V + selected_columns
|
| 587 |
updated_data = updated_data[present_columns]
|
|
|
|
| 589 |
updated_data = convert_scores_to_percentage(updated_data)
|
| 590 |
updated_headers = present_columns
|
| 591 |
update_datatype = [DATA_TITILE_TYPE[COLUMN_NAMES_I2V.index(x)] for x in updated_headers]
|
|
|
|
| 592 |
filter_component = gr.components.Dataframe(
|
| 593 |
value=updated_data,
|
| 594 |
headers=updated_headers,
|
|
|
|
| 624 |
|
| 625 |
|
| 626 |
def on_filter_model_size_method_change_2(vbench_team_sample, vbench_team_eval=False):
|
| 627 |
+
updated_data = get_all_df2(VBENCH2_DIR)
|
| 628 |
if vbench_team_sample:
|
| 629 |
updated_data = updated_data[updated_data["Sampled by"] == 'VBench Team']
|
| 630 |
if vbench_team_eval:
|
| 631 |
updated_data = updated_data[updated_data['Evaluated by'] == 'VBench Team']
|
| 632 |
+
|
| 633 |
+
updated_headers = COLUMN_NAMES_2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 634 |
update_datatype = VBENCH2_TITLE_TYPE
|
|
|
|
| 635 |
filter_component = gr.components.Dataframe(
|
| 636 |
value=updated_data,
|
| 637 |
headers=updated_headers,
|
|
|
|
| 744 |
visible=True,
|
| 745 |
height=700,
|
| 746 |
)
|
| 747 |
+
vbench_team_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
| 748 |
+
vbench_validate_filter_2.change(fn=on_filter_model_size_method_change_2, inputs=[vbench_team_filter_2, vbench_validate_filter], outputs=data_component_2)
|
| 749 |
|
| 750 |
with gr.TabItem("Video Quality", elem_id="vbench-tab-table", id=3):
|
| 751 |
with gr.Accordion("INSTRUCTION", open=False):
|
constants.py
CHANGED
|
@@ -55,8 +55,15 @@ MODEL_INFO_2 = [
|
|
| 55 |
"Sampled by",
|
| 56 |
"Evaluated by",
|
| 57 |
"Accessibility",
|
| 58 |
-
"Date"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
]
|
|
|
|
| 60 |
TASK_INFO_2 = [
|
| 61 |
"Human Anatomy",
|
| 62 |
"Human Clothes",
|
|
@@ -65,7 +72,7 @@ TASK_INFO_2 = [
|
|
| 65 |
"Diversity",
|
| 66 |
"Mechanics",
|
| 67 |
"Material",
|
| 68 |
-
"
|
| 69 |
"Multi-View Consistency",
|
| 70 |
"Dynamic Spatial Relationship",
|
| 71 |
"Dynamic Attribute",
|
|
@@ -194,7 +201,7 @@ I2V_QUALITY_WEIGHT = 1.0
|
|
| 194 |
|
| 195 |
DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
|
| 196 |
I2V_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
|
| 197 |
-
VBENCH2_TITLE_TYPE = ['markdown', 'markdown', 'markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
|
| 198 |
|
| 199 |
SUBMISSION_NAME = "vbench_leaderboard_submission"
|
| 200 |
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/Vchitect/", SUBMISSION_NAME)
|
|
@@ -301,4 +308,25 @@ NORMALIZE_DIC_I2V = {
|
|
| 301 |
"Aesthetic Quality":{"Min": 0.0, "Max": 1.0},
|
| 302 |
"Imaging Quality":{"Min": 0.0, "Max": 1.0},
|
| 303 |
"Temporal Flickering":{"Min":0.6293, "Max": 1.0}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
}
|
|
|
|
| 55 |
"Sampled by",
|
| 56 |
"Evaluated by",
|
| 57 |
"Accessibility",
|
| 58 |
+
"Date",
|
| 59 |
+
"Total Score",
|
| 60 |
+
'Creativity Score',
|
| 61 |
+
'Commonsense Score',
|
| 62 |
+
'Controllability Score',
|
| 63 |
+
'Human Fidelity Score',
|
| 64 |
+
'Physics Score'
|
| 65 |
]
|
| 66 |
+
|
| 67 |
TASK_INFO_2 = [
|
| 68 |
"Human Anatomy",
|
| 69 |
"Human Clothes",
|
|
|
|
| 72 |
"Diversity",
|
| 73 |
"Mechanics",
|
| 74 |
"Material",
|
| 75 |
+
"Thermotics",
|
| 76 |
"Multi-View Consistency",
|
| 77 |
"Dynamic Spatial Relationship",
|
| 78 |
"Dynamic Attribute",
|
|
|
|
| 201 |
|
| 202 |
DATA_TITILE_TYPE = ['markdown', 'markdown', 'markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
|
| 203 |
I2V_TITILE_TYPE = ['markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
|
| 204 |
+
VBENCH2_TITLE_TYPE = ['markdown', 'markdown', 'markdown', 'markdown', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
|
| 205 |
|
| 206 |
SUBMISSION_NAME = "vbench_leaderboard_submission"
|
| 207 |
SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/Vchitect/", SUBMISSION_NAME)
|
|
|
|
| 308 |
"Aesthetic Quality":{"Min": 0.0, "Max": 1.0},
|
| 309 |
"Imaging Quality":{"Min": 0.0, "Max": 1.0},
|
| 310 |
"Temporal Flickering":{"Min":0.6293, "Max": 1.0}
|
| 311 |
+
}
|
| 312 |
+
|
| 313 |
+
VBENCH2_DIM2CAT = {
|
| 314 |
+
"Human Anatomy": "Human Fidelity",
|
| 315 |
+
"Human Identity": "Human Fidelity",
|
| 316 |
+
"Human Clothes": "Human Fidelity",
|
| 317 |
+
"Diversity": "Creativity",
|
| 318 |
+
"Composition": "Creativity",
|
| 319 |
+
"Dynamic Spatial Relationship": "Controllability",
|
| 320 |
+
"Dynamic Attribute": "Controllability",
|
| 321 |
+
"Motion Order Understanding": "Controllability",
|
| 322 |
+
"Human Interaction": "Controllability",
|
| 323 |
+
"Complex Landscape": "Controllability",
|
| 324 |
+
"Complex Plot": "Controllability",
|
| 325 |
+
"Camera Motion": "Controllability",
|
| 326 |
+
"Motion Rationality": "Commonsense",
|
| 327 |
+
"Instance Preservation": "Commonsense",
|
| 328 |
+
"Mechanics": "Physics",
|
| 329 |
+
"Thermotics": "Physics",
|
| 330 |
+
"Material": "Physics",
|
| 331 |
+
"Multi-View Consistency": "Physics"
|
| 332 |
}
|