|
|
import ast |
|
|
import argparse |
|
|
import glob |
|
|
import pickle |
|
|
|
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
# Custom stylesheet handed to ``gr.Blocks(css=block_css)`` in ``build_demo``.
# It enlarges/pads the markdown sections, hides the page footer, and
# constrains images placed inside ``.image-container``.
# NOTE(review): the ``#notice_markdown`` and ``#leaderboard_dataframe`` rules
# match no elem_id assigned in this file (the dataframes here use
# "arena_leaderboard_dataframe") -- confirm whether they are still needed.
block_css = """
#notice_markdown {
font-size: 104%
}
#notice_markdown th {
display: none;
}
#notice_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_markdown {
font-size: 104%
}
#leaderboard_markdown td {
padding-top: 6px;
padding-bottom: 6px;
}
#leaderboard_dataframe td {
line-height: 0.1em;
font-size: 8px;
}
footer {
display:none !important
}
.image-container {
display: flex;
align-items: center;
padding: 1px;
}
.image-container img {
margin: 0 30px;
height: 20px;
max-height: 100%;
width: auto;
max-width: 20%;
}
"""
|
|
def model_hyperlink(model_name, link):
    """Return an HTML anchor showing ``model_name`` that opens ``link`` in a new tab.

    Both arguments are interpolated into the markup as-is (no escaping).
    """
    anchor_style = (
        "color: var(--link-text-color); "
        "text-decoration: underline;"
        "text-decoration-style: dotted;"
    )
    return f'<a target="_blank" href="{link}" style="{anchor_style}">{model_name}</a>'
|
|
def load_leaderboard_table_csv(filename, add_hyperlink=True):
    """Load a leaderboard CSV file into a list of per-row dictionaries.

    The first row of the file is the header. Every following non-blank row
    becomes one dict mapping header name to cell text. Header names and cell
    values are whitespace-stripped strings; no numeric conversion is done.
    The file is parsed with the ``csv`` module, so quoted fields may contain
    commas. Rows shorter than the header simply omit the missing keys.

    Args:
        filename: Path of the CSV file to read.
        add_hyperlink: When true, replace each row's ``"Model"`` value with
            the HTML anchor built by ``model_hyperlink`` from the row's
            ``"Model"`` and ``"Link"`` values.

    Returns:
        A list of dicts, one per data row, in file order. An empty file
        yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If ``add_hyperlink`` is true and a row has no ``"Model"``
            or ``"Link"`` entry.
    """
    import csv  # local import keeps this block independent of the file header

    # The context manager guarantees the handle is closed even on error.
    with open(filename, newline="") as csv_file:
        reader = csv.reader(csv_file)
        header = next(reader, None)
        if header is None:
            return []
        heads = [name.strip() for name in header]

        rows = []
        for record in reader:
            cells = [cell.strip() for cell in record]
            # Skip blank lines (e.g. trailing newlines) instead of emitting
            # an empty row that would break the hyperlink lookup below.
            if not any(cells):
                continue
            item = dict(zip(heads, cells))
            if add_hyperlink:
                item["Model"] = model_hyperlink(item["Model"], item["Link"])
            rows.append(item)
    return rows
|
|
|
|
|
def get_arena_table(model_table_df):
    """Build the ranked row list for the main leaderboard table.

    Args:
        model_table_df: DataFrame containing a ``"Model"`` column plus the
            score columns listed in ``columns`` below.

    Returns:
        A list of rows ordered by ``"Average Score"`` (highest first). Each
        row is ``[rank, model, open_source, <8 task scores>, average]`` with
        ``rank`` starting at 1. Cell values are passed through unchanged.

    Raises:
        KeyError: If a required column is missing.
    """
    columns = [
        "Model",
        "Open Source",
        "Text Recognition",
        "Text Referring",
        "Text Spotting",
        "Relation Extraction",
        "Element Parsing",
        "Mathematical Calculation",
        "Visual Text Understanding",
        "Knowledge Reasoning",
        "Average Score",
    ]

    # Scores loaded from CSV are strings; compare them numerically so that
    # e.g. "9.5" does not outrank "30.1". Unparseable scores sort last.
    ranked = model_table_df.sort_values(
        by=["Average Score"],
        ascending=False,
        key=lambda col: pd.to_numeric(col, errors="coerce"),
        kind="stable",
    )

    # Read cells positionally from the sorted frame. The index labels still
    # refer to the pre-sort row positions, so they must not be used as
    # offsets into ``.values``.
    column_arrays = [ranked[name].values for name in columns]
    return [
        [rank, *cells]
        for rank, cells in enumerate(zip(*column_arrays), start=1)
    ]
|
|
|
|
|
def get_cn_table(model_table_df):
    """Build the ranked row list for the second leaderboard table.

    Args:
        model_table_df: DataFrame containing a ``"Model"`` column plus the
            score columns listed in ``columns`` below.

    Returns:
        A list of rows ordered by ``"Average Score"`` (highest first). Each
        row is ``[rank, model, open_source, <5 task scores>, average]`` with
        ``rank`` starting at 1. Cell values are passed through unchanged.

    Raises:
        KeyError: If a required column is missing.
    """
    columns = [
        "Model",
        "Open Source",
        "Text Recognition",
        "Relation Extraction",
        "Element Parsing",
        "Visual Text Understanding",
        "Knowledge Reasoning",
        "Average Score",
    ]

    # Scores loaded from CSV are strings; compare them numerically so that
    # e.g. "9.5" does not outrank "30.1". Unparseable scores sort last.
    ranked = model_table_df.sort_values(
        by=["Average Score"],
        ascending=False,
        key=lambda col: pd.to_numeric(col, errors="coerce"),
        kind="stable",
    )

    # Read cells positionally from the sorted frame. The index labels still
    # refer to the pre-sort row positions, so they must not be used as
    # offsets into ``.values``.
    column_arrays = [ranked[name].values for name in columns]
    return [
        [rank, *cells]
        for rank, cells in enumerate(zip(*column_arrays), start=1)
    ]
|
|
|
|
|
def build_leaderboard_tab(leaderboard_table_file, leaderboard_table_file_2, show_plot=False):
    """Create the leaderboard page components: header, two tabs, and notice.

    When ``leaderboard_table_file`` is truthy, both CSV files are loaded and
    rendered as two tabbed dataframes below a title/links header. The notice
    section is always rendered last. ``build_demo`` calls this inside its
    ``gr.Blocks`` context.

    Args:
        leaderboard_table_file: CSV path for the "OCRBench v2" tab; a falsy
            value skips the header and both tabs.
        leaderboard_table_file_2: CSV path for the "Text Recognition" tab.
        show_plot: Accepted for interface compatibility; not used here.

    Returns:
        None.
    """
    # The two tab descriptions differ only in the benchmark name prefix.
    description_suffix = (
        " is a comprehensive evaluation benchmark designed to assess the OCR"
        " capabilities of Large Multimodal Models. It comprises five components:"
        " Text Recognition, SceneText-Centric VQA, Document-Oriented VQA, Key"
        " Information Extraction, and Handwritten Mathematical Expression"
        " Recognition. The benchmark includes 1000 question-answer pairs, and all"
        " the answers undergo manual verification and correction to ensure a more"
        " precise evaluation."
    )

    if leaderboard_table_file:
        primary_rows = load_leaderboard_table_csv(leaderboard_table_file)
        secondary_rows = load_leaderboard_table_csv(leaderboard_table_file_2)
        primary_frame = pd.DataFrame(primary_rows)
        secondary_frame = pd.DataFrame(secondary_rows)

        header_md = (
            "\n"
            "# π OCRBench v2 Leaderboard\n"
            "| [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR)"
            " | [Paper](https://arxiv.org/abs/2305.07895) |\n"
        )
        gr.Markdown(header_md, elem_id="leaderboard_markdown")

        # Leading columns shared by both tables: rank, linked name, license.
        leading_headers = ["Rank", "Name", "Open Source"]
        leading_types = ["str", "markdown", "str"]

        with gr.Tabs():
            with gr.Tab("OCRBench v2", id=0):
                primary_values = get_arena_table(primary_frame)
                gr.Markdown("OCRBench v2" + description_suffix, elem_id="leaderboard_markdown")
                primary_scores = [
                    "Text Recognition",
                    "Text Referring",
                    "Text Spotting",
                    "Relation Extraction",
                    "Element Parsing",
                    "Mathematical Calculation",
                    "Visual Text Understanding",
                    "Knowledge Reasoning",
                    "Average Score",
                ]
                gr.Dataframe(
                    headers=leading_headers + primary_scores,
                    datatype=leading_types + ["number"] * len(primary_scores),
                    value=primary_values,
                    # NOTE(review): the same elem_id is reused by the second
                    # dataframe below -- confirm this is intended.
                    elem_id="arena_leaderboard_dataframe",
                    wrap=False,
                )

            with gr.Tab("Text Recognition", id=1):
                secondary_values = get_cn_table(secondary_frame)
                gr.Markdown("OCRBench" + description_suffix, elem_id="leaderboard_markdown")
                secondary_scores = [
                    "Text Recognition",
                    "Relation Extraction",
                    "Element Parsing",
                    "Visual Text Understanding",
                    "Knowledge Reasoning",
                    "Average Score",
                ]
                gr.Dataframe(
                    headers=leading_headers + secondary_scores,
                    datatype=leading_types + ["number"] * len(secondary_scores),
                    value=secondary_values,
                    elem_id="arena_leaderboard_dataframe",
                    wrap=True,
                )

    # The notice is shown whether or not a leaderboard file was supplied.
    notice_md = (
        "\n"
        "# Notice\n"
        "Sometimes, API calls to closed-source models may not succeed. In such cases, we will repeat the calls for unsuccessful samples until it becomes impossible to obtain a successful response. It is important to note that due to rigorous security reviews by OpenAI, GPT4V refuses to provide results for the 84 samples in OCRBench.\n"
        "If you would like to include your model in the OCRBench leaderboard, please follow the evaluation instructions provided on [GitHub](https://github.com/Yuliang-Liu/MultimodalOCR), [VLMEvalKit](https://github.com/open-compass/VLMEvalKit) or [lmms-eval](https://github.com/EvolvingLMMs-Lab/lmms-eval) and feel free to contact us via email at zhangli123@hust.edu.cn. We will update the leaderboard in time."
    )
    gr.Markdown(notice_md, elem_id="leaderboard_markdown")
|
|
|
|
|
def build_demo(leaderboard_table_file, leaderboard_table_file_2):
    """Assemble the leaderboard Gradio app and return the ``gr.Blocks`` object.

    Args:
        leaderboard_table_file: CSV path forwarded to ``build_leaderboard_tab``
            as its first table file.
        leaderboard_table_file_2: CSV path forwarded as its second table file.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    large_text_theme = gr.themes.Base(text_size=gr.themes.sizes.text_lg)

    with gr.Blocks(
        title="OCRBench Leaderboard",
        theme=large_text_theme,
        css=block_css,
    ) as demo:
        build_leaderboard_tab(
            leaderboard_table_file,
            leaderboard_table_file_2,
            show_plot=True,
        )

    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: parse the CSV locations, build the app, and
    # start the Gradio server.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--share",
        action="store_true",
        help="pass share=True to Gradio's launch()",
    )
    parser.add_argument(
        "--OCRBench_file",
        type=str,
        default="./OCRBench_en.csv",
        help="CSV file for the first leaderboard tab",
    )
    parser.add_argument(
        "--OCRBench_file_2",
        type=str,
        default="./OCRBench_cn.csv",
        help="CSV file for the second leaderboard tab",
    )
    args = parser.parse_args()

    demo = build_demo(args.OCRBench_file, args.OCRBench_file_2)
    # Forward --share so the flag actually takes effect; without it the
    # option is accepted on the command line but silently ignored.
    demo.launch(share=args.share)