File size: 3,776 Bytes
8e5aec2
 
 
1596349
8e5aec2
 
 
 
 
 
 
 
 
 
1596349
 
 
 
 
 
 
 
 
 
 
 
 
8e5aec2
 
 
 
1596349
8e5aec2
 
 
1596349
 
 
 
 
 
60f464d
 
f0bd283
 
8e5aec2
 
 
 
 
 
1596349
1134a74
 
3c0af53
1596349
 
 
60f464d
3c0af53
 
1596349
1134a74
8e5aec2
 
 
1596349
8e5aec2
1134a74
 
3c0af53
 
 
8e5aec2
1134a74
8e5aec2
1134a74
 
 
8e5aec2
 
 
1596349
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    """Descriptor for one leaderboard column.

    Each enum member of ``Tasks`` below wraps one ``Task`` instance.
    """

    # Key identifying this task in the parsed results.
    benchmark: str
    # Metric key (kept for lm-eval harness compatibility).
    metric: str
    # Human-readable label shown as the leaderboard column header.
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Closed set of leaderboard tasks: 4 capability dimensions + 5 content domains."""

    # benchmark key in parsed results, metric key (for lm-eval compatibility), and display label
    # -- Capability dimensions --
    dim_layout = Task("dimension_layout", "error_score", "Layout")
    dim_attribute = Task("dimension_attribute", "error_score", "Attribute")
    dim_text = Task("dimension_text", "error_score", "Text")
    dim_knowledge = Task("dimension_knowledge", "error_score", "Knowledge")
    # -- Content domains --
    dom_slides = Task("domain_slides", "error_score", "Slides")
    dom_webpage = Task("domain_webpage", "error_score", "Webpage")
    dom_poster = Task("domain_poster", "error_score", "Poster")
    dom_chart = Task("domain_chart", "error_score", "Chart")
    dom_scientific_figure = Task("domain_scientific_figure", "error_score", "Scientific Figure")


NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------


# Your leaderboard name
# Rendered as raw HTML by the Gradio frontend.
TITLE = """<h1 align="center" id="space-title">BizGenEval Leaderboard</h1>"""

# What does your leaderboard evaluate?
# Markdown shown at the top of the leaderboard page.
INTRODUCTION_TEXT = """
BizGenEval is a benchmark for commercial visual content generation quality.
This leaderboard reports score breakdowns by:

- Capability dimensions: Layout, Attribute, Text, Knowledge
- Content domains: Slides, Webpage, Poster, Chart, Scientific Figure

All leaderboard scores are displayed as `hard(easy)` when ranking by hard, and `easy(hard)` when ranking by easy, on a
0-100 scale.

GitHub: [microsoft/BizGenEval](https://github.com/microsoft/BizGenEval)
"""

# Which evaluations are you running? how can people reproduce what you have?
# NOTE: plain string literal — the previous `f"""` prefix had no placeholders
# (ruff F541) and would have broken as soon as a literal `{` or `}` was added.
LLM_BENCHMARKS_TEXT = """
## How it works

1. Run BizGenEval evaluation locally.
2. Summarize your scores into the 4 capability dimensions and 5 content domains.
3. Enter the hard/easy scores in the `Submit here!` tab.
4. Click `Submit Result` to add a new row to the public leaderboard.

## Score Protocol

- Scores are shown as `hard(easy)` when ranking by hard, and `easy(hard)` when ranking by easy.
- The leaderboard is sorted by the average score of the currently selected `Rank By` mode. `Hard` is the default.
- If two models have the same average on the selected mode, the other mode is used as a tiebreaker.
- Displayed columns include 4 capability dimensions and 5 content domains.
- User submissions are appended as new leaderboard rows.
"""

# Markdown shown in the `Submit here!` tab; describes the manual submission flow.
EVALUATION_QUEUE_TEXT = """
## Submission Guide

1. Enter the model name exactly as you want it to appear on the leaderboard.
2. Fill in all 18 scores on a `0-100` scale.
3. You can enter integers or decimals. Scores are saved with one decimal place.
4. If you enter more than one decimal place, the score will be rounded to one decimal place before it is saved.
5. Click `Submit Result` to add a new row to the public leaderboard.

### Required scores

- Capability dimensions: `Layout`, `Attribute`, `Text`, `Knowledge`
- Content domains: `Slides`, `Webpage`, `Poster`, `Chart`, `Scientific Figure`
- Each item needs both `hard` and `easy`
"""

CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# BibTeX citation snippet. NOTE: the doubled braces (`{{ }}`) previously here
# were f-string escapes left over from a template; in this plain raw string
# they rendered literally, producing invalid BibTeX. Single braces are correct.
CITATION_BUTTON_TEXT = r"""@misc{li2026bizgeneval,
  title={BizGenEval: A Systematic Benchmark for Commercial Visual Content Generation},
  author={Yan Li and Zezi Zeng and Ziwei Zhou and Xin Gao and Muzhao Tian and Yifan Yang and Mingxi Cheng and Qi Dai and Yuqing Yang and Lili Qiu and Zhendong Wang and Zhengyuan Yang and Xue Yang and Lijuan Wang and Ji Li and Chong Luo},
  year={2026},
  eprint={2603.25732},
  archivePrefix={arXiv},
  primaryClass={cs.CV},
  url={https://arxiv.org/abs/2603.25732}
}"""