# MBench_Leaderboard / constants.py
# Last change (1e84160): update UI colors, sortable header and remove metrics
# Repository identifiers (presumably Hugging Face Hub repo ids -- confirm
# against the code that consumes them).
BENCHMARK_DATASET_REPO = "studyOverflow/TempMemoryData"
LEADERBOARD_REPO = "PeanutUp/membench_leaderboard_submission"

# Local directory holding the leaderboard data, and the results table inside
# it. RESULTS_CSV is derived from the directory constant so the two paths
# cannot drift apart if the directory is ever renamed.
LOCAL_LEADERBOARD_DIR = "./membench_leaderboard_submission"
RESULTS_CSV = f"{LOCAL_LEADERBOARD_DIR}/results.csv"
# Leading leaderboard columns, in order: model identity/provenance fields
# followed by the aggregate scores.
MODEL_INFO_COLUMNS = [
    # Identity and provenance
    "Rank",
    "Model Name",
    "Model Link",
    "Model Type",
    "Certification",
    "Accessibility",
    "Sampled by",
    "Evaluated by",
    "Date",
    # Aggregate scores
    "Total M-Score",
    "Entity Score",
    "Environment Score",
    "Causal Score",
]
# Per-sub-dimension metric columns, in order. The grouping comments follow
# the three dimensions described in LEADERBOARD_INTRO.
METRIC_COLUMNS = [
    # Entity consistency
    "Object Geometry",
    "Object Texture",
    "Human Identity",
    "Human Appearance",
    # Environment consistency
    "Epipolar Geometry",
    "Reprojection Consistency",
    "Lighting Consistency",
    "Style Consistency",
    # Causal consistency
    "State Progress",
    "Physical Plausibility",
    "Text Interaction",
    "Action Interaction",
]
# Full column order: model-info columns first, then the per-metric columns.
# Built as a new list, so the two source lists are left untouched.
ALL_COLUMNS = [*MODEL_INFO_COLUMNS, *METRIC_COLUMNS]
# Selectable model-type values. "All" is presumably the "no filter" option
# for a UI selector -- confirm against the code that reads this list.
MODEL_TYPE_CHOICES = [
    "All",
    "text-conditioned",
    "action-conditioned",
]
# Markdown shown as the leaderboard introduction.
# Blank lines separate the Markdown blocks on purpose: without them the
# "Seed leaderboard values" sentence is parsed as a continuation of the last
# bullet, and the Dataset / repo / Project page / GitHub lines collapse into
# a single run-on paragraph.
LEADERBOARD_INTRO = """
# MBench Leaderboard

**MBench** evaluates memory capability in video world models: whether generated worlds keep entities, environments, and causal state coherent across long horizons and interaction.

- **Entity Consistency:** object geometry/texture and human identity/appearance.
- **Environment Consistency:** spatial consistency, reprojection, lighting, and style stability.
- **Causal Consistency:** state progress, physical plausibility, and text/action interaction consistency.

Seed leaderboard values are transcribed from Table 2 of the MBench paper. Aggregate columns are derived as unweighted averages over the reported sub-dimensions until official leaderboard totals are released.

Dataset: `studyOverflow/TempMemoryData`

Leaderboard data repo: `PeanutUp/membench_leaderboard_submission`

Project page: https://peanutup.github.io/MBench-project/

GitHub: https://github.com/study-overflow/MBench
"""
# Markdown shown as the submission instructions.
# Blank lines keep the upload instructions and the required-keys sentence as
# separate paragraphs; adjacent lines would otherwise be merged into one.
SUBMIT_INTRO = """
# Submit to MBench

Upload a ZIP file containing one JSON result file. Submissions are saved to `submissions/pending/` in the leaderboard data repo and are **not** added to the public leaderboard automatically.

Required JSON keys: `model_name`, `model_link`, `model_type`, and `total_m_score`.
"""