Spaces:

DearSloth
/

RoleRMBench

Sleeping

File size: 1,738 Bytes

5d015e0
 
 
 
 
 
 
 
 
 
 
 
 
47dba14
 
 
 
 
 
 
 
 
5d015e0
 
 
 
 
47dba14
5d015e0
 
 
79e5cbd
 
479ad42
8500b21
 
5d015e0
 
 
8500b21
d1beb96
8500b21
d1beb96
8500b21
d1beb96
8500b21
 
d1beb96

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    """Pair a task key and a metric key with the column name shown in the leaderboard."""

    # Task key (e.g. "avg", "nar" in the Tasks enum).
    benchmark: str
    # Metric key for the task (every Tasks member uses "score").
    metric: str
    # Name to display for this task in the leaderboard (e.g. "Avg").
    col_name: str


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Tasks listed on the leaderboard, one ``Task`` value per member."""

    # Each member spells out: task key, metric key, display name in the leaderboard.
    task0 = Task(benchmark="avg", metric="score", col_name="Avg")
    task1 = Task(benchmark="nar", metric="score", col_name="Nar")
    task2 = Task(benchmark="mt", metric="score", col_name="MT")
    task3 = Task(benchmark="con", metric="score", col_name="Con")
    task4 = Task(benchmark="if", metric="score", col_name="IF")
    task5 = Task(benchmark="scn", metric="score", col_name="Scn")
    task6 = Task(benchmark="saf", metric="score", col_name="Saf")
    task7 = Task(benchmark="att", metric="score", col_name="Att")

# ---------------------------------------------------


# Leaderboard heading, as an HTML <h1> snippet.
TITLE = '<h1 align="center" id="space-title">🎭 RoleRMBench Leaderboard</h1>'

# Short markdown blurb stating what the leaderboard evaluates.
# Written one literal per line so the leading/trailing newlines are explicit.
INTRODUCTION_TEXT = (
    "\n"
    "RoleRMBench evaluates reward models on role-playing scenarios across multiple dimensions.\n"
    "\n"
    "For more information, please refer to: "
    "[https://github.com/Dear-Sloth/RoleRMBench](https://github.com/Dear-Sloth/RoleRMBench)\n"
    "\n"
    "*Feel free to submit your results to our 🤗 HuggingFace leaderboard.*\n"
)

# Label for the citation snippet.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# BibTeX entry offered for copy/paste. The ampersand in the title is escaped as
# ``\&``: a bare ``&`` is LaTeX's alignment-tab character and makes the copied
# entry fail when the bibliography is typeset. The raw string keeps the
# backslash literal.
CITATION_BUTTON_TEXT = r"""@misc{ding2025rolermbenchrolermreward,
      title={RoleRMBench \& RoleRM: Towards Reward Modeling for Profile-Based Role Play in Dialogue Systems}, 
      author={Hang Ding and Qiming Feng and Dongqi Liu and Qi Zhao and Tao Yao and Shuo Wang and Dongsheng Chen and Jian Li and Zhenye Gan and Jiangning Zhang and Chengjie Wang and Yabiao Wang},
      year={2025},
      eprint={2512.10575},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2512.10575}, 
}"""