from dataclasses import dataclass, make_dataclass
from enum import Enum

from src.about import Tasks


def fields(raw_class):
    """Return the non-dunder attribute values stored on ``raw_class``.

    Iterates ``raw_class.__dict__`` in its stored order and keeps every value
    whose attribute name neither starts nor ends with a double underscore.
    """
    return [
        value
        for attr_name, value in raw_class.__dict__.items()
        if not (attr_name.startswith("__") or attr_name.endswith("__"))
    ]


# These classes hold the user-facing column names in one place,
# so that a rename does not have to be repeated all around the code.
@dataclass(frozen=True)
class ColumnContent:
    """Immutable description of a single table column."""

    name: str
    type: str
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False


## Leaderboard columns
# Each entry is a (field name, field type, default value) triple in the shape
# expected by ``make_dataclass``.
auto_eval_column_dict = [
    # Main columns (displayed by default, in order)
    ("model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)),
    ("organization", ColumnContent, ColumnContent("Organization", "str", True)),
    ("model_type_symbol", ColumnContent, ColumnContent("Model Type", "markdown", True, never_hidden=True)),
    # Cost/Efficiency metrics
    ("agent_steps", ColumnContent, ColumnContent("Agent Steps", "number", True)),
    ("cost_usd", ColumnContent, ColumnContent("Cost (USD)", "number", True)),
]

# Scores: one numeric column per entry of Tasks.
for idx, task in enumerate(Tasks):
    # Only the first task (overall ANLS) is displayed by default
    displayed_by_default = idx == 0
    auto_eval_column_dict.append(
        (
            task.name,
            ColumnContent,
            ColumnContent(task.value.col_name, "number", displayed_by_default),
        )
    )

# Model information (not displayed by default, but needed for filtering)
# NOTE(review): only "link" is marked hidden=True; "model_type" and
# "submission_date" keep hidden=False -- confirm this is intended.
auto_eval_column_dict += [
    ("model_type", ColumnContent, ColumnContent("Type", "str", False, hidden=False)),
    ("submission_date", ColumnContent, ColumnContent("Submission Date", "str", False)),
    ("link", ColumnContent, ColumnContent("Link", "str", False, hidden=True)),
]

# We use make_dataclass to dynamically fill the score fields from Tasks
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)

## For the queue columns
# in the submission tab
@dataclass(frozen=True)
class EvalQueueColumn:
    """Columns of the evaluation queue table.

    The columns are plain class attributes (no annotations), so they are
    collected through the module-level ``fields`` helper rather than through
    the dataclass machinery.
    """

    # Queue column
    model = ColumnContent("model", "markdown", True)
    model_type = ColumnContent("model_type", "str", True)
    organization = ColumnContent("organization", "str", True)
    status = ColumnContent("status", "str", True)


## All the model information that we might need
@dataclass
class ModelDetails:
    """Display metadata attached to each ``ModelType`` member."""

    name: str
    display_name: str = ""
    symbol: str = ""  # emoji fallback
    icon_filename: str = ""
    color: str = ""


class ModelType(Enum):
    """Kinds of models, each carrying its ``ModelDetails``."""

    API = ModelDetails(name="api", symbol="☁️", icon_filename="snow_cloud2.svg", color="#FF9F36")
    OpenWeight = ModelDetails(name="open-weight", symbol="🔓", icon_filename="snow_code.svg", color="#75CDD7")
    Unknown = ModelDetails(name="", symbol="?", icon_filename="", color="")

    def to_str(self, separator=" "):
        """Return the member's symbol and name joined by ``separator``."""
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        """Map a free-form type string to a ``ModelType`` member.

        Matching is substring based: the lower-cased text is searched for the
        type name, and the raw text is searched for the type's emoji.

        Args:
            type: Text such as ``"☁️ api"``, ``"open weight"`` or a bare
                emoji. The parameter name shadows the builtin but is kept so
                that keyword callers continue to work.

        Returns:
            ``ModelType.API``, ``ModelType.OpenWeight``, or
            ``ModelType.Unknown`` when nothing matches or the input is
            ``None``/empty.
        """
        if not type:
            return ModelType.Unknown

        lowered = type.lower()

        # "☁" matches the declared API symbol "☁️" with or without its
        # variation selector; "🔌" is still accepted for backward
        # compatibility with strings that used that emoji.
        if "api" in lowered or "☁" in type or "🔌" in type:
            return ModelType.API
        if "open-weight" in lowered or "open weight" in lowered or "🔓" in type:
            return ModelType.OpenWeight
        return ModelType.Unknown


# Column selection
COLS = [c.name for c in fields(AutoEvalColumn) if not c.hidden]

EVAL_COLS = [c.name for c in fields(EvalQueueColumn)]
EVAL_TYPES = [c.type for c in fields(EvalQueueColumn)]

BENCHMARK_COLS = [t.value.col_name for t in Tasks]