Spaces:
Running
Running
| from dataclasses import dataclass, make_dataclass | |
| from enum import Enum | |
| from src.about import Tasks | |
def fields(raw_class):
    """Return the values of the non-dunder attributes stored on ``raw_class``.

    Iterates ``raw_class.__dict__`` in definition order and skips every
    attribute whose name starts with ``__`` or ends with ``__``. Only the
    attribute values are returned, not their names.
    """
    collected = []
    for attr_name, attr_value in raw_class.__dict__.items():
        # Skip anything that looks like a dunder on either side.
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        collected.append(attr_value)
    return collected
# These classes hold the user-facing column names, so that a rename
# only has to be made here instead of all around the code.
@dataclass(frozen=True)
class ColumnContent:
    """Description of one displayed column.

    The class is a frozen dataclass: it needs the generated ``__init__``
    because instances are built positionally and by keyword, and it must be
    hashable because instances are used as field defaults in
    ``make_dataclass`` (Python 3.11+ rejects unhashable defaults).
    """

    name: str  # user-facing column label
    type: str  # column type string, e.g. "markdown", "str" or "number"
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False
## Leaderboard columns
# Each entry is a (field_name, field_type, default_value) triple in the
# format expected by dataclasses.make_dataclass.
auto_eval_column_dict = [
    # Main columns (displayed by default, in order)
    ("model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)),
    ("organization", ColumnContent, ColumnContent("Organization", "str", True)),
    ("model_type_symbol", ColumnContent, ColumnContent("Model Type", "markdown", True, never_hidden=True)),
    # Cost/Efficiency metrics
    ("agent_steps", ColumnContent, ColumnContent("Agent Steps", "number", True)),
    ("cost_usd", ColumnContent, ColumnContent("Cost (USD)", "number", True)),
]

# Scores: one numeric column per entry of Tasks.
for idx, task in enumerate(Tasks):
    # Only the first task (the overall ANLS score, per the original note)
    # is displayed by default.
    displayed_by_default = idx == 0
    auto_eval_column_dict.append(
        (task.name, ColumnContent, ColumnContent(task.value.col_name, "number", displayed_by_default))
    )

# Model information: none of these is displayed by default; only "link" is
# additionally flagged as hidden.
auto_eval_column_dict.extend(
    [
        ("model_type", ColumnContent, ColumnContent("Type", "str", False, hidden=False)),
        ("submission_date", ColumnContent, ColumnContent("Submission Date", "str", False)),
        ("link", ColumnContent, ColumnContent("Link", "str", False, hidden=True)),
    ]
)

# make_dataclass is used so that the score fields can be filled in
# dynamically from Tasks.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
## For the queue columns in the submission tab
class EvalQueueColumn:  # Queue column
    """Columns of the evaluation queue table; all are displayed by default."""

    model = ColumnContent(name="model", type="markdown", displayed_by_default=True)
    model_type = ColumnContent(name="model_type", type="str", displayed_by_default=True)
    organization = ColumnContent(name="organization", type="str", displayed_by_default=True)
    status = ColumnContent(name="status", type="str", displayed_by_default=True)
## All the model information that we might need
@dataclass
class ModelDetails:
    """Display metadata attached to a model type.

    Declared as a dataclass so that instances can be built with keyword
    arguments; every field except ``name`` defaults to an empty string.
    """

    name: str
    display_name: str = ""
    symbol: str = ""  # emoji fallback
    icon_filename: str = ""
    color: str = ""
class ModelType(Enum):
    """Known model categories, each carrying a ``ModelDetails`` value."""

    # NOTE(review): the emoji literals in this class look mis-encoded in this
    # copy of the file; they are kept as found and should be checked against
    # the original source.
    API = ModelDetails(name="api", symbol="βοΈ", icon_filename="snow_cloud2.svg", color="#FF9F36")
    OpenWeight = ModelDetails(name="open-weight", symbol="π", icon_filename="snow_code.svg", color="#75CDD7")
    Unknown = ModelDetails(name="", symbol="?", icon_filename="", color="")

    def to_str(self, separator=" "):
        """Return the member's symbol and name joined by ``separator``."""
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        """Map a free-form type string to a ``ModelType`` member.

        The text match is case-insensitive and substring-based; the symbol
        match is a plain substring test. The API check runs first, and
        ``ModelType.Unknown`` is returned when nothing matches.

        The parameter keeps its original name ``type`` (which shadows the
        builtin inside this function) so keyword callers keep working.
        Declared static because the function takes no instance; this makes it
        callable from both the class and its members.
        """
        lowered = type.lower()
        if "api" in lowered or "π" in type:
            return ModelType.API
        if "open-weight" in lowered or "open weight" in lowered or "π" in type:
            return ModelType.OpenWeight
        return ModelType.Unknown
# Column selection
# Leaderboard column labels, excluding the columns flagged as hidden.
COLS = [column.name for column in fields(AutoEvalColumn) if not column.hidden]

# Labels and type strings of the evaluation-queue columns, in matching order.
EVAL_COLS = [queue_column.name for queue_column in fields(EvalQueueColumn)]
EVAL_TYPES = [queue_column.type for queue_column in fields(EvalQueueColumn)]

# Display names of the benchmark score columns, one per task.
BENCHMARK_COLS = [benchmark.value.col_name for benchmark in Tasks]