Borchmann's picture
Upload folder using huggingface_hub
87993b5 verified
raw
history blame
3.54 kB
from dataclasses import dataclass, make_dataclass
from enum import Enum
from src.about import Tasks
def fields(raw_class):
    """Collect the non-dunder attribute values declared on ``raw_class``.

    Only the class's own ``__dict__`` is inspected. An attribute is skipped
    when its name starts with a double underscore or ends with one.

    Args:
        raw_class: Class whose namespace is inspected.

    Returns:
        list: The retained attribute values, in the iteration order of
        ``raw_class.__dict__``.
    """
    collected = []
    for attr_name, attr_value in raw_class.__dict__.items():
        # Guard clause: drop anything that looks like a dunder on either side.
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        collected.append(attr_value)
    return collected
# These classes hold the user-facing column names in one place,
# so that a rename does not have to be repeated throughout the code
# whenever a modification is needed.
@dataclass(frozen=True)
class ColumnContent:
    """Immutable description of one table column.

    The dataclass is frozen, so attributes cannot be reassigned after
    construction.
    """

    # Column label; this is the value gathered into COLS / EVAL_COLS.
    name: str
    # Column datatype label ("markdown", "str" and "number" are the values
    # used in this file); gathered into EVAL_TYPES for the queue columns.
    type: str
    # Flag set per column when the column lists are declared.
    # NOTE(review): presumably controls initial visibility in the UI - the
    # consumer is not visible in this file.
    displayed_by_default: bool
    # Columns with hidden=True are excluded when COLS is built.
    hidden: bool = False
    # NOTE(review): not read anywhere in this file; presumably marks columns
    # the user cannot deselect - confirm against the UI code.
    never_hidden: bool = False
## Leaderboard columns
# Every entry is a (field name, field type, default value) triple, which is
# the field specification format accepted by dataclasses.make_dataclass.
auto_eval_column_dict = [
    # Main columns (displayed by default, in order)
    (
        "model",
        ColumnContent,
        ColumnContent(name="Model", type="markdown", displayed_by_default=True, never_hidden=True),
    ),
    (
        "organization",
        ColumnContent,
        ColumnContent(name="Organization", type="str", displayed_by_default=True),
    ),
    (
        "model_type_symbol",
        ColumnContent,
        ColumnContent(name="Model Type", type="markdown", displayed_by_default=True, never_hidden=True),
    ),
    # Cost/Efficiency metrics
    (
        "agent_steps",
        ColumnContent,
        ColumnContent(name="Agent Steps", type="number", displayed_by_default=True),
    ),
    (
        "cost_usd",
        ColumnContent,
        ColumnContent(name="Cost (USD)", type="number", displayed_by_default=True),
    ),
]

# Scores: one numeric column per member of Tasks, in enumeration order.
for idx, task in enumerate(Tasks):
    # Only the first task's column is displayed by default.
    displayed_by_default = idx == 0
    score_column = ColumnContent(
        name=task.value.col_name,
        type="number",
        displayed_by_default=displayed_by_default,
    )
    auto_eval_column_dict.append((task.name, ColumnContent, score_column))

# Model information columns. None of them is displayed by default; only
# "link" is flagged hidden=True, so it is the only one left out of COLS.
auto_eval_column_dict.extend(
    [
        (
            "model_type",
            ColumnContent,
            ColumnContent(name="Type", type="str", displayed_by_default=False, hidden=False),
        ),
        (
            "submission_date",
            ColumnContent,
            ColumnContent(name="Submission Date", type="str", displayed_by_default=False),
        ),
        (
            "link",
            ColumnContent,
            ColumnContent(name="Link", type="str", displayed_by_default=False, hidden=True),
        ),
    ]
)

# make_dataclass builds the class dynamically so that the score columns
# follow whatever Tasks currently defines.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
## For the queue columns in the submission tab
@dataclass(frozen=True)
class EvalQueueColumn:  # Queue column
    """Column definitions for the evaluation queue table.

    The attributes below have no type annotations, so they are plain class
    attributes rather than dataclass fields; they are read straight from the
    class (see EVAL_COLS / EVAL_TYPES).
    """

    model = ColumnContent(name="model", type="markdown", displayed_by_default=True)
    model_type = ColumnContent(name="model_type", type="str", displayed_by_default=True)
    organization = ColumnContent(name="organization", type="str", displayed_by_default=True)
    status = ColumnContent(name="status", type="str", displayed_by_default=True)
## All the model information that we might need
@dataclass
class ModelDetails:
    """Display metadata carried by each ``ModelType`` member."""

    # Short identifier; ModelType.to_str() places it after the symbol.
    name: str
    # NOTE(review): not read anywhere in this section; presumably a
    # human-readable label - confirm against the callers.
    display_name: str = ""
    symbol: str = ""  # emoji fallback
    # NOTE(review): the members below use ".svg" file names; presumably an
    # icon shown instead of the emoji - confirm against the UI code.
    icon_filename: str = ""
    # NOTE(review): the members below use "#RRGGBB" strings; the consumer is
    # not visible in this section.
    color: str = ""


# The emoji are spelled as escape sequences so the literals cannot be
# corrupted again if this file is re-encoded (the open-lock and plug emoji
# had previously been turned into mojibake).
_CLOUD_SYMBOL = "\u2601\ufe0f"  # CLOUD + VARIATION SELECTOR-16
_OPEN_LOCK_SYMBOL = "\U0001f513"  # OPEN LOCK
_ELECTRIC_PLUG_SYMBOL = "\U0001f50c"  # ELECTRIC PLUG


class ModelType(Enum):
    """Model categories, each carrying its ``ModelDetails``."""

    API = ModelDetails(name="api", symbol=_CLOUD_SYMBOL, icon_filename="snow_cloud2.svg", color="#FF9F36")
    OpenWeight = ModelDetails(
        name="open-weight", symbol=_OPEN_LOCK_SYMBOL, icon_filename="snow_code.svg", color="#75CDD7"
    )
    Unknown = ModelDetails(name="", symbol="?", icon_filename="", color="")

    def to_str(self, separator=" "):
        """Return ``"<symbol><separator><name>"`` for this member.

        Args:
            separator: Text placed between the symbol and the name.

        Returns:
            str: The formatted label.
        """
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        """Map free-form text to a ``ModelType`` member.

        Matching is substring based and case-insensitive for the names.
        API is tested before OpenWeight, so text that matches both resolves
        to API.

        Args:
            type: Text to classify. The parameter shadows the builtin
                ``type``; the name is kept so keyword callers keep working.

        Returns:
            ModelType: ``API``, ``OpenWeight``, or ``Unknown`` when nothing
            matches or when ``type`` is not a string (e.g. ``None``).
        """
        # Missing values are classified as Unknown instead of raising
        # AttributeError on .lower().
        if not isinstance(type, str):
            return ModelType.Unknown

        lowered = type.lower()
        # The plug emoji is not the symbol of any member; presumably it is an
        # earlier API marker. It is still accepted so that previously
        # recognised inputs keep matching.
        if "api" in lowered or _ELECTRIC_PLUG_SYMBOL in type:
            return ModelType.API
        if "open-weight" in lowered or "open weight" in lowered or _OPEN_LOCK_SYMBOL in type:
            return ModelType.OpenWeight
        # Fallback added after the original checks, so earlier results are
        # unchanged: also recognise the API member's own symbol.
        if ModelType.API.value.symbol in type:
            return ModelType.API
        return ModelType.Unknown
# Column selection
# Leaderboard column names, leaving out every column flagged as hidden.
COLS = [column.name for column in fields(AutoEvalColumn) if not column.hidden]

# Names and datatype labels of the evaluation queue columns, in matching order.
_queue_columns = fields(EvalQueueColumn)
EVAL_COLS = [column.name for column in _queue_columns]
EVAL_TYPES = [column.type for column in _queue_columns]

# Column names of the benchmark scores, one per member of Tasks.
BENCHMARK_COLS = [benchmark.value.col_name for benchmark in Tasks]