from dataclasses import dataclass, make_dataclass
from enum import Enum
from src.about import Tasks
def fields(raw_class):
    """Collect the non-dunder class-attribute values of *raw_class*.

    Unlike :func:`dataclasses.fields`, this reads the raw class ``__dict__``
    directly, so it also works on plain classes whose attributes are not
    annotated dataclass fields.
    """
    collected = []
    for attr_name, attr_value in raw_class.__dict__.items():
        # Skip anything that starts OR ends with "__" (e.g. __module__, __doc__).
        if attr_name.startswith("__") or attr_name.endswith("__"):
            continue
        collected.append(attr_value)
    return collected
# These classes hold the user-facing column names, so a rename only needs
# to happen here instead of everywhere the column is referenced in the code.
@dataclass(frozen=True)
class ColumnContent:
    """Metadata describing one leaderboard column."""

    name: str  # user-facing column header text
    type: str  # rendering type as used below: "markdown", "str", or "number"
    displayed_by_default: bool  # whether the column is shown by default
    hidden: bool = False  # hidden columns are excluded from COLS at the bottom of this file
    never_hidden: bool = False  # presumably the UI never lets users hide these — confirm in display code
## Leaderboard columns
# Accumulated as (attribute_name, type, default) triples, then turned into a
# frozen dataclass by make_dataclass at the end of this section.
auto_eval_column_dict = []

# Main columns (displayed by default, in order).
auto_eval_column_dict += [
    ("model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)),
    ("organization", ColumnContent, ColumnContent("Organization", "str", True)),
    ("model_type_symbol", ColumnContent, ColumnContent("Model Type", "markdown", True, never_hidden=True)),
]

# Cost/efficiency metrics.
auto_eval_column_dict += [
    ("agent_steps", ColumnContent, ColumnContent("Agent Steps", "number", True)),
    ("cost_usd", ColumnContent, ColumnContent("Cost (USD)", "number", True)),
]

# One score column per task; only the first task (the overall ANLS score)
# is displayed by default.
for task_index, task in enumerate(Tasks):
    shown_by_default = task_index == 0
    auto_eval_column_dict.append(
        (task.name, ColumnContent, ColumnContent(task.value.col_name, "number", shown_by_default))
    )

# Model information needed for filtering.
# NOTE(review): the original comment said these "will be hidden in display",
# but "model_type" and "submission_date" use hidden=False (so they DO appear
# in COLS) while only "link" is hidden=True — confirm this is intended.
auto_eval_column_dict += [
    ("model_type", ColumnContent, ColumnContent("Type", "str", False, hidden=False)),
    ("submission_date", ColumnContent, ColumnContent("Submission Date", "str", False)),
    ("link", ColumnContent, ColumnContent("Link", "str", False, hidden=True)),
]

# make_dataclass lets us build the column dataclass dynamically, since the
# score columns depend on the Tasks enum.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
## For the queue columns in the submission tab
@dataclass(frozen=True)
class EvalQueueColumn:  # Queue column
    """Columns of the pending-evaluation queue table.

    NOTE: these are plain (unannotated) class attributes, so the dataclass
    decorator generates no fields from them — the module-level ``fields()``
    helper reads them from ``__dict__`` instead. Do not add annotations here,
    as that would turn them into real dataclass fields and change behavior.
    """

    model = ColumnContent("model", "markdown", True)
    model_type = ColumnContent("model_type", "str", True)
    organization = ColumnContent("organization", "str", True)
    status = ColumnContent("status", "str", True)
## All the model information that we might need
@dataclass
class ModelDetails:
    """Display metadata for one model category (used as ModelType enum values)."""

    name: str  # machine-readable category name, e.g. "api" or "open-weight"
    display_name: str = ""  # optional prettier name; empty means fall back to `name`
    symbol: str = ""  # emoji fallback
    icon_filename: str = ""  # SVG asset name — presumably resolved by display code; verify
    color: str = ""  # hex color string — presumably used for UI accents; verify
class ModelType(Enum):
    """Closed set of model categories shown on the leaderboard.

    Each member's value is a ModelDetails carrying its display metadata.
    """

    # NOTE(review): the symbol string literals below appear mojibake-garbled
    # in this file — confirm the intended emoji bytes against the rendered UI
    # before editing these lines.
    API = ModelDetails(name="api", symbol="โ๏ธ", icon_filename="snow_cloud2.svg", color="#FF9F36")
    OpenWeight = ModelDetails(name="open-weight", symbol="๐", icon_filename="snow_code.svg", color="#75CDD7")
    Unknown = ModelDetails(name="", symbol="?", icon_filename="", color="")

    def to_str(self, separator=" "):
        # Display string: "<symbol><separator><machine name>", e.g. symbol + " " + "api".
        return f"{self.value.symbol}{separator}{self.value.name}"

    @staticmethod
    def from_str(type):
        # Best-effort parse of a free-form string (name or symbol) back into a
        # ModelType; anything unrecognized maps to Unknown.
        # NOTE(review): the parameter name `type` shadows the builtin; kept
        # as-is for keyword-call compatibility.
        if "api" in type.lower() or "๐" in type:
            return ModelType.API
        if "open-weight" in type.lower() or "open weight" in type.lower() or "๐" in type:
            return ModelType.OpenWeight
        return ModelType.Unknown
# Column selection
# Leaderboard columns that are actually shown (hidden ones are filtered out).
COLS = [column.name for column in fields(AutoEvalColumn) if not column.hidden]

# Submission-queue table: column names and their declared types, in the same
# order (fields() preserves class-attribute declaration order).
_queue_columns = fields(EvalQueueColumn)
EVAL_COLS = [column.name for column in _queue_columns]
EVAL_TYPES = [column.type for column in _queue_columns]

# One benchmark column per task, identified by its display name.
BENCHMARK_COLS = [task.value.col_name for task in Tasks]
|