|
|
""" |
|
|
Centralized Column Definitions |
|
|
|
|
|
Single source of truth for all leaderboard columns. |
|
|
Add new columns here and they propagate everywhere automatically. |
|
|
""" |
|
|
|
|
|
from dataclasses import dataclass |
|
|
from enum import Enum, auto |
|
|
from typing import List, Dict, Optional |
|
|
|
|
|
|
|
|
class ColumnType(Enum):
    """Datatype labels for leaderboard columns, expressed as Gradio datatype strings."""

    # Each member's value is the literal string passed on as the column datatype.
    NUMBER = "number"
    STRING = "str"
    HTML = "html"
|
|
|
|
|
|
|
|
class ColumnGroup(Enum):
    """Logical buckets used to organise and filter leaderboard columns."""

    # Values are assigned automatically; only member identity is meaningful.
    CORE = auto()
    LEGAL = auto()
    MTEB = auto()
    TOKENIZER = auto()
    MODEL_INFO = auto()
    CORRELATION = auto()
|
|
|
|
|
|
|
|
@dataclass
class ColumnDefinition:
    """
    Full description of one leaderboard column.

    Every piece of per-column metadata is kept on this record so that it
    only has to be declared in one place.
    """

    # Display name of the column; also the CSV key when ``api_name`` is unset.
    name: str
    # Alternative key for the column in CSV data, if it differs from ``name``.
    api_name: Optional[str] = None
    # Datatype label for the column (see ``ColumnType``).
    column_type: ColumnType = ColumnType.STRING
    # Group the column belongs to (see ``ColumnGroup``).
    group: ColumnGroup = ColumnGroup.CORE
    # Column width as a string such as "120px".
    width: str = "120px"
    # Presumably the number of decimal places to display -- confirm with the renderer.
    decimals: int = 2
    # Whether the column is shown by default.
    default_visible: bool = True
    # Whether the column should be colorized.
    colorize: bool = False
    # Free-text description of the column.
    description: str = ""

    @property
    def csv_key(self) -> str:
        """Return the key this column uses in CSV files.

        ``api_name`` takes precedence when it is set to a non-empty string;
        otherwise the display ``name`` is used.
        """
        if self.api_name:
            return self.api_name
        return self.name
|
|
|
|
|
|
|
|
# Ordered list of every leaderboard column. List order is the order in which
# the registry reports column names.
COLUMN_DEFINITIONS: List[ColumnDefinition] = [
    # ------------------------------------------------------------------
    # Columns visible by default
    # ------------------------------------------------------------------
    ColumnDefinition(
        name="Rank",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.CORE,
        width="50px",
        decimals=0,
        default_visible=True,
        description="Rank by MTEB Score (Mean TaskType)",
    ),
    ColumnDefinition(
        name="Model",
        column_type=ColumnType.HTML,
        group=ColumnGroup.CORE,
        width="280px",
        default_visible=True,
        colorize=False,
        description="Model name with HuggingFace link",
    ),
    ColumnDefinition(
        name="MTEB Score",
        api_name="Mean (TaskType)",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="140px",
        default_visible=True,
        colorize=True,
        description="MTEB Score: Average of task type category scores",
    ),
    ColumnDefinition(
        name="Legal Score",
        api_name="Score(Legal)",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.LEGAL,
        width="120px",
        default_visible=True,
        colorize=True,
        description="Mean of legal benchmark scores (Contracts, Regulation, Caselaw)",
    ),
    ColumnDefinition(
        name="Pure Token Count",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="150px",
        decimals=0,
        default_visible=True,
        description="Tokens that are morphologically pure",
    ),
    ColumnDefinition(
        name="Max Sequence Length",
        api_name="Max Tokens",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MODEL_INFO,
        width="160px",
        decimals=0,
        default_visible=True,
        description="Maximum sequence length",
    ),
    ColumnDefinition(
        name="Parameters",
        api_name="Number of Parameters",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MODEL_INFO,
        width="120px",
        decimals=0,
        default_visible=True,
        description="Number of model parameters (e.g., 1.2B)",
    ),
    ColumnDefinition(
        name="Model Architecture",
        column_type=ColumnType.STRING,
        group=ColumnGroup.MODEL_INFO,
        width="180px",
        default_visible=True,
        description="Underlying model architecture (e.g., XLMRobertaModel)",
    ),
    # ------------------------------------------------------------------
    # Optional columns (hidden by default)
    # ------------------------------------------------------------------
    ColumnDefinition(
        name="Mean (Task)",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="120px",
        default_visible=False,
        colorize=True,
        description="Average of all individual task scores",
    ),
    ColumnDefinition(
        name="Contracts",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.LEGAL,
        width="110px",
        default_visible=False,
        colorize=True,
        description="Performance on Turkish legal contract analysis",
    ),
    ColumnDefinition(
        name="Regulation",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.LEGAL,
        width="110px",
        default_visible=False,
        colorize=True,
        description="Performance on Turkish tax rulings retrieval",
    ),
    ColumnDefinition(
        name="Caselaw",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.LEGAL,
        width="110px",
        default_visible=False,
        colorize=True,
        description="Performance on Court of Cassation case retrieval",
    ),
    ColumnDefinition(
        name="Classification",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="130px",
        default_visible=False,
        colorize=True,
        description="Performance on Turkish classification tasks",
    ),
    ColumnDefinition(
        name="Clustering",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="120px",
        default_visible=False,
        colorize=True,
        description="Performance on Turkish clustering tasks",
    ),
    ColumnDefinition(
        name="Pair Classification",
        api_name="PairClassification",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="150px",
        default_visible=False,
        colorize=True,
        description="Performance on pair classification tasks (NLI)",
    ),
    ColumnDefinition(
        name="Retrieval",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="120px",
        default_visible=False,
        colorize=True,
        description="Performance on information retrieval tasks",
    ),
    ColumnDefinition(
        name="STS",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="100px",
        default_visible=False,
        colorize=True,
        description="Performance on Semantic Textual Similarity tasks",
    ),
    ColumnDefinition(
        name="Correlation",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.CORRELATION,
        width="120px",
        decimals=3,
        default_visible=False,
        colorize=True,
        description="Weighted average of correlation metrics",
    ),
    ColumnDefinition(
        name="Tokenizer Type",
        column_type=ColumnType.STRING,
        group=ColumnGroup.TOKENIZER,
        width="180px",
        default_visible=False,
        description="Tokenizer implementation type",
    ),
    ColumnDefinition(
        name="Unique Token Count",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="160px",
        decimals=0,
        default_visible=False,
        description="Number of unique tokens on Turkish MMLU",
    ),
    ColumnDefinition(
        name="Turkish Token Count",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="170px",
        decimals=0,
        default_visible=False,
        description="Unique tokens that are valid Turkish",
    ),
    ColumnDefinition(
        name="Turkish Token %",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="140px",
        default_visible=False,
        description="Percentage of valid Turkish tokens",
    ),
    ColumnDefinition(
        name="Pure Token %",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="130px",
        default_visible=False,
        description="Percentage of pure root word tokens",
    ),
    ColumnDefinition(
        name="Embed Dim",
        api_name="Embedding Dimensions",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MODEL_INFO,
        width="120px",
        decimals=0,
        default_visible=False,
        description="Embedding dimension size",
    ),
    ColumnDefinition(
        name="Vocab Size",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MODEL_INFO,
        width="120px",
        decimals=0,
        default_visible=False,
        description="Vocabulary size",
    ),
    ColumnDefinition(
        name="Model Type",
        column_type=ColumnType.STRING,
        group=ColumnGroup.MODEL_INFO,
        width="130px",
        default_visible=False,
        description="Model type: Embedding, MLM, CLM-Embedding, or Seq2Seq",
    ),
]
|
|
|
|
|
|
|
|
class ColumnRegistry:
    """
    Central registry for column definitions.

    Wraps an ordered list of ``ColumnDefinition`` objects and offers lookup
    by display name or CSV key, plus filtered views (default/optional/
    colorized/numeric columns, per-group columns).

    If two definitions share a name (or CSV key), the later one wins in the
    corresponding lookup, while the ordered views still list both.
    """

    def __init__(self, definitions: Optional[List[ColumnDefinition]] = None):
        """Build the registry and its lookup indexes.

        Args:
            definitions: Column definitions to register, in display order.
                When omitted (``None``), the module-level
                ``COLUMN_DEFINITIONS`` list is used.
        """
        # Fall back only when nothing was passed: `definitions or ...` would
        # also replace an explicitly supplied empty list with the defaults.
        source = COLUMN_DEFINITIONS if definitions is None else definitions
        # Snapshot the list so later mutation of the caller's list cannot
        # drift out of sync with the indexes built below.
        self._definitions: List[ColumnDefinition] = list(source)
        self._by_name: Dict[str, ColumnDefinition] = {
            col.name: col for col in self._definitions
        }
        self._by_csv_key: Dict[str, ColumnDefinition] = {
            col.csv_key: col for col in self._definitions
        }

    @property
    def all_columns(self) -> List[str]:
        """All column names, in definition order."""
        return [col.name for col in self._definitions]

    @property
    def default_columns(self) -> List[str]:
        """Names of columns whose ``default_visible`` flag is set."""
        return [col.name for col in self._definitions if col.default_visible]

    @property
    def optional_columns(self) -> List[str]:
        """Names of columns that are not visible by default (toggleable)."""
        return [col.name for col in self._definitions if not col.default_visible]

    @property
    def score_columns(self) -> List[str]:
        """Names of columns flagged for colorization."""
        return [col.name for col in self._definitions if col.colorize]

    @property
    def numeric_columns(self) -> List[str]:
        """Names of columns whose type is ``ColumnType.NUMBER``."""
        return [
            col.name
            for col in self._definitions
            if col.column_type == ColumnType.NUMBER
        ]

    def get(self, name: str) -> Optional[ColumnDefinition]:
        """Return the definition with display name ``name``, or ``None``."""
        return self._by_name.get(name)

    def get_by_csv_key(self, csv_key: str) -> Optional[ColumnDefinition]:
        """Return the definition whose CSV key is ``csv_key``, or ``None``."""
        return self._by_csv_key.get(csv_key)

    def get_by_group(self, group: ColumnGroup) -> List[ColumnDefinition]:
        """Return all definitions belonging to ``group``, in definition order."""
        return [col for col in self._definitions if col.group == group]

    def get_group_names(self, group: ColumnGroup) -> List[str]:
        """Return the display names of all columns in ``group``."""
        return [col.name for col in self.get_by_group(group)]

    def get_datatypes(self, columns: List[str]) -> List[str]:
        """Return the datatype string for each known column in ``columns``.

        Names that are not registered are skipped, so the result can be
        shorter than ``columns``.
        """
        return [
            self._by_name[col].column_type.value
            for col in columns
            if col in self._by_name
        ]

    def get_widths(self, columns: List[str]) -> List[str]:
        """Return the width string for each known column in ``columns``.

        Names that are not registered are skipped, so the result can be
        shorter than ``columns``.
        """
        return [
            self._by_name[col].width
            for col in columns
            if col in self._by_name
        ]

    def get_csv_mapping(self) -> Dict[str, str]:
        """Return a mapping from CSV keys to display names.

        Only columns whose CSV key differs from their display name are
        included.
        """
        return {
            col.csv_key: col.name
            for col in self._definitions
            if col.csv_key != col.name
        }
|
|
|
|
|
|
|
|
|
|
|
# Module-level registry instance, constructed with no arguments so it is
# populated from COLUMN_DEFINITIONS.
column_registry = ColumnRegistry()
|
|
|