# File size: 12,400 Bytes
# bc37111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
"""
Centralized Column Definitions

Single source of truth for all leaderboard columns.
Add new columns here and they propagate everywhere automatically.
"""

from dataclasses import dataclass
from enum import Enum, auto
from typing import List, Dict, Optional


class ColumnType(Enum):
    """Column data types for Gradio.

    Each member's value is the datatype string that
    ``ColumnRegistry.get_datatypes`` returns for a column of that type.
    """
    NUMBER = "number"  # numeric columns (scores, counts, sizes)
    STRING = "str"     # plain-text columns
    HTML = "html"      # markup columns; used by the "Model" column


class ColumnGroup(Enum):
    """Column groupings for organization and filtering.

    Member values are generated by ``auto()``;
    ``ColumnRegistry.get_by_group`` selects columns by comparing their
    ``group`` against one of these members.
    """
    CORE = auto()           # Always visible: Rank, Model
    LEGAL = auto()          # Legal benchmark scores
    MTEB = auto()           # MTEB task type scores
    TOKENIZER = auto()      # Tokenizer quality metrics
    MODEL_INFO = auto()     # Model metadata
    CORRELATION = auto()    # Correlation metrics


@dataclass
class ColumnDefinition:
    """
    Metadata record describing one leaderboard column.

    Every piece of per-column metadata is kept on this record so that
    other code can derive what it needs from a single place.
    """
    # Display name of the column; also the CSV key when api_name is unset.
    name: str
    # Key used for this column in CSV files, when it differs from `name`.
    api_name: Optional[str] = None
    # Data type of the column (see ColumnType).
    column_type: ColumnType = ColumnType.STRING
    # Logical group the column belongs to (see ColumnGroup).
    group: ColumnGroup = ColumnGroup.CORE
    # Column width, written as a string with a "px" suffix.
    width: str = "120px"
    # NOTE(review): presumably the number of decimal places to display;
    # no consumer is visible in this module - confirm against the formatter.
    decimals: int = 2
    # Whether the column is shown by default.
    default_visible: bool = True
    # Whether the column's values should be colorized.
    colorize: bool = False
    # Human-readable explanation of the column.
    description: str = ""

    @property
    def csv_key(self) -> str:
        """Return the CSV-file key: ``api_name`` when set, else ``name``."""
        if self.api_name:
            return self.api_name
        return self.name


# Ordered list of every leaderboard column definition.
#
# - List order is significant: ColumnRegistry.all_columns (and the other
#   name lists derived from it) return names in exactly this order.
# - In the per-entry comments, "default" marks entries declared with
#   default_visible=True and "optional" marks default_visible=False.
# - Entries without an api_name use their `name` as the CSV key
#   (see ColumnDefinition.csv_key).
COLUMN_DEFINITIONS: List[ColumnDefinition] = [
    # 1. Rank (always first)
    ColumnDefinition(
        name="Rank",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.CORE,
        width="50px",
        decimals=0,
        default_visible=True,
        description="Rank by MTEB Score (Mean TaskType)"
    ),
    # 2. Model (always second)
    ColumnDefinition(
        name="Model",
        column_type=ColumnType.HTML,
        group=ColumnGroup.CORE,
        width="280px",
        default_visible=True,
        colorize=False,
        description="Model name with HuggingFace link"
    ),
    # 3. MTEB Score - default
    ColumnDefinition(
        name="MTEB Score",
        api_name="Mean (TaskType)",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="140px",
        default_visible=True,
        colorize=True,
        description="MTEB Score: Average of task type category scores"
    ),
    # 4. Legal Score - default
    ColumnDefinition(
        name="Legal Score",
        api_name="Score(Legal)",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.LEGAL,
        width="120px",
        default_visible=True,
        colorize=True,
        description="Mean of legal benchmark scores (Contracts, Regulation, Caselaw)"
    ),
    # 5. Pure Token Count - default
    ColumnDefinition(
        name="Pure Token Count",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="150px",
        decimals=0,
        default_visible=True,
        description="Tokens that are morphologically pure"
    ),
    # 6. Max Sequence Length - default
    ColumnDefinition(
        name="Max Sequence Length",
        api_name="Max Tokens",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MODEL_INFO,
        width="160px",
        decimals=0,
        default_visible=True,
        description="Maximum sequence length"
    ),
    # 7. Parameters - default
    ColumnDefinition(
        name="Parameters",
        api_name="Number of Parameters",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MODEL_INFO,
        width="120px",
        decimals=0,
        default_visible=True,
        description="Number of model parameters (e.g., 1.2B)"
    ),
    # 8. Model Architecture - default
    ColumnDefinition(
        name="Model Architecture",
        column_type=ColumnType.STRING,
        group=ColumnGroup.MODEL_INFO,
        width="180px",
        default_visible=True,
        description="Underlying model architecture (e.g., XLMRobertaModel)"
    ),
    # 9. Mean (Task) - optional
    ColumnDefinition(
        name="Mean (Task)",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="120px",
        default_visible=False,
        colorize=True,
        description="Average of all individual task scores"
    ),
    # 10. Contracts - optional
    ColumnDefinition(
        name="Contracts",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.LEGAL,
        width="110px",
        default_visible=False,
        colorize=True,
        description="Performance on Turkish legal contract analysis"
    ),
    # 11. Regulation - optional
    ColumnDefinition(
        name="Regulation",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.LEGAL,
        width="110px",
        default_visible=False,
        colorize=True,
        description="Performance on Turkish tax rulings retrieval"
    ),
    # 12. Caselaw - optional
    ColumnDefinition(
        name="Caselaw",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.LEGAL,
        width="110px",
        default_visible=False,
        colorize=True,
        description="Performance on Court of Cassation case retrieval"
    ),
    # 13. Classification - optional
    ColumnDefinition(
        name="Classification",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="130px",
        default_visible=False,
        colorize=True,
        description="Performance on Turkish classification tasks"
    ),
    # 14. Clustering - optional
    ColumnDefinition(
        name="Clustering",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="120px",
        default_visible=False,
        colorize=True,
        description="Performance on Turkish clustering tasks"
    ),
    # 15. Pair Classification - optional
    ColumnDefinition(
        name="Pair Classification",
        api_name="PairClassification",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="150px",
        default_visible=False,
        colorize=True,
        description="Performance on pair classification tasks (NLI)"
    ),
    # 16. Retrieval - optional
    ColumnDefinition(
        name="Retrieval",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="120px",
        default_visible=False,
        colorize=True,
        description="Performance on information retrieval tasks"
    ),
    # 17. STS - optional
    ColumnDefinition(
        name="STS",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MTEB,
        width="100px",
        default_visible=False,
        colorize=True,
        description="Performance on Semantic Textual Similarity tasks"
    ),
    # 18. Correlation - optional
    ColumnDefinition(
        name="Correlation",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.CORRELATION,
        width="120px",
        decimals=3,
        default_visible=False,
        colorize=True,
        description="Weighted average of correlation metrics"
    ),
    # 19. Tokenizer Type - optional
    ColumnDefinition(
        name="Tokenizer Type",
        column_type=ColumnType.STRING,
        group=ColumnGroup.TOKENIZER,
        width="180px",
        default_visible=False,
        description="Tokenizer implementation type"
    ),
    # 20. Unique Token Count - optional
    ColumnDefinition(
        name="Unique Token Count",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="160px",
        decimals=0,
        default_visible=False,
        description="Number of unique tokens on Turkish MMLU"
    ),
    # 21. Turkish Token Count - optional
    ColumnDefinition(
        name="Turkish Token Count",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="170px",
        decimals=0,
        default_visible=False,
        description="Unique tokens that are valid Turkish"
    ),
    # 22. Turkish Token % - optional
    ColumnDefinition(
        name="Turkish Token %",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="140px",
        default_visible=False,
        description="Percentage of valid Turkish tokens"
    ),
    # 23. Pure Token % - optional
    ColumnDefinition(
        name="Pure Token %",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.TOKENIZER,
        width="130px",
        default_visible=False,
        description="Percentage of pure root word tokens"
    ),
    # 24. Embed Dim - optional
    ColumnDefinition(
        name="Embed Dim",
        api_name="Embedding Dimensions",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MODEL_INFO,
        width="120px",
        decimals=0,
        default_visible=False,
        description="Embedding dimension size"
    ),
    # 25. Vocab Size - optional
    ColumnDefinition(
        name="Vocab Size",
        column_type=ColumnType.NUMBER,
        group=ColumnGroup.MODEL_INFO,
        width="120px",
        decimals=0,
        default_visible=False,
        description="Vocabulary size"
    ),
    # 26. Model Type - optional
    ColumnDefinition(
        name="Model Type",
        column_type=ColumnType.STRING,
        group=ColumnGroup.MODEL_INFO,
        width="130px",
        default_visible=False,
        description="Model type: Embedding, MLM, CLM-Embedding, or Seq2Seq"
    ),
]


class ColumnRegistry:
    """
    Central registry for column definitions.

    Wraps an ordered collection of ``ColumnDefinition`` objects and offers
    lookups by display name, by CSV key and by group, plus derived name
    lists (default / optional / score / numeric columns).

    The definitions are copied at construction time, so the ordered list
    and the lookup dictionaries always describe the same set of columns.
    If two definitions share a name (or a CSV key), the later one wins in
    the corresponding lookup dictionary.
    """

    def __init__(self, definitions: Optional[List[ColumnDefinition]] = None):
        """
        Build the registry.

        Args:
            definitions: Column definitions in display order. ``None``
                (the default) selects the module-level
                ``COLUMN_DEFINITIONS``. An explicitly passed empty list
                produces an empty registry.
        """
        # Test against None instead of truthiness: an empty list is a valid
        # request for an empty registry and must not silently fall back to
        # the global definitions.
        source = COLUMN_DEFINITIONS if definitions is None else definitions
        # Snapshot the input so later mutation of the caller's list cannot
        # make the ordered list and the lookup dicts disagree.
        self._definitions: List[ColumnDefinition] = list(source)
        self._by_name: Dict[str, ColumnDefinition] = {
            col.name: col for col in self._definitions
        }
        self._by_csv_key: Dict[str, ColumnDefinition] = {
            col.csv_key: col for col in self._definitions
        }

    @property
    def all_columns(self) -> List[str]:
        """All column names in definition order."""
        return [col.name for col in self._definitions]

    @property
    def default_columns(self) -> List[str]:
        """Names of columns whose ``default_visible`` flag is set."""
        return [col.name for col in self._definitions if col.default_visible]

    @property
    def optional_columns(self) -> List[str]:
        """Names of columns whose ``default_visible`` flag is not set."""
        return [col.name for col in self._definitions if not col.default_visible]

    @property
    def score_columns(self) -> List[str]:
        """Names of columns that should be colorized."""
        return [col.name for col in self._definitions if col.colorize]

    @property
    def numeric_columns(self) -> List[str]:
        """Names of columns whose type is ``ColumnType.NUMBER``."""
        return [
            col.name
            for col in self._definitions
            if col.column_type == ColumnType.NUMBER
        ]

    def get(self, name: str) -> Optional[ColumnDefinition]:
        """Return the definition with display name ``name``, or ``None``."""
        return self._by_name.get(name)

    def get_by_csv_key(self, csv_key: str) -> Optional[ColumnDefinition]:
        """Return the definition whose CSV key is ``csv_key``, or ``None``."""
        return self._by_csv_key.get(csv_key)

    def get_by_group(self, group: ColumnGroup) -> List[ColumnDefinition]:
        """Return all definitions belonging to ``group``, in definition order."""
        return [col for col in self._definitions if col.group == group]

    def get_group_names(self, group: ColumnGroup) -> List[str]:
        """Return the display names of all columns belonging to ``group``."""
        return [col.name for col in self.get_by_group(group)]

    def get_datatypes(self, columns: List[str]) -> List[str]:
        """
        Return the Gradio datatype string for each known column.

        Names not present in the registry are skipped, so the result can
        be shorter than ``columns``.
        """
        return [
            self._by_name[col].column_type.value
            for col in columns
            if col in self._by_name
        ]

    def get_widths(self, columns: List[str]) -> List[str]:
        """
        Return the width string for each known column.

        Names not present in the registry are skipped, so the result can
        be shorter than ``columns``.
        """
        return [
            self._by_name[col].width
            for col in columns
            if col in self._by_name
        ]

    def get_csv_mapping(self) -> Dict[str, str]:
        """
        Return a mapping from CSV keys to display names.

        Only columns whose CSV key differs from the display name are
        included.
        """
        return {
            col.csv_key: col.name
            for col in self._definitions
            if col.csv_key != col.name
        }


# Global registry instance, created when this module is imported. No
# definitions are passed, so it is built from COLUMN_DEFINITIONS.
column_registry = ColumnRegistry()