Fix: Openness display, Average Score calc, Column order/headers/sorting, Horizontal scroll
Browse files- frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Table/Table.js +27 -39
- frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/defaults.js +6 -0
- frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/modelOpenness.js +14 -2
- frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useDataUtils.js +40 -4
- frontend/src/pages/LeaderboardPage/components/Leaderboard/utils/columnUtils.js +44 -3
frontend/src/pages/LeaderboardPage/components/Leaderboard/components/Table/Table.js
CHANGED
|
@@ -297,7 +297,7 @@ const LeaderboardTable = ({
|
|
| 297 |
const paddingBottom =
|
| 298 |
virtualRows.length > 0
|
| 299 |
? unpinnedRows.length * currentRowHeight -
|
| 300 |
-
|
| 301 |
: 0;
|
| 302 |
|
| 303 |
// Handle column reset
|
|
@@ -402,8 +402,8 @@ const LeaderboardTable = ({
|
|
| 402 |
backgroundColor: isSticky
|
| 403 |
? theme.palette.background.paper
|
| 404 |
: (sortedIndex + 1) % 2 === 0
|
| 405 |
-
|
| 406 |
-
|
| 407 |
position: isSticky ? "sticky" : "relative",
|
| 408 |
top: isSticky
|
| 409 |
? `${headerHeight + stickyIndex * currentRowHeight}px`
|
|
@@ -411,24 +411,24 @@ const LeaderboardTable = ({
|
|
| 411 |
zIndex: isSticky ? 2 : 1,
|
| 412 |
boxShadow: isSticky
|
| 413 |
? `0 1px 1px ${alpha(
|
| 414 |
-
|
| 415 |
-
|
| 416 |
-
|
| 417 |
: "none",
|
| 418 |
"&::after": isSticky
|
| 419 |
? {
|
| 420 |
-
|
| 421 |
-
|
| 422 |
-
|
| 423 |
-
|
| 424 |
-
|
| 425 |
-
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
|
| 432 |
: {},
|
| 433 |
})}
|
| 434 |
>
|
|
@@ -596,35 +596,23 @@ const LeaderboardTable = ({
|
|
| 596 |
<Table
|
| 597 |
sx={{
|
| 598 |
margin: 0,
|
| 599 |
-
width: "
|
|
|
|
| 600 |
borderCollapse: "separate",
|
| 601 |
borderSpacing: 0,
|
| 602 |
-
tableLayout:
|
| 603 |
border: "none",
|
| 604 |
-
"& td, & th":
|
| 605 |
-
pinnedRows.length > 0
|
| 606 |
-
? {
|
| 607 |
-
width: `${100 / table.getAllColumns().length}%`,
|
| 608 |
-
}
|
| 609 |
-
: {},
|
| 610 |
}}
|
| 611 |
>
|
| 612 |
<colgroup>
|
| 613 |
-
{table.getAllColumns().map((column
|
| 614 |
<col
|
| 615 |
key={column.id}
|
| 616 |
-
style={
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
maxWidth: column.columnDef.size,
|
| 622 |
-
}
|
| 623 |
-
: {
|
| 624 |
-
minWidth: column.columnDef.size,
|
| 625 |
-
width: `${100 / (table.getAllColumns().length - 4)}%`,
|
| 626 |
-
}
|
| 627 |
-
}
|
| 628 |
/>
|
| 629 |
))}
|
| 630 |
</colgroup>
|
|
|
|
| 297 |
const paddingBottom =
|
| 298 |
virtualRows.length > 0
|
| 299 |
? unpinnedRows.length * currentRowHeight -
|
| 300 |
+
virtualRows[virtualRows.length - 1].end
|
| 301 |
: 0;
|
| 302 |
|
| 303 |
// Handle column reset
|
|
|
|
| 402 |
backgroundColor: isSticky
|
| 403 |
? theme.palette.background.paper
|
| 404 |
: (sortedIndex + 1) % 2 === 0
|
| 405 |
+
? "transparent"
|
| 406 |
+
: alpha(theme.palette.mode === "dark" ? "#fff" : "#000", 0.02),
|
| 407 |
position: isSticky ? "sticky" : "relative",
|
| 408 |
top: isSticky
|
| 409 |
? `${headerHeight + stickyIndex * currentRowHeight}px`
|
|
|
|
| 411 |
zIndex: isSticky ? 2 : 1,
|
| 412 |
boxShadow: isSticky
|
| 413 |
? `0 1px 1px ${alpha(
|
| 414 |
+
theme.palette.common.black,
|
| 415 |
+
theme.palette.mode === "dark" ? 0.1 : 0.05
|
| 416 |
+
)}`
|
| 417 |
: "none",
|
| 418 |
"&::after": isSticky
|
| 419 |
? {
|
| 420 |
+
content: '""',
|
| 421 |
+
position: "absolute",
|
| 422 |
+
left: 0,
|
| 423 |
+
right: 0,
|
| 424 |
+
height: "1px",
|
| 425 |
+
bottom: -1,
|
| 426 |
+
backgroundColor: alpha(
|
| 427 |
+
theme.palette.divider,
|
| 428 |
+
theme.palette.mode === "dark" ? 0.1 : 0.2
|
| 429 |
+
),
|
| 430 |
+
zIndex: 1,
|
| 431 |
+
}
|
| 432 |
: {},
|
| 433 |
})}
|
| 434 |
>
|
|
|
|
| 596 |
<Table
|
| 597 |
sx={{
|
| 598 |
margin: 0,
|
| 599 |
+
width: "max-content",
|
| 600 |
+
minWidth: "100%",
|
| 601 |
borderCollapse: "separate",
|
| 602 |
borderSpacing: 0,
|
| 603 |
+
tableLayout: "fixed",
|
| 604 |
border: "none",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
}}
|
| 606 |
>
|
| 607 |
<colgroup>
|
| 608 |
+
{table.getAllColumns().map((column) => (
|
| 609 |
<col
|
| 610 |
key={column.id}
|
| 611 |
+
style={{
|
| 612 |
+
width: column.columnDef.size,
|
| 613 |
+
minWidth: column.columnDef.size,
|
| 614 |
+
maxWidth: column.columnDef.size,
|
| 615 |
+
}}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 616 |
/>
|
| 617 |
))}
|
| 618 |
</colgroup>
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/defaults.js
CHANGED
|
@@ -145,6 +145,12 @@ const COLUMNS = {
|
|
| 145 |
defaultVisible: true,
|
| 146 |
label: "Greek Financial LLM Leaderboard",
|
| 147 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
},
|
| 149 |
MODEL_INFO: {
|
| 150 |
"metadata.co2_cost": {
|
|
|
|
| 145 |
defaultVisible: true,
|
| 146 |
label: "Greek Financial LLM Leaderboard",
|
| 147 |
},
|
| 148 |
+
"evaluations.bloomberggpt": {
|
| 149 |
+
group: "evaluation",
|
| 150 |
+
size: COLUMN_SIZES.BENCHMARK,
|
| 151 |
+
defaultVisible: true,
|
| 152 |
+
label: "BloombergGPT",
|
| 153 |
+
},
|
| 154 |
},
|
| 155 |
MODEL_INFO: {
|
| 156 |
"metadata.co2_cost": {
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/constants/modelOpenness.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
export const MODEL_OPENNESS = {
|
| 2 |
-
"GPT-4o": "
|
| 3 |
-
"o3-Mini": "
|
| 4 |
"Deepseek-V3": "Class III-Open Model",
|
| 5 |
"meta-llama/Llama-4-Scout-17B-16E-Instruct": "Class III-Open Model",
|
| 6 |
"meta-llama/Llama-3.1-70B-Instruct": "Class III-Open Model",
|
|
@@ -11,8 +11,20 @@ export const MODEL_OPENNESS = {
|
|
| 11 |
"TheFinAI/finma-7b-full": "Class III-Open Model",
|
| 12 |
"Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": "Class III-Open Model",
|
| 13 |
"cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": "Class III-Open Model",
|
|
|
|
|
|
|
| 14 |
"TheFinAI/FinMA-ES-Bilingual": "Class III-Open Model",
|
| 15 |
"TheFinAI/plutus-8B-instruct": "Class III-Open Model",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
"Qwen-VL-MAX": "Class III-Open Model",
|
| 17 |
"LLaVA-1.6 Vicuna-13B": "Class III-Open Model",
|
| 18 |
"Deepseek-VL-7B-Chat": "Class III-Open Model",
|
|
|
|
| 1 |
export const MODEL_OPENNESS = {
|
| 2 |
+
"GPT-4o": "Closed",
|
| 3 |
+
"o3-Mini": "Closed",
|
| 4 |
"Deepseek-V3": "Class III-Open Model",
|
| 5 |
"meta-llama/Llama-4-Scout-17B-16E-Instruct": "Class III-Open Model",
|
| 6 |
"meta-llama/Llama-3.1-70B-Instruct": "Class III-Open Model",
|
|
|
|
| 11 |
"TheFinAI/finma-7b-full": "Class III-Open Model",
|
| 12 |
"Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": "Class III-Open Model",
|
| 13 |
"cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": "Class III-Open Model",
|
| 14 |
+
"Trelis/Trelis-Function-Calling-V3-7B": "Class III-Open Model",
|
| 15 |
+
"Trelis/Trelis-Function-Calling-V4-7B": "Class III-Open Model",
|
| 16 |
"TheFinAI/FinMA-ES-Bilingual": "Class III-Open Model",
|
| 17 |
"TheFinAI/plutus-8B-instruct": "Class III-Open Model",
|
| 18 |
+
// BloombergGPT Dataset models - 按 MOF 标准分类
|
| 19 |
+
// Closed: 闭源商业模型
|
| 20 |
+
"GPT-5": "Closed",
|
| 21 |
+
"o3": "Closed",
|
| 22 |
+
"Gemini 2.5 Flash": "Closed",
|
| 23 |
+
"Grok4": "Closed",
|
| 24 |
+
"Claude 4 Sonnet": "Closed",
|
| 25 |
+
// Class III – Open Model: 开放模型架构、参数、技术报告、评估结果等
|
| 26 |
+
"Llama-3.1-8B-Instruct": "Class III-Open Model",
|
| 27 |
+
"DeepSeek Chat": "Class III-Open Model",
|
| 28 |
"Qwen-VL-MAX": "Class III-Open Model",
|
| 29 |
"LLaVA-1.6 Vicuna-13B": "Class III-Open Model",
|
| 30 |
"Deepseek-VL-7B-Chat": "Class III-Open Model",
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useDataUtils.js
CHANGED
|
@@ -88,6 +88,13 @@ const HARDCODED_SCORES = {
|
|
| 88 |
"TheFinAI/FinMA-ES-Bilingual": 0.35, "TheFinAI/plutus-8B-instruct": 7.24, "Qwen-VL-MAX": 0.00,
|
| 89 |
"LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
|
| 90 |
"Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
}
|
| 92 |
};
|
| 93 |
|
|
@@ -127,6 +134,29 @@ export const useColorGenerator = (minAverage, maxAverage) => {
|
|
| 127 |
}, [minAverage, maxAverage]);
|
| 128 |
};
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
// Process data with boolean standardization
|
| 131 |
export const useProcessedData = (data, averageMode, visibleColumns) => {
|
| 132 |
return useMemo(() => {
|
|
@@ -154,12 +184,17 @@ export const useProcessedData = (data, averageMode, visibleColumns) => {
|
|
| 154 |
spanish_average: getHardcodedScore(modelName, 'spanish'),
|
| 155 |
greek_average: getHardcodedScore(modelName, 'greek'),
|
| 156 |
bilingual_average: getHardcodedScore(modelName, 'bilingual'),
|
| 157 |
-
multilingual_average: getHardcodedScore(modelName, 'multilingual')
|
|
|
|
| 158 |
};
|
| 159 |
|
| 160 |
-
// 计算总平均分(
|
| 161 |
-
const
|
| 162 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
// 创建模型数据
|
| 165 |
modelList.push({
|
|
@@ -168,6 +203,7 @@ export const useProcessedData = (data, averageMode, visibleColumns) => {
|
|
| 168 |
name: modelName,
|
| 169 |
average_score: averageScore,
|
| 170 |
type: "chat", // 统一设为chat类型
|
|
|
|
| 171 |
},
|
| 172 |
evaluations: hardcodedEvaluations,
|
| 173 |
features: {
|
|
|
|
| 88 |
"TheFinAI/FinMA-ES-Bilingual": 0.35, "TheFinAI/plutus-8B-instruct": 7.24, "Qwen-VL-MAX": 0.00,
|
| 89 |
"LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
|
| 90 |
"Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
|
| 91 |
+
},
|
| 92 |
+
// BloombergGPT Dataset Leaderboard 平均分
|
| 93 |
+
bloomberggpt: {
|
| 94 |
+
"GPT-4o": 83.25, "GPT-5": 81.92, "o3-mini": 81.24, "o3": 81.28,
|
| 95 |
+
"Gemini 2.5 Flash": 80.88, "Grok4": 79.33, "Claude 4 Sonnet": 79.50,
|
| 96 |
+
"Llama-3.1-8B-Instruct": 77.75, "meta-llama/Llama-3.1-70B-Instruct": 77.75,
|
| 97 |
+
"DeepSeek Chat": 77.13, "Deepseek-V3": 77.13
|
| 98 |
}
|
| 99 |
};
|
| 100 |
|
|
|
|
| 134 |
}, [minAverage, maxAverage]);
|
| 135 |
};
|
| 136 |
|
| 137 |
+
// Openness data from BloombergGPT Dataset Leaderboard
|
| 138 |
+
// Openness class per model name for the BloombergGPT Dataset Leaderboard.
// Keys are matched exactly (case-sensitive) against the model name.
const MODEL_OPENNESS = {
  // BloombergGPT Dataset models, classified per the MOF standard.
  // Closed: closed-source commercial models.
  "GPT-4o": "Closed",
  "GPT-5": "Closed",
  "o3-mini": "Closed",
  // Alternate casing of the same model name, so both spellings resolve to the
  // same class. NOTE(review): confirm which casing the model list really uses.
  "o3-Mini": "Closed",
  "o3": "Closed",
  "Gemini 2.5 Flash": "Closed",
  "Grok4": "Closed",
  "Claude 4 Sonnet": "Closed",
  // Class III - Open Model: open model architecture, parameters, technical
  // report, evaluation results, etc.
  "Llama-3.1-8B-Instruct": "Class III-Open Model",
  "DeepSeek Chat": "Class III-Open Model",
  "Deepseek-V3": "Class III-Open Model", // Map Deepseek-V3 to DeepSeek Chat's classification
  "Llama-3.1-70B-Instruct": "Class III-Open Model",
  "meta-llama/Llama-3.1-70B-Instruct": "Class III-Open Model",
};

/**
 * Look up the openness class for a model.
 *
 * Only the table's own keys are consulted, so names that collide with
 * inherited object properties (e.g. "constructor", "toString") are reported
 * as "Unclassified" instead of leaking a prototype member.
 *
 * @param {string} modelName - Model name, matched exactly against the table keys.
 * @returns {string} The openness class, or "Unclassified" when the model is not listed.
 */
const getModelOpenness = (modelName) => {
  const isListed = Object.prototype.hasOwnProperty.call(MODEL_OPENNESS, modelName);
  return (isListed && MODEL_OPENNESS[modelName]) || "Unclassified";
};
|
| 159 |
+
|
| 160 |
// Process data with boolean standardization
|
| 161 |
export const useProcessedData = (data, averageMode, visibleColumns) => {
|
| 162 |
return useMemo(() => {
|
|
|
|
| 184 |
spanish_average: getHardcodedScore(modelName, 'spanish'),
|
| 185 |
greek_average: getHardcodedScore(modelName, 'greek'),
|
| 186 |
bilingual_average: getHardcodedScore(modelName, 'bilingual'),
|
| 187 |
+
multilingual_average: getHardcodedScore(modelName, 'multilingual'),
|
| 188 |
+
bloomberggpt: getHardcodedScore(modelName, 'bloomberggpt')
|
| 189 |
};
|
| 190 |
|
| 191 |
+
// 计算总平均分(缺失值视为0,分母为类别总数)
|
| 192 |
+
const totalScore = Object.values(hardcodedEvaluations).reduce((acc, score) => acc + (score || 0), 0);
|
| 193 |
+
const categoryCount = Object.keys(hardcodedEvaluations).length; // 应该是 10
|
| 194 |
+
const averageScore = totalScore / categoryCount;
|
| 195 |
+
|
| 196 |
+
// 获取Openness
|
| 197 |
+
const openness = getModelOpenness(modelName);
|
| 198 |
|
| 199 |
// 创建模型数据
|
| 200 |
modelList.push({
|
|
|
|
| 203 |
name: modelName,
|
| 204 |
average_score: averageScore,
|
| 205 |
type: "chat", // 统一设为chat类型
|
| 206 |
+
openness: openness, // 添加 openness
|
| 207 |
},
|
| 208 |
evaluations: hardcodedEvaluations,
|
| 209 |
features: {
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/utils/columnUtils.js
CHANGED
|
@@ -20,6 +20,18 @@ import OpenInNewIcon from "@mui/icons-material/OpenInNew";
|
|
| 20 |
import { alpha } from "@mui/material/styles";
|
| 21 |
import InfoIconWithTooltip from "../../../../../components/shared/InfoIconWithTooltip";
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
const DatabaseIcon = () => (
|
| 24 |
<svg
|
| 25 |
className="mr-1.5 text-gray-400 group-hover:text-red-500"
|
|
@@ -452,7 +464,7 @@ const createGreekLeaderboardHeader = (header) => (
|
|
| 452 |
}}
|
| 453 |
>
|
| 454 |
<HeaderLabel
|
| 455 |
-
label="Greek
|
| 456 |
tooltip="Average performance on Greek financial tasks"
|
| 457 |
className="header-label"
|
| 458 |
isSorted={header?.column?.getIsSorted()}
|
|
@@ -511,7 +523,7 @@ const createLeaderboardHeader = (label, tooltip, linkUrl) => (header) => (
|
|
| 511 |
}}
|
| 512 |
>
|
| 513 |
<HeaderLabel
|
| 514 |
-
label={
|
| 515 |
tooltip={tooltip}
|
| 516 |
className="header-label"
|
| 517 |
isSorted={header?.column?.getIsSorted()}
|
|
@@ -837,6 +849,7 @@ export const createColumns = (
|
|
| 837 |
{
|
| 838 |
accessorKey: "model.average_score",
|
| 839 |
header: createHeaderCell("Average", COLUMN_TOOLTIPS.AVERAGE),
|
|
|
|
| 840 |
cell: ({ row, getValue }) =>
|
| 841 |
createScoreCell(getValue, row, "model.average_score"),
|
| 842 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"],
|
|
@@ -1014,8 +1027,27 @@ export const createColumns = (
|
|
| 1014 |
};
|
| 1015 |
|
| 1016 |
const evaluationColumns = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1017 |
...(showGreek ? [{
|
| 1018 |
accessorKey: "evaluations.greek_average",
|
|
|
|
| 1019 |
header: createGreekLeaderboardHeader,
|
| 1020 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.greek_average"),
|
| 1021 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
@@ -1033,6 +1065,7 @@ export const createColumns = (
|
|
| 1033 |
}] : []),
|
| 1034 |
{
|
| 1035 |
accessorKey: "evaluations.vision_average",
|
|
|
|
| 1036 |
header: createLeaderboardHeader("Vision", "Average performance on vision tasks", null),
|
| 1037 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.vision_average"),
|
| 1038 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
@@ -1050,6 +1083,7 @@ export const createColumns = (
|
|
| 1050 |
},
|
| 1051 |
{
|
| 1052 |
accessorKey: "evaluations.audio_average",
|
|
|
|
| 1053 |
header: createLeaderboardHeader("Audio", "Average performance on audio tasks", null),
|
| 1054 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.audio_average"),
|
| 1055 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
@@ -1067,6 +1101,7 @@ export const createColumns = (
|
|
| 1067 |
},
|
| 1068 |
{
|
| 1069 |
accessorKey: "evaluations.english_average",
|
|
|
|
| 1070 |
header: createLeaderboardHeader("English", "Average performance on English language tasks", null),
|
| 1071 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.english_average"),
|
| 1072 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
@@ -1084,6 +1119,7 @@ export const createColumns = (
|
|
| 1084 |
},
|
| 1085 |
{
|
| 1086 |
accessorKey: "evaluations.chinese_average",
|
|
|
|
| 1087 |
header: createLeaderboardHeader("Chinese", "Average performance on Chinese language tasks", null),
|
| 1088 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.chinese_average"),
|
| 1089 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
@@ -1101,6 +1137,7 @@ export const createColumns = (
|
|
| 1101 |
},
|
| 1102 |
{
|
| 1103 |
accessorKey: "evaluations.japanese_average",
|
|
|
|
| 1104 |
header: createLeaderboardHeader("Japanese", "Average performance on Japanese language tasks", null),
|
| 1105 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.japanese_average"),
|
| 1106 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
@@ -1118,6 +1155,7 @@ export const createColumns = (
|
|
| 1118 |
},
|
| 1119 |
{
|
| 1120 |
accessorKey: "evaluations.spanish_average",
|
|
|
|
| 1121 |
header: createLeaderboardHeader("Spanish", "Average performance on Spanish language tasks", null),
|
| 1122 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.spanish_average"),
|
| 1123 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
@@ -1135,6 +1173,7 @@ export const createColumns = (
|
|
| 1135 |
},
|
| 1136 |
{
|
| 1137 |
accessorKey: "evaluations.bilingual_average",
|
|
|
|
| 1138 |
header: createLeaderboardHeader("Bilingual", "Average performance on bilingual tasks", null),
|
| 1139 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.bilingual_average"),
|
| 1140 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
@@ -1152,6 +1191,7 @@ export const createColumns = (
|
|
| 1152 |
},
|
| 1153 |
{
|
| 1154 |
accessorKey: "evaluations.multilingual_average",
|
|
|
|
| 1155 |
header: createLeaderboardHeader("Multilingual", "Average performance on multilingual tasks", null),
|
| 1156 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.multilingual_average"),
|
| 1157 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
@@ -1166,7 +1206,8 @@ export const createColumns = (
|
|
| 1166 |
backgroundColor: (theme) => alpha(theme.palette.secondary.main, 0.05),
|
| 1167 |
}),
|
| 1168 |
},
|
| 1169 |
-
}
|
|
|
|
| 1170 |
];
|
| 1171 |
|
| 1172 |
const optionalColumns = [
|
|
|
|
| 20 |
import { alpha } from "@mui/material/styles";
|
| 21 |
import InfoIconWithTooltip from "../../../../../components/shared/InfoIconWithTooltip";
|
| 22 |
|
| 23 |
+
// Custom sorting function for scores handling nulls
|
| 24 |
+
/**
 * Ascending comparator for score columns that tolerates missing values.
 *
 * Missing (null/undefined) and non-numeric values are ranked as -Infinity so
 * they sort below every real score and therefore land last when the column is
 * sorted in descending order.
 *
 * @param {{ getValue: (columnId: string) => unknown }} rowA - First row.
 * @param {{ getValue: (columnId: string) => unknown }} rowB - Second row.
 * @param {string} columnId - Id of the column being sorted.
 * @returns {number} -1 if rowA ranks lower, 1 if higher, 0 if equal.
 */
const scoreSort = (rowA, rowB, columnId) => {
  const toRank = (value) => {
    if (value === null || value === undefined) {
      return -Infinity;
    }
    const numeric = Number(value);
    // NaN compares false against everything, which would make the comparator
    // return 0 for any pairing and break ordering; rank it with missing values.
    return Number.isNaN(numeric) ? -Infinity : numeric;
  };

  const rankA = toRank(rowA.getValue(columnId));
  const rankB = toRank(rowB.getValue(columnId));

  if (rankA < rankB) {
    return -1;
  }
  return rankA > rankB ? 1 : 0;
};
|
| 34 |
+
|
| 35 |
const DatabaseIcon = () => (
|
| 36 |
<svg
|
| 37 |
className="mr-1.5 text-gray-400 group-hover:text-red-500"
|
|
|
|
| 464 |
}}
|
| 465 |
>
|
| 466 |
<HeaderLabel
|
| 467 |
+
label="Greek"
|
| 468 |
tooltip="Average performance on Greek financial tasks"
|
| 469 |
className="header-label"
|
| 470 |
isSorted={header?.column?.getIsSorted()}
|
|
|
|
| 523 |
}}
|
| 524 |
>
|
| 525 |
<HeaderLabel
|
| 526 |
+
label={label}
|
| 527 |
tooltip={tooltip}
|
| 528 |
className="header-label"
|
| 529 |
isSorted={header?.column?.getIsSorted()}
|
|
|
|
| 849 |
{
|
| 850 |
accessorKey: "model.average_score",
|
| 851 |
header: createHeaderCell("Average", COLUMN_TOOLTIPS.AVERAGE),
|
| 852 |
+
sortingFn: scoreSort,
|
| 853 |
cell: ({ row, getValue }) =>
|
| 854 |
createScoreCell(getValue, row, "model.average_score"),
|
| 855 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"],
|
|
|
|
| 1027 |
};
|
| 1028 |
|
| 1029 |
const evaluationColumns = [
|
| 1030 |
+
{
|
| 1031 |
+
accessorKey: "evaluations.bloomberggpt",
|
| 1032 |
+
sortingFn: scoreSort,
|
| 1033 |
+
header: createLeaderboardHeader("BloombergGPT", "BloombergGPT Dataset Leaderboard - Financial sentiment analysis", "https://huggingface.co/spaces/mirageco/BloombergGPT-Dataset-Leaderboard"),
|
| 1034 |
+
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.bloomberggpt"),
|
| 1035 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
| 1036 |
+
meta: {
|
| 1037 |
+
headerStyle: {
|
| 1038 |
+
backgroundColor: (theme) => alpha(theme.palette.success.main, 0.08),
|
| 1039 |
+
},
|
| 1040 |
+
cellStyle: (value) => ({
|
| 1041 |
+
position: "relative",
|
| 1042 |
+
overflow: "hidden",
|
| 1043 |
+
padding: "8px 16px",
|
| 1044 |
+
backgroundColor: (theme) => alpha(theme.palette.success.main, 0.08),
|
| 1045 |
+
}),
|
| 1046 |
+
},
|
| 1047 |
+
},
|
| 1048 |
...(showGreek ? [{
|
| 1049 |
accessorKey: "evaluations.greek_average",
|
| 1050 |
+
sortingFn: scoreSort,
|
| 1051 |
header: createGreekLeaderboardHeader,
|
| 1052 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.greek_average"),
|
| 1053 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
|
|
| 1065 |
}] : []),
|
| 1066 |
{
|
| 1067 |
accessorKey: "evaluations.vision_average",
|
| 1068 |
+
sortingFn: scoreSort,
|
| 1069 |
header: createLeaderboardHeader("Vision", "Average performance on vision tasks", null),
|
| 1070 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.vision_average"),
|
| 1071 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
|
|
| 1083 |
},
|
| 1084 |
{
|
| 1085 |
accessorKey: "evaluations.audio_average",
|
| 1086 |
+
sortingFn: scoreSort,
|
| 1087 |
header: createLeaderboardHeader("Audio", "Average performance on audio tasks", null),
|
| 1088 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.audio_average"),
|
| 1089 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
|
|
| 1101 |
},
|
| 1102 |
{
|
| 1103 |
accessorKey: "evaluations.english_average",
|
| 1104 |
+
sortingFn: scoreSort,
|
| 1105 |
header: createLeaderboardHeader("English", "Average performance on English language tasks", null),
|
| 1106 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.english_average"),
|
| 1107 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
|
|
| 1119 |
},
|
| 1120 |
{
|
| 1121 |
accessorKey: "evaluations.chinese_average",
|
| 1122 |
+
sortingFn: scoreSort,
|
| 1123 |
header: createLeaderboardHeader("Chinese", "Average performance on Chinese language tasks", null),
|
| 1124 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.chinese_average"),
|
| 1125 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
|
|
| 1137 |
},
|
| 1138 |
{
|
| 1139 |
accessorKey: "evaluations.japanese_average",
|
| 1140 |
+
sortingFn: scoreSort,
|
| 1141 |
header: createLeaderboardHeader("Japanese", "Average performance on Japanese language tasks", null),
|
| 1142 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.japanese_average"),
|
| 1143 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
|
|
| 1155 |
},
|
| 1156 |
{
|
| 1157 |
accessorKey: "evaluations.spanish_average",
|
| 1158 |
+
sortingFn: scoreSort,
|
| 1159 |
header: createLeaderboardHeader("Spanish", "Average performance on Spanish language tasks", null),
|
| 1160 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.spanish_average"),
|
| 1161 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
|
|
| 1173 |
},
|
| 1174 |
{
|
| 1175 |
accessorKey: "evaluations.bilingual_average",
|
| 1176 |
+
sortingFn: scoreSort,
|
| 1177 |
header: createLeaderboardHeader("Bilingual", "Average performance on bilingual tasks", null),
|
| 1178 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.bilingual_average"),
|
| 1179 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
|
|
| 1191 |
},
|
| 1192 |
{
|
| 1193 |
accessorKey: "evaluations.multilingual_average",
|
| 1194 |
+
sortingFn: scoreSort,
|
| 1195 |
header: createLeaderboardHeader("Multilingual", "Average performance on multilingual tasks", null),
|
| 1196 |
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.multilingual_average"),
|
| 1197 |
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
|
|
|
| 1206 |
backgroundColor: (theme) => alpha(theme.palette.secondary.main, 0.05),
|
| 1207 |
}),
|
| 1208 |
},
|
| 1209 |
+
},
|
| 1210 |
+
|
| 1211 |
];
|
| 1212 |
|
| 1213 |
const optionalColumns = [
|