hardcoded model list for multifinben
Browse files- frontend/src/pages/LeaderboardPage/LeaderboardPage.js +31 -36
- frontend/src/pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext.js +16 -1
- frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useDataUtils.js +163 -286
- frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useLeaderboardData.js +4 -53
- frontend/src/pages/LeaderboardPage/components/Leaderboard/utils/columnUtils.js +197 -0
frontend/src/pages/LeaderboardPage/LeaderboardPage.js
CHANGED
|
@@ -1,49 +1,44 @@
|
|
| 1 |
-
import
|
|
|
|
| 2 |
import Leaderboard from "./components/Leaderboard/Leaderboard";
|
| 3 |
-
import { Box } from "@mui/material";
|
| 4 |
-
import PageHeader from "../../components/shared/PageHeader";
|
| 5 |
-
import Logo from "../../components/Logo/Logo";
|
| 6 |
-
import { useLeaderboardData } from "../../pages/LeaderboardPage/components/Leaderboard/hooks/useLeaderboardData";
|
| 7 |
-
import { useLeaderboard } from "../../pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext";
|
| 8 |
-
|
| 9 |
-
function LeaderboardPage() {
|
| 10 |
-
const { data, isLoading, error } = useLeaderboardData();
|
| 11 |
-
const { actions } = useLeaderboard();
|
| 12 |
-
|
| 13 |
-
useEffect(() => {
|
| 14 |
-
if (data) {
|
| 15 |
-
actions.setModels(data);
|
| 16 |
-
}
|
| 17 |
-
actions.setLoading(isLoading);
|
| 18 |
-
actions.setError(error);
|
| 19 |
-
}, [data, isLoading, error, actions]);
|
| 20 |
|
|
|
|
| 21 |
return (
|
| 22 |
-
<
|
|
|
|
| 23 |
sx={{
|
| 24 |
-
|
| 25 |
display: "flex",
|
| 26 |
flexDirection: "column",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
}}
|
| 28 |
>
|
| 29 |
-
<Box
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
</Box>
|
| 34 |
-
<PageHeader
|
| 35 |
-
title="Open Financial LLM Leaderboard"
|
| 36 |
-
subtitle={
|
| 37 |
-
<>
|
| 38 |
-
Benchmark for large language models in {" "}
|
| 39 |
-
<span style={{ fontWeight: 600 }}>financial</span> domain {" "}
|
| 40 |
-
across multiple languages
|
| 41 |
-
</>
|
| 42 |
-
}
|
| 43 |
-
/>
|
| 44 |
<Leaderboard />
|
| 45 |
-
</
|
| 46 |
);
|
| 47 |
-
}
|
| 48 |
|
| 49 |
export default LeaderboardPage;
|
|
|
|
| 1 |
+
import React from "react";
|
| 2 |
+
import { Box, Typography, Container } from "@mui/material";
|
| 3 |
import Leaderboard from "./components/Leaderboard/Leaderboard";
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
const LeaderboardPage = () => {
|
| 6 |
return (
|
| 7 |
+
<Container
|
| 8 |
+
maxWidth={false}
|
| 9 |
sx={{
|
| 10 |
+
p: { xs: 1, sm: 2, md: 3 },
|
| 11 |
display: "flex",
|
| 12 |
flexDirection: "column",
|
| 13 |
+
alignItems: "center",
|
| 14 |
+
height: "100%",
|
| 15 |
+
maxWidth: "100vw",
|
| 16 |
+
overflow: "hidden"
|
| 17 |
}}
|
| 18 |
>
|
| 19 |
+
<Box sx={{ mb: 3, width: "100%", textAlign: "center" }}>
|
| 20 |
+
<Typography
|
| 21 |
+
variant="h4"
|
| 22 |
+
component="h1"
|
| 23 |
+
sx={{
|
| 24 |
+
fontWeight: 700,
|
| 25 |
+
mb: 1,
|
| 26 |
+
fontSize: { xs: "1.5rem", sm: "1.75rem", md: "2rem" },
|
| 27 |
+
}}
|
| 28 |
+
>
|
| 29 |
+
Open Financial LLM Leaderboard - Multi-modal & Multi-lingual
|
| 30 |
+
</Typography>
|
| 31 |
+
<Typography
|
| 32 |
+
variant="body1"
|
| 33 |
+
color="text.secondary"
|
| 34 |
+
sx={{ maxWidth: "800px", mx: "auto" }}
|
| 35 |
+
>
|
| 36 |
+
Comprehensive evaluation of language models on financial tasks across multiple languages and modalities
|
| 37 |
+
</Typography>
|
| 38 |
</Box>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
<Leaderboard />
|
| 40 |
+
</Container>
|
| 41 |
);
|
| 42 |
+
};
|
| 43 |
|
| 44 |
export default LeaderboardPage;
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/context/LeaderboardContext.js
CHANGED
|
@@ -29,7 +29,22 @@ const DEFAULT_DISPLAY = {
|
|
| 29 |
scoreDisplay: TABLE_DEFAULTS.SCORE_DISPLAY,
|
| 30 |
averageMode: TABLE_DEFAULTS.AVERAGE_MODE,
|
| 31 |
rankingMode: TABLE_DEFAULTS.RANKING_MODE,
|
| 32 |
-
visibleColumns:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
};
|
| 34 |
|
| 35 |
// Create initial counter structure
|
|
|
|
| 29 |
scoreDisplay: TABLE_DEFAULTS.SCORE_DISPLAY,
|
| 30 |
averageMode: TABLE_DEFAULTS.AVERAGE_MODE,
|
| 31 |
rankingMode: TABLE_DEFAULTS.RANKING_MODE,
|
| 32 |
+
visibleColumns: [
|
| 33 |
+
'isPinned',
|
| 34 |
+
'rank',
|
| 35 |
+
'model_type',
|
| 36 |
+
'id',
|
| 37 |
+
'model.average_score',
|
| 38 |
+
'evaluations.vision_average',
|
| 39 |
+
'evaluations.audio_average',
|
| 40 |
+
'evaluations.english_average',
|
| 41 |
+
'evaluations.chinese_average',
|
| 42 |
+
'evaluations.japanese_average',
|
| 43 |
+
'evaluations.spanish_average',
|
| 44 |
+
'evaluations.greek_average',
|
| 45 |
+
'evaluations.bilingual_average',
|
| 46 |
+
'evaluations.multilingual_average'
|
| 47 |
+
],
|
| 48 |
};
|
| 49 |
|
| 50 |
// Create initial counter structure
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useDataUtils.js
CHANGED
|
@@ -6,6 +6,91 @@ import {
|
|
| 6 |
} from "../utils/searchUtils";
|
| 7 |
import { ALLOWED_MODELS, isModelAllowed } from "../constants/allowedModels";
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
// Calculate min/max averages
|
| 10 |
export const useAverageRange = (data) => {
|
| 11 |
return useMemo(() => {
|
|
@@ -39,142 +124,96 @@ export const useColorGenerator = (minAverage, maxAverage) => {
|
|
| 39 |
// Process data with boolean standardization
|
| 40 |
export const useProcessedData = (data, averageMode, visibleColumns) => {
|
| 41 |
return useMemo(() => {
|
| 42 |
-
//
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
};
|
| 59 |
|
| 60 |
-
//
|
| 61 |
-
const
|
| 62 |
-
|
| 63 |
-
Object.entries(item.evaluations).forEach(([key, value]) => {
|
| 64 |
-
if (!greekDatasets.includes(key)) {
|
| 65 |
-
includedEvaluations[key] = value;
|
| 66 |
-
}
|
| 67 |
-
});
|
| 68 |
-
// Add Greek average
|
| 69 |
-
if (greekAverage !== null) {
|
| 70 |
-
includedEvaluations.greek_average = { normalized_score: greekAverage };
|
| 71 |
-
}
|
| 72 |
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
return visibleColumns.includes(`evaluations.${key}.normalized_score`);
|
| 77 |
-
})
|
| 78 |
-
.map(([, value]) => value.normalized_score);
|
| 79 |
-
|
| 80 |
-
const average =
|
| 81 |
-
evaluationScores.length > 0
|
| 82 |
-
? evaluationScores.reduce((a, b) => a + b, 0) /
|
| 83 |
-
evaluationScores.length
|
| 84 |
-
: averageMode === "visible"
|
| 85 |
-
? null
|
| 86 |
-
: 0;
|
| 87 |
-
|
| 88 |
-
// Boolean standardization
|
| 89 |
-
const standardizedFeatures = {
|
| 90 |
-
...item.features,
|
| 91 |
-
is_moe: Boolean(item.features.is_moe),
|
| 92 |
-
is_flagged: Boolean(item.features.is_flagged),
|
| 93 |
-
is_highlighted_by_maintainer: Boolean(
|
| 94 |
-
item.features.is_highlighted_by_maintainer
|
| 95 |
-
),
|
| 96 |
-
is_merged: Boolean(item.features.is_merged),
|
| 97 |
-
is_not_available_on_hub: Boolean(item.features.is_not_available_on_hub),
|
| 98 |
-
};
|
| 99 |
-
|
| 100 |
-
return {
|
| 101 |
-
...item,
|
| 102 |
-
features: standardizedFeatures,
|
| 103 |
-
evaluations: enhancedEvaluations, // Use enhanced evaluations
|
| 104 |
model: {
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
},
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
| 121 |
});
|
| 122 |
|
| 123 |
-
//
|
| 124 |
-
|
| 125 |
-
// Check if a matching model already exists
|
| 126 |
-
const modelExists = Object.keys(existingModelsMap).some(name =>
|
| 127 |
-
name.toLowerCase().includes(allowedModelName.toLowerCase())
|
| 128 |
-
);
|
| 129 |
-
|
| 130 |
-
if (!modelExists) {
|
| 131 |
-
// Create a "missing" placeholder
|
| 132 |
-
filteredModels.push({
|
| 133 |
-
id: `missing-${allowedModelName}`,
|
| 134 |
-
model: {
|
| 135 |
-
name: allowedModelName,
|
| 136 |
-
average_score: null,
|
| 137 |
-
type: "Unknown",
|
| 138 |
-
},
|
| 139 |
-
evaluations: {
|
| 140 |
-
greek_average: null
|
| 141 |
-
},
|
| 142 |
-
features: {
|
| 143 |
-
is_moe: false,
|
| 144 |
-
is_flagged: false,
|
| 145 |
-
is_highlighted_by_maintainer: false,
|
| 146 |
-
is_merged: false,
|
| 147 |
-
is_not_available_on_hub: true,
|
| 148 |
-
},
|
| 149 |
-
metadata: {
|
| 150 |
-
submission_date: new Date().toISOString(),
|
| 151 |
-
},
|
| 152 |
-
isMissing: true, // Mark as missing
|
| 153 |
-
});
|
| 154 |
-
}
|
| 155 |
-
});
|
| 156 |
-
|
| 157 |
-
// Sort the results
|
| 158 |
-
filteredModels.sort((a, b) => {
|
| 159 |
-
// Place missing models at the end
|
| 160 |
-
if (a.isMissing && !b.isMissing) return 1;
|
| 161 |
-
if (!a.isMissing && b.isMissing) return -1;
|
| 162 |
-
|
| 163 |
-
// If both are missing or both are not missing, sort by average score
|
| 164 |
if (a.model.average_score === null && b.model.average_score === null)
|
| 165 |
return 0;
|
| 166 |
if (a.model.average_score === null) return 1;
|
| 167 |
if (b.model.average_score === null) return -1;
|
| 168 |
return b.model.average_score - a.model.average_score;
|
| 169 |
});
|
| 170 |
-
|
| 171 |
-
|
|
|
|
| 172 |
...item,
|
| 173 |
static_rank: index + 1,
|
| 174 |
}));
|
| 175 |
}, [data, averageMode, visibleColumns]);
|
| 176 |
};
|
| 177 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
// Common filtering logic
|
| 179 |
export const useFilteredData = (
|
| 180 |
processedData,
|
|
@@ -188,179 +227,17 @@ export const useFilteredData = (
|
|
| 188 |
isOfficialProviderActive = false
|
| 189 |
) => {
|
| 190 |
return useMemo(() => {
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
let filteredUnpinned = unpinnedData;
|
| 199 |
-
|
| 200 |
-
// Filter by official providers
|
| 201 |
-
if (isOfficialProviderActive) {
|
| 202 |
-
filteredUnpinned = filteredUnpinned.filter(
|
| 203 |
-
(row) =>
|
| 204 |
-
row.features?.is_highlighted_by_maintainer ||
|
| 205 |
-
row.metadata?.is_highlighted_by_maintainer
|
| 206 |
-
);
|
| 207 |
-
}
|
| 208 |
-
|
| 209 |
-
// Filter by precision
|
| 210 |
-
if (selectedPrecisions.length > 0) {
|
| 211 |
-
filteredUnpinned = filteredUnpinned.filter((row) =>
|
| 212 |
-
selectedPrecisions.includes(row.model.precision)
|
| 213 |
-
);
|
| 214 |
-
}
|
| 215 |
-
|
| 216 |
-
// Filter by type
|
| 217 |
-
if (selectedTypes.length > 0) {
|
| 218 |
-
filteredUnpinned = filteredUnpinned.filter((row) => {
|
| 219 |
-
const modelType = row.model.type?.toLowerCase().trim();
|
| 220 |
-
return selectedTypes.some((type) => modelType?.includes(type));
|
| 221 |
-
});
|
| 222 |
-
}
|
| 223 |
-
|
| 224 |
-
// Filter by parameters
|
| 225 |
-
filteredUnpinned = filteredUnpinned.filter((row) => {
|
| 226 |
-
// Skip parameter filtering if no filter is active
|
| 227 |
-
if (paramsRange[0] === -1 && paramsRange[1] === 140) return true;
|
| 228 |
-
|
| 229 |
-
const params =
|
| 230 |
-
row.metadata?.params_billions || row.features?.params_billions;
|
| 231 |
-
if (params === undefined || params === null) return false;
|
| 232 |
-
return params >= paramsRange[0] && params < paramsRange[1];
|
| 233 |
-
});
|
| 234 |
-
|
| 235 |
-
// Filter by search
|
| 236 |
-
if (searchValue) {
|
| 237 |
-
const searchQueries = searchValue
|
| 238 |
-
.split(";")
|
| 239 |
-
.map((q) => q.trim())
|
| 240 |
-
.filter((q) => q);
|
| 241 |
-
if (searchQueries.length > 0) {
|
| 242 |
-
filteredUnpinned = filteredUnpinned.filter((row) => {
|
| 243 |
-
return searchQueries.some((query) => {
|
| 244 |
-
const { specialSearches, textSearch } = parseSearchQuery(query);
|
| 245 |
-
|
| 246 |
-
const specialSearchMatch = specialSearches.every(
|
| 247 |
-
({ field, value }) => {
|
| 248 |
-
const fieldValue = getValueByPath(row, field)
|
| 249 |
-
?.toString()
|
| 250 |
-
.toLowerCase();
|
| 251 |
-
return fieldValue?.includes(value.toLowerCase());
|
| 252 |
-
}
|
| 253 |
-
);
|
| 254 |
-
|
| 255 |
-
if (!specialSearchMatch) return false;
|
| 256 |
-
if (!textSearch) return true;
|
| 257 |
-
|
| 258 |
-
const modelName = row.model.name.toLowerCase();
|
| 259 |
-
const searchLower = textSearch.toLowerCase();
|
| 260 |
-
|
| 261 |
-
if (looksLikeRegex(textSearch)) {
|
| 262 |
-
try {
|
| 263 |
-
const regex = new RegExp(textSearch, "i");
|
| 264 |
-
return regex.test(modelName);
|
| 265 |
-
} catch (e) {
|
| 266 |
-
return modelName.includes(searchLower);
|
| 267 |
-
}
|
| 268 |
-
} else {
|
| 269 |
-
return modelName.includes(searchLower);
|
| 270 |
-
}
|
| 271 |
-
});
|
| 272 |
-
});
|
| 273 |
-
}
|
| 274 |
-
}
|
| 275 |
-
|
| 276 |
-
// Filter by booleans
|
| 277 |
-
if (selectedBooleanFilters.length > 0) {
|
| 278 |
-
filteredUnpinned = filteredUnpinned.filter((row) => {
|
| 279 |
-
return selectedBooleanFilters.every((filter) => {
|
| 280 |
-
const filterValue =
|
| 281 |
-
typeof filter === "object" ? filter.value : filter;
|
| 282 |
-
|
| 283 |
-
// Maintainer's Highlight keeps positive logic
|
| 284 |
-
if (filterValue === "is_highlighted_by_maintainer") {
|
| 285 |
-
return row.features[filterValue];
|
| 286 |
-
}
|
| 287 |
-
|
| 288 |
-
// For all other filters, invert the logic
|
| 289 |
-
if (filterValue === "is_not_available_on_hub") {
|
| 290 |
-
return row.features[filterValue];
|
| 291 |
-
}
|
| 292 |
-
|
| 293 |
-
return !row.features[filterValue];
|
| 294 |
-
});
|
| 295 |
-
});
|
| 296 |
-
}
|
| 297 |
-
|
| 298 |
-
// Create ordered array of pinned models respecting pinnedModels order
|
| 299 |
-
const orderedPinnedData = pinnedModels
|
| 300 |
-
.map((pinnedModelId) =>
|
| 301 |
-
pinnedData.find((item) => item.id === pinnedModelId)
|
| 302 |
-
)
|
| 303 |
-
.filter(Boolean);
|
| 304 |
-
|
| 305 |
-
// Combine all filtered data
|
| 306 |
-
const allFilteredData = [...filteredUnpinned, ...orderedPinnedData];
|
| 307 |
-
|
| 308 |
-
// Sort all data by average_score for dynamic_rank
|
| 309 |
-
const sortedByScore = [...allFilteredData].sort((a, b) => {
|
| 310 |
-
// Si les scores moyens sont différents, trier par score
|
| 311 |
-
if (a.model.average_score !== b.model.average_score) {
|
| 312 |
-
if (a.model.average_score === null && b.model.average_score === null)
|
| 313 |
-
return 0;
|
| 314 |
-
if (a.model.average_score === null) return 1;
|
| 315 |
-
if (b.model.average_score === null) return -1;
|
| 316 |
-
return b.model.average_score - a.model.average_score;
|
| 317 |
-
}
|
| 318 |
-
|
| 319 |
-
// Si les scores sont égaux, comparer le nom du modèle et la date de soumission
|
| 320 |
-
if (a.model.name === b.model.name) {
|
| 321 |
-
// Si même nom, trier par date de soumission (la plus récente d'abord)
|
| 322 |
-
const dateA = new Date(a.metadata?.submission_date || 0);
|
| 323 |
-
const dateB = new Date(b.metadata?.submission_date || 0);
|
| 324 |
-
return dateB - dateA;
|
| 325 |
-
}
|
| 326 |
-
|
| 327 |
-
// Si noms différents, trier par nom
|
| 328 |
-
return a.model.name.localeCompare(b.model.name);
|
| 329 |
-
});
|
| 330 |
-
|
| 331 |
-
// Create Map to store dynamic_ranks
|
| 332 |
-
const dynamicRankMap = new Map();
|
| 333 |
-
sortedByScore.forEach((item, index) => {
|
| 334 |
-
dynamicRankMap.set(item.id, index + 1);
|
| 335 |
-
});
|
| 336 |
-
|
| 337 |
-
// Add ranks to final data
|
| 338 |
-
const finalData = [...orderedPinnedData, ...filteredUnpinned].map(
|
| 339 |
-
(item) => {
|
| 340 |
-
return {
|
| 341 |
-
...item,
|
| 342 |
-
dynamic_rank: dynamicRankMap.get(item.id),
|
| 343 |
-
rank: item.isPinned
|
| 344 |
-
? pinnedModels.indexOf(item.id) + 1
|
| 345 |
-
: rankingMode === "static"
|
| 346 |
-
? item.static_rank
|
| 347 |
-
: dynamicRankMap.get(item.id),
|
| 348 |
-
isPinned: pinnedModels.includes(item.id),
|
| 349 |
-
};
|
| 350 |
-
}
|
| 351 |
-
);
|
| 352 |
-
|
| 353 |
-
return finalData;
|
| 354 |
}, [
|
| 355 |
processedData,
|
| 356 |
-
selectedPrecisions,
|
| 357 |
-
selectedTypes,
|
| 358 |
-
paramsRange,
|
| 359 |
-
searchValue,
|
| 360 |
-
selectedBooleanFilters,
|
| 361 |
rankingMode,
|
| 362 |
pinnedModels,
|
| 363 |
-
isOfficialProviderActive,
|
| 364 |
]);
|
| 365 |
};
|
| 366 |
|
|
|
|
| 6 |
} from "../utils/searchUtils";
|
| 7 |
import { ALLOWED_MODELS, isModelAllowed } from "../constants/allowedModels";
|
| 8 |
|
| 9 |
+
// Hardcoded score dataset.
// Shape: category name -> (model name -> numeric score).
// Every category lists the same model names in the same order; the key order
// of the first category determines the order in which model names are first
// seen when the table is enumerated.
// NOTE(review): many entries are 0.00 — presumably "not evaluated" rather than
// a measured zero; confirm before treating them as real scores.
const HARDCODED_SCORES = {
  // Vision category scores.
  vision: {
    "GPT-4o": 55.54, "o3-Mini": 0.00, "Deepseek-V3": 0.00, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 16.27,
    "meta-llama/Llama-3.1-70B-Instruct": 0.00, "google/gemma-3-4b-it": 14.97, "google/gemma-3-27b-it": 25.57,
    "Qwen/Qwen2.5-32B-Instruct": 0.00, "Qwen/Qwen2.5-Omni-7B": 24.97, "TheFinAI/finma-7b-full": 0.00,
    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 0.00, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 0.00,
    "TheFinAI/FinMA-ES-Bilingual": 0.00, "TheFinAI/plutus-8B-instruct": 0.00, "Qwen-VL-MAX": 18.47,
    "LLaVA-1.6 Vicuna-13B": 19.77, "Deepseek-VL-7B-Chat": 19.10, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
  },
  // Audio category scores.
  audio: {
    "GPT-4o": 55.56, "o3-Mini": 0.00, "Deepseek-V3": 0.00, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 0.00,
    "meta-llama/Llama-3.1-70B-Instruct": 0.00, "google/gemma-3-4b-it": 0.00, "google/gemma-3-27b-it": 0.00,
    "Qwen/Qwen2.5-32B-Instruct": 0.00, "Qwen/Qwen2.5-Omni-7B": 48.22, "TheFinAI/finma-7b-full": 0.00,
    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 0.00, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 0.00,
    "TheFinAI/FinMA-ES-Bilingual": 0.00, "TheFinAI/plutus-8B-instruct": 0.00, "Qwen-VL-MAX": 0.00,
    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 51.58, "Qwen2-Audio-7B": 48.02,
    "Qwen2-Audio-7B-Instruct": 50.06, "SALMONN-7B": 24.24, "SALMONN-13B": 24.59
  },
  // English category scores.
  english: {
    "GPT-4o": 42.18, "o3-Mini": 20.20, "Deepseek-V3": 18.04, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 24.16,
    "meta-llama/Llama-3.1-70B-Instruct": 38.71, "google/gemma-3-4b-it": 16.13, "google/gemma-3-27b-it": 17.19,
    "Qwen/Qwen2.5-32B-Instruct": 32.01, "Qwen/Qwen2.5-Omni-7B": 24.99, "TheFinAI/finma-7b-full": 28.89,
    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 29.39, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 26.38,
    "TheFinAI/FinMA-ES-Bilingual": 31.72, "TheFinAI/plutus-8B-instruct": 27.82, "Qwen-VL-MAX": 0.00,
    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
  },
  // Chinese category scores.
  chinese: {
    "GPT-4o": 60.34, "o3-Mini": 0.00, "Deepseek-V3": 60.94, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 64.51,
    "meta-llama/Llama-3.1-70B-Instruct": 56.74, "google/gemma-3-4b-it": 26.23, "google/gemma-3-27b-it": 26.24,
    "Qwen/Qwen2.5-32B-Instruct": 56.62, "Qwen/Qwen2.5-Omni-7B": 53.09, "TheFinAI/finma-7b-full": 24.42,
    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 23.04, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 13.18,
    "TheFinAI/FinMA-ES-Bilingual": 21.50, "TheFinAI/plutus-8B-instruct": 31.04, "Qwen-VL-MAX": 0.00,
    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
  },
  // Japanese category scores.
  japanese: {
    "GPT-4o": 0.00, "o3-Mini": 0.00, "Deepseek-V3": 0.00, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 48.43,
    "meta-llama/Llama-3.1-70B-Instruct": 32.17, "google/gemma-3-4b-it": 8.98, "google/gemma-3-27b-it": 23.96,
    "Qwen/Qwen2.5-32B-Instruct": 4.54, "Qwen/Qwen2.5-Omni-7B": 44.35, "TheFinAI/finma-7b-full": 46.94,
    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 47.59, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 23.96,
    "TheFinAI/FinMA-ES-Bilingual": 57.36, "TheFinAI/plutus-8B-instruct": 34.62, "Qwen-VL-MAX": 0.00,
    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
  },
  // Spanish category scores.
  spanish: {
    "GPT-4o": 29.80, "o3-Mini": 4.53, "Deepseek-V3": 25.49, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 47.90,
    "meta-llama/Llama-3.1-70B-Instruct": 37.84, "google/gemma-3-4b-it": 27.66, "google/gemma-3-27b-it": 27.77,
    "Qwen/Qwen2.5-32B-Instruct": 37.47, "Qwen/Qwen2.5-Omni-7B": 39.16, "TheFinAI/finma-7b-full": 27.04,
    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 42.86, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 28.01,
    "TheFinAI/FinMA-ES-Bilingual": 38.69, "TheFinAI/plutus-8B-instruct": 40.16, "Qwen-VL-MAX": 0.00,
    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
  },
  // Greek category scores.
  greek: {
    "GPT-4o": 43.04, "o3-Mini": 9.48, "Deepseek-V3": 39.07, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 48.95,
    "meta-llama/Llama-3.1-70B-Instruct": 43.60, "google/gemma-3-4b-it": 15.45, "google/gemma-3-27b-it": 15.44,
    "Qwen/Qwen2.5-32B-Instruct": 44.32, "Qwen/Qwen2.5-Omni-7B": 23.45, "TheFinAI/finma-7b-full": 17.93,
    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 29.49, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 20.91,
    "TheFinAI/FinMA-ES-Bilingual": 15.47, "TheFinAI/plutus-8B-instruct": 60.19, "Qwen-VL-MAX": 0.00,
    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
  },
  // Bilingual category scores.
  bilingual: {
    "GPT-4o": 92.29, "o3-Mini": 90.13, "Deepseek-V3": 86.26, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 89.17,
    "meta-llama/Llama-3.1-70B-Instruct": 92.13, "google/gemma-3-4b-it": 35.92, "google/gemma-3-27b-it": 35.92,
    "Qwen/Qwen2.5-32B-Instruct": 92.29, "Qwen/Qwen2.5-Omni-7B": 91.80, "TheFinAI/finma-7b-full": 69.24,
    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 91.60, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 71.81,
    "TheFinAI/FinMA-ES-Bilingual": 66.57, "TheFinAI/plutus-8B-instruct": 91.59, "Qwen-VL-MAX": 0.00,
    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
  },
  // Multilingual category scores.
  multilingual: {
    "GPT-4o": 6.53, "o3-Mini": 7.80, "Deepseek-V3": 36.99, "meta-llama/Llama-4-Scout-17B-16E-Instruct": 13.52,
    "meta-llama/Llama-3.1-70B-Instruct": 21.97, "google/gemma-3-4b-it": 0.00, "google/gemma-3-27b-it": 0.00,
    "Qwen/Qwen2.5-32B-Instruct": 18.48, "Qwen/Qwen2.5-Omni-7B": 16.29, "TheFinAI/finma-7b-full": 3.10,
    "Duxiaoman-DI/Llama3.1-XuanYuan-FinX1-Preview": 1.76, "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese": 10.25,
    "TheFinAI/FinMA-ES-Bilingual": 0.35, "TheFinAI/plutus-8B-instruct": 7.24, "Qwen-VL-MAX": 0.00,
    "LLaVA-1.6 Vicuna-13B": 0.00, "Deepseek-VL-7B-Chat": 0.00, "Whisper-V3": 0.00, "Qwen2-Audio-7B": 0.00,
    "Qwen2-Audio-7B-Instruct": 0.00, "SALMONN-7B": 0.00, "SALMONN-13B": 0.00
  }
};
|
| 93 |
+
|
| 94 |
// Calculate min/max averages
|
| 95 |
export const useAverageRange = (data) => {
|
| 96 |
return useMemo(() => {
|
|
|
|
| 124 |
/**
 * Build the leaderboard rows from the hardcoded score table.
 *
 * Every model name that appears in any HARDCODED_SCORES category becomes one
 * row. Its per-category scores are read through getHardcodedScore, and its
 * overall average is the mean of the non-null category scores (zeros are
 * included in the mean). Rows are sorted by descending average, with null
 * averages last, and then numbered with a 1-based `static_rank`.
 *
 * The three parameters are kept so existing call sites keep working, but the
 * result does not depend on any of them.
 *
 * @param {*} data - Unused.
 * @param {*} averageMode - Unused.
 * @param {*} visibleColumns - Unused.
 * @returns {Array<Object>} Sorted rows, each carrying `static_rank`.
 */
export const useProcessedData = (data, averageMode, visibleColumns) => {
  return useMemo(() => {
    // Collect each distinct model name in first-seen order; that order fixes
    // the `model-<index>` ids below.
    const modelNames = new Set();
    for (const categoryScores of Object.values(HARDCODED_SCORES)) {
      for (const modelName of Object.keys(categoryScores)) {
        modelNames.add(modelName);
      }
    }

    // One timestamp shared by every row built in this pass.
    const submissionDate = new Date().toISOString();

    const modelList = Array.from(modelNames, (modelName, index) => {
      const evaluations = {
        vision_average: getHardcodedScore(modelName, "vision"),
        audio_average: getHardcodedScore(modelName, "audio"),
        english_average: getHardcodedScore(modelName, "english"),
        chinese_average: getHardcodedScore(modelName, "chinese"),
        japanese_average: getHardcodedScore(modelName, "japanese"),
        spanish_average: getHardcodedScore(modelName, "spanish"),
        greek_average: getHardcodedScore(modelName, "greek"),
        bilingual_average: getHardcodedScore(modelName, "bilingual"),
        multilingual_average: getHardcodedScore(modelName, "multilingual"),
      };

      // Overall average over the categories that have a score; a score of 0
      // still counts, only null (no entry) is left out.
      const scores = Object.values(evaluations).filter(
        (score) => score !== null
      );
      const averageScore =
        scores.length > 0
          ? scores.reduce((sum, score) => sum + score, 0) / scores.length
          : null;

      return {
        id: `model-${index}`,
        model: {
          name: modelName,
          average_score: averageScore,
          type: "chat", // every hardcoded model is given the "chat" type
        },
        evaluations,
        features: {
          is_moe: false,
          is_flagged: false,
          is_highlighted_by_maintainer: false,
          is_merged: false,
          is_not_available_on_hub: false,
        },
        metadata: {
          submission_date: submissionDate,
        },
        isMissing: false,
      };
    });

    // Highest average first; rows without an average sink to the bottom.
    modelList.sort((a, b) => {
      if (a.model.average_score === null && b.model.average_score === null)
        return 0;
      if (a.model.average_score === null) return 1;
      if (b.model.average_score === null) return -1;
      return b.model.average_score - a.model.average_score;
    });

    return modelList.map((item, index) => ({
      ...item,
      static_rank: index + 1,
    }));
    // Nothing from the hook arguments is read above, so the rows are built
    // once per mount instead of on every change of an unrelated argument
    // (which also produced new row identities and timestamps each time).
  }, []);
};
|
| 197 |
|
| 198 |
+
/**
 * Look up a model's hardcoded score for one category.
 *
 * Resolution order:
 *   1. An exact, own-property match on the model name.
 *   2. A substring match in either direction (the name contains a key, or a
 *      key contains the name). When several keys match, the key whose length
 *      is closest to the name wins, so "org/Foo-7B-Instruct" resolves to
 *      "Foo-7B-Instruct" rather than the shorter "Foo-7B". Ties keep the
 *      first key in table order.
 *
 * @param {string} modelName - Model name to look up.
 * @param {string} category - Key of HARDCODED_SCORES (e.g. "vision").
 * @returns {number|null} The score, or null when the category or model is
 *   unknown or the name is empty / not a string.
 */
function getHardcodedScore(modelName, category) {
  const hasOwn = (object, key) =>
    Object.prototype.hasOwnProperty.call(object, key);

  const categoryScores = HARDCODED_SCORES[category];
  if (!categoryScores) return null;

  // An empty name is a substring of every key, so it must never reach the
  // partial-match step.
  if (typeof modelName !== "string" || modelName === "") return null;

  // Exact match. Own-property check so inherited names such as "toString"
  // are not mistaken for scores.
  if (hasOwn(categoryScores, modelName)) {
    return categoryScores[modelName];
  }

  // Partial match: pick the overlapping key closest in length to the name.
  let bestKey = null;
  let bestDistance = Infinity;
  for (const key of Object.keys(categoryScores)) {
    if (modelName.includes(key) || key.includes(modelName)) {
      const distance = Math.abs(key.length - modelName.length);
      if (distance < bestDistance) {
        bestKey = key;
        bestDistance = distance;
      }
    }
  }

  return bestKey === null ? null : categoryScores[bestKey];
}
|
| 216 |
+
|
| 217 |
// Common filtering logic
|
| 218 |
export const useFilteredData = (
|
| 219 |
processedData,
|
|
|
|
| 227 |
isOfficialProviderActive = false
|
| 228 |
) => {
|
| 229 |
return useMemo(() => {
|
| 230 |
+
// The data is hardcoded, so return every row as-is without applying any filters
|
| 231 |
+
return processedData.map((item, index) => ({
|
| 232 |
+
...item,
|
| 233 |
+
dynamic_rank: index + 1,
|
| 234 |
+
rank: rankingMode === "static" ? item.static_rank : index + 1,
|
| 235 |
+
isPinned: pinnedModels.includes(item.id),
|
| 236 |
+
}));
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
}, [
|
| 238 |
processedData,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
rankingMode,
|
| 240 |
pinnedModels,
|
|
|
|
| 241 |
]);
|
| 242 |
};
|
| 243 |
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/hooks/useLeaderboardData.js
CHANGED
|
@@ -8,60 +8,11 @@ const CACHE_KEY = "leaderboardData";
|
|
| 8 |
const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
|
| 9 |
|
| 10 |
export const useLeaderboardData = () => {
|
| 11 |
-
const queryClient = useQueryClient();
|
| 12 |
-
const [searchParams] = useSearchParams();
|
| 13 |
-
const isInitialLoadRef = useRef(true);
|
| 14 |
-
|
| 15 |
-
const { data, isLoading, error } = useQuery({
|
| 16 |
-
queryKey: ["leaderboard"],
|
| 17 |
-
queryFn: async () => {
|
| 18 |
-
try {
|
| 19 |
-
const cachedData = localStorage.getItem(CACHE_KEY);
|
| 20 |
-
if (cachedData) {
|
| 21 |
-
const { data: cached, timestamp } = JSON.parse(cachedData);
|
| 22 |
-
const age = Date.now() - timestamp;
|
| 23 |
-
if (age < CACHE_DURATION) {
|
| 24 |
-
return cached;
|
| 25 |
-
}
|
| 26 |
-
}
|
| 27 |
-
|
| 28 |
-
const response = await fetch("/api/leaderboard/formatted");
|
| 29 |
-
if (!response.ok) {
|
| 30 |
-
throw new Error(`HTTP error! status: ${response.status}`);
|
| 31 |
-
}
|
| 32 |
-
|
| 33 |
-
const newData = await response.json();
|
| 34 |
-
localStorage.setItem(
|
| 35 |
-
CACHE_KEY,
|
| 36 |
-
JSON.stringify({
|
| 37 |
-
data: newData,
|
| 38 |
-
timestamp: Date.now(),
|
| 39 |
-
})
|
| 40 |
-
);
|
| 41 |
-
|
| 42 |
-
return newData;
|
| 43 |
-
} catch (error) {
|
| 44 |
-
console.error("Detailed error:", error);
|
| 45 |
-
throw error;
|
| 46 |
-
}
|
| 47 |
-
},
|
| 48 |
-
staleTime: CACHE_DURATION,
|
| 49 |
-
cacheTime: CACHE_DURATION * 2,
|
| 50 |
-
refetchOnWindowFocus: false,
|
| 51 |
-
enabled: isInitialLoadRef.current || !!searchParams.toString(),
|
| 52 |
-
});
|
| 53 |
-
|
| 54 |
-
useMemo(() => {
|
| 55 |
-
if (data && isInitialLoadRef.current) {
|
| 56 |
-
isInitialLoadRef.current = false;
|
| 57 |
-
}
|
| 58 |
-
}, [data]);
|
| 59 |
-
|
| 60 |
return {
|
| 61 |
-
data,
|
| 62 |
-
isLoading,
|
| 63 |
-
error,
|
| 64 |
-
refetch: () =>
|
| 65 |
};
|
| 66 |
};
|
| 67 |
|
|
|
|
| 8 |
const CACHE_DURATION = 5 * 60 * 1000; // 5 minutes
|
| 9 |
|
| 10 |
export const useLeaderboardData = () => {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
return {
|
| 12 |
+
data: [], // Return an empty array directly; we use hard-coded data
|
| 13 |
+
isLoading: false,
|
| 14 |
+
error: null,
|
| 15 |
+
refetch: () => {}
|
| 16 |
};
|
| 17 |
};
|
| 18 |
|
frontend/src/pages/LeaderboardPage/components/Leaderboard/utils/columnUtils.js
CHANGED
|
@@ -499,6 +499,67 @@ const createGreekLeaderboardHeader = (header) => (
|
|
| 499 |
</Box>
|
| 500 |
);
|
| 501 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
export const createColumns = (
|
| 503 |
getColorForValue,
|
| 504 |
scoreDisplay = "normalized",
|
|
@@ -928,6 +989,142 @@ export const createColumns = (
|
|
| 928 |
}),
|
| 929 |
},
|
| 930 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 931 |
];
|
| 932 |
|
| 933 |
const optionalColumns = [
|
|
|
|
| 499 |
</Box>
|
| 500 |
);
|
| 501 |
|
| 502 |
+
// Create custom header components for the various Leaderboard types
|
| 503 |
+
const createLeaderboardHeader = (label, tooltip, linkUrl) => (header) => (
|
| 504 |
+
<Box
|
| 505 |
+
className="header-content"
|
| 506 |
+
sx={{
|
| 507 |
+
display: "flex",
|
| 508 |
+
alignItems: "center",
|
| 509 |
+
width: "100%",
|
| 510 |
+
position: "relative",
|
| 511 |
+
}}
|
| 512 |
+
>
|
| 513 |
+
<HeaderLabel
|
| 514 |
+
label={`${label} Leaderboard`}
|
| 515 |
+
tooltip={tooltip}
|
| 516 |
+
className="header-label"
|
| 517 |
+
isSorted={header?.column?.getIsSorted()}
|
| 518 |
+
/>
|
| 519 |
+
|
| 520 |
+
<Box
|
| 521 |
+
sx={{
|
| 522 |
+
display: "flex",
|
| 523 |
+
alignItems: "center",
|
| 524 |
+
gap: 0.5,
|
| 525 |
+
ml: "auto",
|
| 526 |
+
flexShrink: 0,
|
| 527 |
+
}}
|
| 528 |
+
>
|
| 529 |
+
<InfoIcon tooltip={tooltip} />
|
| 530 |
+
{linkUrl && (
|
| 531 |
+
<Link
|
| 532 |
+
href={linkUrl}
|
| 533 |
+
target="_blank"
|
| 534 |
+
rel="noopener noreferrer"
|
| 535 |
+
aria-label={`View ${label} Leaderboard`}
|
| 536 |
+
sx={{
|
| 537 |
+
color: "info.main",
|
| 538 |
+
display: "flex",
|
| 539 |
+
alignItems: "center",
|
| 540 |
+
ml: 0.5,
|
| 541 |
+
textDecoration: "none",
|
| 542 |
+
"&:hover": {
|
| 543 |
+
textDecoration: "underline",
|
| 544 |
+
"& svg": {
|
| 545 |
+
opacity: 0.8,
|
| 546 |
+
},
|
| 547 |
+
},
|
| 548 |
+
}}
|
| 549 |
+
>
|
| 550 |
+
<OpenInNewIcon
|
| 551 |
+
sx={{
|
| 552 |
+
fontSize: "1rem",
|
| 553 |
+
opacity: 0.6,
|
| 554 |
+
transition: "opacity 0.2s ease-in-out",
|
| 555 |
+
}}
|
| 556 |
+
/>
|
| 557 |
+
</Link>
|
| 558 |
+
)}
|
| 559 |
+
</Box>
|
| 560 |
+
</Box>
|
| 561 |
+
);
|
| 562 |
+
|
| 563 |
export const createColumns = (
|
| 564 |
getColorForValue,
|
| 565 |
scoreDisplay = "normalized",
|
|
|
|
| 989 |
}),
|
| 990 |
},
|
| 991 |
},
|
| 992 |
+
{
|
| 993 |
+
accessorKey: "evaluations.vision_average",
|
| 994 |
+
header: createLeaderboardHeader("Vision", "Average performance on vision tasks", null),
|
| 995 |
+
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.vision_average"),
|
| 996 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
| 997 |
+
meta: {
|
| 998 |
+
headerStyle: {
|
| 999 |
+
backgroundColor: (theme) => alpha(theme.palette.primary.light, 0.05),
|
| 1000 |
+
},
|
| 1001 |
+
cellStyle: (value) => ({
|
| 1002 |
+
position: "relative",
|
| 1003 |
+
overflow: "hidden",
|
| 1004 |
+
padding: "8px 16px",
|
| 1005 |
+
backgroundColor: (theme) => alpha(theme.palette.primary.light, 0.05),
|
| 1006 |
+
}),
|
| 1007 |
+
},
|
| 1008 |
+
},
|
| 1009 |
+
{
|
| 1010 |
+
accessorKey: "evaluations.audio_average",
|
| 1011 |
+
header: createLeaderboardHeader("Audio", "Average performance on audio tasks", null),
|
| 1012 |
+
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.audio_average"),
|
| 1013 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
| 1014 |
+
meta: {
|
| 1015 |
+
headerStyle: {
|
| 1016 |
+
backgroundColor: (theme) => alpha(theme.palette.secondary.light, 0.05),
|
| 1017 |
+
},
|
| 1018 |
+
cellStyle: (value) => ({
|
| 1019 |
+
position: "relative",
|
| 1020 |
+
overflow: "hidden",
|
| 1021 |
+
padding: "8px 16px",
|
| 1022 |
+
backgroundColor: (theme) => alpha(theme.palette.secondary.light, 0.05),
|
| 1023 |
+
}),
|
| 1024 |
+
},
|
| 1025 |
+
},
|
| 1026 |
+
{
|
| 1027 |
+
accessorKey: "evaluations.english_average",
|
| 1028 |
+
header: createLeaderboardHeader("English", "Average performance on English language tasks", null),
|
| 1029 |
+
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.english_average"),
|
| 1030 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
| 1031 |
+
meta: {
|
| 1032 |
+
headerStyle: {
|
| 1033 |
+
backgroundColor: (theme) => alpha(theme.palette.success.light, 0.05),
|
| 1034 |
+
},
|
| 1035 |
+
cellStyle: (value) => ({
|
| 1036 |
+
position: "relative",
|
| 1037 |
+
overflow: "hidden",
|
| 1038 |
+
padding: "8px 16px",
|
| 1039 |
+
backgroundColor: (theme) => alpha(theme.palette.success.light, 0.05),
|
| 1040 |
+
}),
|
| 1041 |
+
},
|
| 1042 |
+
},
|
| 1043 |
+
{
|
| 1044 |
+
accessorKey: "evaluations.chinese_average",
|
| 1045 |
+
header: createLeaderboardHeader("Chinese", "Average performance on Chinese language tasks", null),
|
| 1046 |
+
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.chinese_average"),
|
| 1047 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
| 1048 |
+
meta: {
|
| 1049 |
+
headerStyle: {
|
| 1050 |
+
backgroundColor: (theme) => alpha(theme.palette.warning.light, 0.05),
|
| 1051 |
+
},
|
| 1052 |
+
cellStyle: (value) => ({
|
| 1053 |
+
position: "relative",
|
| 1054 |
+
overflow: "hidden",
|
| 1055 |
+
padding: "8px 16px",
|
| 1056 |
+
backgroundColor: (theme) => alpha(theme.palette.warning.light, 0.05),
|
| 1057 |
+
}),
|
| 1058 |
+
},
|
| 1059 |
+
},
|
| 1060 |
+
{
|
| 1061 |
+
accessorKey: "evaluations.japanese_average",
|
| 1062 |
+
header: createLeaderboardHeader("Japanese", "Average performance on Japanese language tasks", null),
|
| 1063 |
+
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.japanese_average"),
|
| 1064 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
| 1065 |
+
meta: {
|
| 1066 |
+
headerStyle: {
|
| 1067 |
+
backgroundColor: (theme) => alpha(theme.palette.error.light, 0.05),
|
| 1068 |
+
},
|
| 1069 |
+
cellStyle: (value) => ({
|
| 1070 |
+
position: "relative",
|
| 1071 |
+
overflow: "hidden",
|
| 1072 |
+
padding: "8px 16px",
|
| 1073 |
+
backgroundColor: (theme) => alpha(theme.palette.error.light, 0.05),
|
| 1074 |
+
}),
|
| 1075 |
+
},
|
| 1076 |
+
},
|
| 1077 |
+
{
|
| 1078 |
+
accessorKey: "evaluations.spanish_average",
|
| 1079 |
+
header: createLeaderboardHeader("Spanish", "Average performance on Spanish language tasks", null),
|
| 1080 |
+
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.spanish_average"),
|
| 1081 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
| 1082 |
+
meta: {
|
| 1083 |
+
headerStyle: {
|
| 1084 |
+
backgroundColor: (theme) => alpha(theme.palette.info.main, 0.05),
|
| 1085 |
+
},
|
| 1086 |
+
cellStyle: (value) => ({
|
| 1087 |
+
position: "relative",
|
| 1088 |
+
overflow: "hidden",
|
| 1089 |
+
padding: "8px 16px",
|
| 1090 |
+
backgroundColor: (theme) => alpha(theme.palette.info.main, 0.05),
|
| 1091 |
+
}),
|
| 1092 |
+
},
|
| 1093 |
+
},
|
| 1094 |
+
{
|
| 1095 |
+
accessorKey: "evaluations.bilingual_average",
|
| 1096 |
+
header: createLeaderboardHeader("Bilingual", "Average performance on bilingual tasks", null),
|
| 1097 |
+
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.bilingual_average"),
|
| 1098 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
| 1099 |
+
meta: {
|
| 1100 |
+
headerStyle: {
|
| 1101 |
+
backgroundColor: (theme) => alpha(theme.palette.primary.main, 0.05),
|
| 1102 |
+
},
|
| 1103 |
+
cellStyle: (value) => ({
|
| 1104 |
+
position: "relative",
|
| 1105 |
+
overflow: "hidden",
|
| 1106 |
+
padding: "8px 16px",
|
| 1107 |
+
backgroundColor: (theme) => alpha(theme.palette.primary.main, 0.05),
|
| 1108 |
+
}),
|
| 1109 |
+
},
|
| 1110 |
+
},
|
| 1111 |
+
{
|
| 1112 |
+
accessorKey: "evaluations.multilingual_average",
|
| 1113 |
+
header: createLeaderboardHeader("Multilingual", "Average performance on multilingual tasks", null),
|
| 1114 |
+
cell: ({ row, getValue }) => createScoreCell(getValue, row, "evaluations.multilingual_average"),
|
| 1115 |
+
size: TABLE_DEFAULTS.COLUMNS.COLUMN_SIZES["model.average_score"] || 100,
|
| 1116 |
+
meta: {
|
| 1117 |
+
headerStyle: {
|
| 1118 |
+
backgroundColor: (theme) => alpha(theme.palette.secondary.main, 0.05),
|
| 1119 |
+
},
|
| 1120 |
+
cellStyle: (value) => ({
|
| 1121 |
+
position: "relative",
|
| 1122 |
+
overflow: "hidden",
|
| 1123 |
+
padding: "8px 16px",
|
| 1124 |
+
backgroundColor: (theme) => alpha(theme.palette.secondary.main, 0.05),
|
| 1125 |
+
}),
|
| 1126 |
+
},
|
| 1127 |
+
}
|
| 1128 |
];
|
| 1129 |
|
| 1130 |
const optionalColumns = [
|