import { ref, computed, reactive } from 'vue'
import { parseCsvContent } from '@/utils/csvUtils.js'

// Module-level state object shared by every component that calls useLeaderboardData().
const globalState = reactive({
  leaderboard: [],
  csvHeaders: [],
  loading: true,
  error: null,
  // Sort state: current sort column (key) and direction (true = descending).
  sortKey: '',
  sortDesc: true,
  visibleColumns: [],
  dataGroups: [],
  selectedDataName: '',
  selectedDataNameChart: '',
  modelTypeGroups: [],
  selectedModelType: [], // an array so that several model types can be selected at once
  DEFAULT_CSV_PATH: '/filtered.csv',
  // Columns that are hidden by default.
  DEFAULT_HIDDEN: new Set(['seq_len', 'uniform_entropy', 'entropy_gain', 'information_capacity', 'data_name', 'model_size (B)'])
})

// Model type mapping: key = model type, value = the model_series values that belong to it.
const modelTypeMapping = {
  'Qwen3': ['Qwen3'],
  'Qwen2.5': ['Qwen2.5'],
  'Qwen2': ['Qwen2'],
  'Qwen1.5': ['Qwen1.5'],
  'Llama-3': ['Llama-3.1', 'Llama-3.2'],
  'InternLM2.5': ['internlm2.5'],
  'GLM-4': ['glm-4', 'GLM-4'],
  'Seed-OSS': ['Seed-OSS'],
  'Gemma-3': ['gemma-3'],
  'Hunyuan': ['Hunyuan'],
  'Qwen2 (MoE)': ['Qwen2 (MoE)'],
  'Qwen1.5 (MoE)': ['Qwen1.5 (MoE)'],
  'DeepSeek-V3.1': ['DeepSeek-V3.1-Base'],
  'DeepSeek-V2': ['DeepSeek-V2'],
  'GLM-4.5': ['GLM-4.5-Air-Base', 'GLM-4.5-Base'],
  'Llama-4': ['Llama-4'],
}

// Name prefixes of model families that also ship MoE variants (see detectMoeSeries).
const MoEModelSeries = ['Qwen2', 'Qwen1.5']

// Exposed as-is through useLeaderboardData(); consumed outside this module.
// NOTE(review): presumably the series drawn with a distinct chart symbol — confirm against the chart component.
const strtSymbolSeries = ['Qwen2 (MoE)', 'Qwen1.5 (MoE)', 'DeepSeek-V3.1', 'DeepSeek-V2', 'GLM-4.5', 'Llama-4']

// Alternative explicit list (disabled):
// const autoShowSeries = ['Qwen3', 'Llama-3', 'InternLM2.5', 'GLM-4', 'Seed-OSS', 'Gemma-3', 'Hunyuan', 'DeepSeek-V3.1', 'DeepSeek-V2', 'GLM-4.5']
// NOTE(review): "*" is presumably interpreted by consumers as "every series" — confirm.
const autoShowSeries = ['*']

// Header display names (raw CSV header -> label shown in the UI). Extend or edit here.
const headerDisplayMap = reactive({
  'model_name': 'MODEL NAME',
  'model_series': 'MODEL SERIES',
  'model_size (B)': 'MODEL SIZE (B)',
  'constant': 'TEXT SIZE',
  'conditional_entropy': 'NEGATIVE LOG-LIKELIHOOD',
  'BF16_TFLOPs': 'FLOPs (G)',
  'ic': ' INFORMATION CAPACITY',
})

// Dataset display names (raw data_name -> label shown in the UI).
const dataNameDisplayMap = reactive({
  'data_part_0000': 'Mixed text',
  'eng_Latn_000_00027_long': 'FinePDFs-en',
  'IndustryCorpus_batch_aa_long': 'Ch-FineWeb-Edu',
  'CC-MAIN-2013-20_train-00000-of-00014_long': 'FineWeb-Edu',
  'NextCoderDataset_v1_long': 'NextCoder',
})

// The selection mode defaults to 'Model'.
const selectedMode = ref('Model')

// Key columns are shown first, in this order; any other CSV column follows in file order.
const PREFERRED_COLUMN_ORDER = ['model_name', 'model_series', 'model_size (B)', 'seq_len', 'uniform_entropy', 'constant', 'conditional_entropy', 'entropy_gain', 'BF16_TFLOPs', 'information_capacity', 'ic']

// Columns rendered as rounded integers / fixed-precision floats by formatRow().
const NUMERIC_INT_COLS = new Set(['seq_len'])
const NUMERIC_FLOAT_COLS = new Set(['uniform_entropy', 'conditional_entropy', 'entropy_gain', 'information_capacity', 'ic', 'constant', 'BF16_TFLOPs'])
// Per-column decimal places for float columns; columns not listed use DEFAULT_FLOAT_DIGITS.
const FLOAT_DIGITS = new Map([['ic', 4], ['constant', 2]])
const DEFAULT_FLOAT_DIGITS = 3

// Matches a trailing "-A<number>B" suffix such as "-A14B" or "-A2.7B".
// The number is required so that names like "Qwen2-Audio-7B" are not mistaken for MoE models.
const MOE_SUFFIX_PATTERN = /-A\d+(?:\.\d+)?B$/

/**
 * Tell whether a cell holds no usable value.
 * @param {*} value - raw cell value
 * @returns {boolean} true for null, undefined, and empty or whitespace-only strings
 */
function isBlank(value) {
  return value == null || (typeof value === 'string' && value.trim() === '')
}

/**
 * Convert a cell to a finite number.
 * Blank cells are rejected explicitly because Number('') is 0, which would
 * otherwise be counted as a real measurement.
 * @param {*} value - raw cell value
 * @returns {number|null} the finite number, or null when the cell is blank or not numeric
 */
function toFiniteNumber(value) {
  if (isBlank(value)) return null
  const n = Number(value)
  return Number.isFinite(n) ? n : null
}

/**
 * Comparator for two cell values of the same column.
 * Missing (blank) values always go last, whatever the direction. Two numeric
 * values compare numerically; anything else compares as strings.
 * @param {*} va - cell of the first row
 * @param {*} vb - cell of the second row
 * @param {boolean} desc - true for descending order
 * @returns {number} negative, zero or positive, as expected by Array.prototype.sort
 */
function compareCells(va, vb, desc) {
  const aMissing = isBlank(va)
  const bMissing = isBlank(vb)
  if (aMissing && bMissing) return 0
  if (aMissing) return 1
  if (bMissing) return -1
  const na = Number(va)
  const nb = Number(vb)
  if (Number.isFinite(na) && Number.isFinite(nb)) {
    return desc ? nb - na : na - nb
  }
  const sa = String(va)
  const sb = String(vb)
  return desc ? sb.localeCompare(sa) : sa.localeCompare(sb)
}

/**
 * Return the "<prefix> (MoE)" series label for a model name, if it names an MoE model.
 * A name qualifies when it starts with one of MoEModelSeries and ends with an
 * "-A<number>B" suffix.
 * NOTE(review): the prefix test is a plain startsWith, so 'Qwen2' also matches
 * names that begin with 'Qwen2.5' — confirm this is acceptable for the data.
 * @param {*} modelName - value of the model_name column
 * @returns {string|null} the MoE series label, or null when the name is not an MoE model
 */
function detectMoeSeries(modelName) {
  const name = String(modelName || '')
  if (!MOE_SUFFIX_PATTERN.test(name)) return null
  const prefix = MoEModelSeries.find(p => name.startsWith(p))
  return prefix ? `${prefix} (MoE)` : null
}

/**
 * Look up the model type whose series list contains the given model_series.
 * @param {*} series - value of the model_series column
 * @returns {string} the matching key of modelTypeMapping, or '' when none matches
 */
function inferModelType(series) {
  for (const [type, members] of Object.entries(modelTypeMapping)) {
    if (members.includes(series)) return type
  }
  return ''
}

/**
 * Put the preferred columns first and append the remaining headers in file order.
 * @param {string[]} headers - headers as returned by the CSV parser
 * @returns {string[]} de-duplicated, ordered header list (without 'rank')
 */
function orderHeaders(headers) {
  const preferred = PREFERRED_COLUMN_ORDER.filter(p => headers.includes(p))
  // A Set keeps first-insertion order, which yields "preferred first, then the rest".
  return [...new Set([...preferred, ...headers])]
}

/**
 * Pre-compute the display string of every column of a row.
 * @param {Object} row - leaderboard row
 * @param {string[]} columns - columns to format
 * @returns {Object} map column -> display value ('' for empty cells)
 */
function formatRow(row, columns) {
  const formatted = {}
  for (const h of columns) {
    const raw = row[h]
    if (raw == null || raw === '') {
      formatted[h] = ''
      continue
    }
    if (NUMERIC_INT_COLS.has(h)) {
      const n = Number(raw)
      formatted[h] = Number.isFinite(n) ? String(Math.round(n)) : raw
    } else if (NUMERIC_FLOAT_COLS.has(h)) {
      const n = Number(raw)
      formatted[h] = Number.isFinite(n) ? n.toFixed(FLOAT_DIGITS.get(h) ?? DEFAULT_FLOAT_DIGITS) : raw
    } else {
      formatted[h] = raw
    }
  }
  return formatted
}

/**
 * Turn one parsed CSV row into a leaderboard row.
 * Adds rank and model_type, relabels MoE models, rescales BF16_TFLOPs and
 * attaches the pre-formatted cell strings under `_formatted`.
 * The parser's row object is not modified.
 * @param {Object} raw - row as returned by the CSV parser
 * @param {number} index - zero-based position of the row in the file
 * @param {string[]} orderedHeaders - columns to pre-format
 * @returns {Object} the leaderboard row
 */
function buildLeaderboardRow(raw, index, orderedHeaders) {
  const moeSeries = detectMoeSeries(raw['model_name'])
  const source = moeSeries ? { ...raw, model_series: moeSeries } : raw
  // BF16_TFLOPs is divided by 1024 and then multiplied by 1000; a missing or
  // non-numeric value is treated as 0.
  // NOTE(review): presumably a unit conversion for the 'FLOPs (G)' column — confirm.
  const originalTFLOPs = Number(source['BF16_TFLOPs']) || 0
  const row = {
    rank: index + 1,
    model_type: inferModelType(source['model_series']),
    ...source,
    BF16_TFLOPs: (originalTFLOPs / 1024) * 1000
  }
  // Built on the plain object, before it is handed to reactive state, so that
  // filling it does not go through the reactive proxy cell by cell.
  row._formatted = formatRow(row, orderedHeaders)
  return row
}

/**
 * Collect the distinct, non-empty data_name values in order of first appearance.
 * @param {Object[]} rows - parsed CSV rows
 * @returns {string[]} dataset names
 */
function collectDataGroups(rows) {
  const seen = new Set()
  for (const r of rows) {
    const dn = r['data_name']
    if (dn == null) continue
    const s = String(dn)
    if (s.trim() !== '') seen.add(s)
  }
  return [...seen]
}

/**
 * Read the column selection stored under the 'visible_columns' localStorage key.
 * Best effort: an unavailable storage or malformed JSON yields null.
 * @returns {Array|null} the stored array, or null when nothing usable is stored
 */
function readSavedVisibleColumns() {
  try {
    const saved = localStorage.getItem('visible_columns')
    if (!saved) return null
    const parsed = JSON.parse(saved)
    return Array.isArray(parsed) ? parsed : null
  } catch (e) {
    console.warn('Ignoring unreadable visible_columns preference', e)
    return null
  }
}

// Columns the user may toggle (excludes rank and the columns hidden by default).
const selectableColumns = computed(() => {
  if (!globalState.csvHeaders || globalState.csvHeaders.length === 0) return []
  return globalState.csvHeaders.slice(1).filter(h => !globalState.DEFAULT_HIDDEN.has(h))
})

// Model type groups, taken from the keys of the mapping object.
const modelTypeGroups = computed(() => {
  return Object.keys(modelTypeMapping)
})

// Sorted view of the leaderboard driven by sortKey/sortDesc.
// Works on a copy; globalState.leaderboard itself is never reordered.
const sortedLeaderboard = computed(() => {
  const rows = globalState.leaderboard
  if (!rows || rows.length === 0) return []
  const key = globalState.sortKey
  const desc = !!globalState.sortDesc
  const arr = [...rows]
  if (!key) return arr
  arr.sort((a, b) => compareCells(a[key], b[key], desc))
  return arr
})

// Leaderboard filtered by selectedDataName and selectedModelType; used to render the table.
const filteredLeaderboard = computed(() => {
  if (!globalState.leaderboard || globalState.leaderboard.length === 0) return []

  // Start from the already sorted view.
  let filtered = sortedLeaderboard.value

  // Dataset filter.
  if (globalState.selectedDataName && globalState.selectedDataName !== 'all') {
    const wanted = String(globalState.selectedDataName)
    filtered = filtered.filter(r => String(r['data_name'] ?? '') === wanted)
  }

  // Model type filter (multi-select).
  // The special value '__none__' means the user explicitly cleared the
  // selection, in which case the result must be empty.
  const sel = globalState.selectedModelType
  if (Array.isArray(sel)) {
    if (sel.includes('__none__')) return []
    if (sel.length > 0) {
      filtered = filtered.filter(r => sel.includes(String(r['model_type'] ?? '')))
    }
  }

  return filtered
})

// Average IC per model type. Returns an array of { ModelSeries, IC } sorted by IC, descending.
const modelSeriesICAvg = computed(() => {
  const selModelTypes = globalState.selectedModelType
  const hasSelection = Array.isArray(selModelTypes)
  // An explicitly cleared selection yields no rows at all.
  if (hasSelection && selModelTypes.includes('__none__')) return []

  // Apply the dataset filter once instead of once per model type.
  const selData = globalState.selectedDataName
  const allRows = globalState.leaderboard || []
  const rows = selData && selData !== 'all'
    ? allRows.filter(r => String(r['data_name'] ?? '') === String(selData))
    : allRows

  // One output row per key of modelTypeMapping (the user-defined grouping).
  const out = []
  for (const key of Object.keys(modelTypeMapping)) {
    // With a non-empty selection, only the selected keys are processed.
    if (hasSelection && selModelTypes.length > 0 && !selModelTypes.includes(key)) continue

    const mappedSeries = new Set(modelTypeMapping[key] || [])
    // The key itself also counts as a series name (safety net).
    mappedSeries.add(key)

    let sum = 0
    let count = 0
    for (const r of rows) {
      const seriesName = String(r['model_series'] ?? '').trim()
      if (!seriesName || !mappedSeries.has(seriesName)) continue
      // Prefer 'ic'; fall back to 'information_capacity' when 'ic' is missing or blank.
      const n = toFiniteNumber(isBlank(r['ic']) ? r['information_capacity'] : r['ic'])
      if (n === null) continue
      sum += n
      count += 1
    }

    if (count === 0) continue
    out.push({ ModelSeries: key, IC: Number((sum / count).toFixed(4)) })
  }

  // Sorted by IC, descending, for display.
  out.sort((a, b) => b.IC - a.IC)
  return out
})

/**
 * Header click handler: a new column starts in descending order, clicking the
 * current column again toggles between descending and ascending.
 * @param {string} h - column key; falsy values are ignored
 */
function setSortKey(h) {
  if (!h) return
  if (globalState.sortKey !== h) {
    globalState.sortKey = h
    globalState.sortDesc = true
    return
  }
  // Same key: toggle between descending and ascending.
  globalState.sortDesc = !globalState.sortDesc
}

// Columns currently displayed (without rank), in header order.
const displayedColumns = computed(() => {
  if (!globalState.csvHeaders || globalState.csvHeaders.length === 0) return []
  // csvHeaders holds 'rank' at index 0.
  const all = globalState.csvHeaders.slice(1)
  return all.filter(h => globalState.visibleColumns.includes(h))
})

/**
 * Fetch a CSV file, parse it and populate the shared state: headers, rows,
 * dataset groups, model type groups, default selections and visible columns.
 * Errors are logged and stored in globalState.error; the returned promise
 * does not reject. globalState.loading is true while the call is in flight.
 * @param {string} [path] - URL of the CSV file; defaults to globalState.DEFAULT_CSV_PATH
 * @returns {Promise<void>}
 */
async function fetchAndLoadCsv(path = globalState.DEFAULT_CSV_PATH) {
  globalState.loading = true
  globalState.error = null
  try {
    const res = await fetch(path)
    if (!res.ok) throw new Error(`Failed to fetch CSV (${res.status})`)
    const txt = await res.text()
    const { headers, rows } = parseCsvContent(txt)
    if (!headers || headers.length === 0) {
      globalState.leaderboard = []
      return // the finally block clears the loading flag
    }

    // Default sort: the last CSV column, descending. Rows are not pre-sorted here;
    // ordering is applied by the sortedLeaderboard view.
    globalState.sortKey = headers[headers.length - 1] || ''
    globalState.sortDesc = true

    const ordered = orderHeaders(headers)
    globalState.csvHeaders = ['rank', ...ordered]
    globalState.leaderboard = rows.map((r, idx) => buildLeaderboardRow(r, idx, ordered))

    // Dataset groups, in order of first appearance.
    globalState.dataGroups = collectDataGroups(rows)

    // Model type groups.
    globalState.modelTypeGroups = Object.keys(modelTypeMapping)

    // The first dataset is selected by default, for both the table and the chart.
    if (globalState.dataGroups.length > 0) {
      globalState.selectedDataName = globalState.dataGroups[0]
      globalState.selectedDataNameChart = globalState.dataGroups[0]
    }

    // Every model type is selected by default, so the filter UI starts fully checked.
    if (globalState.modelTypeGroups.length > 0) {
      globalState.selectedModelType = [...globalState.modelTypeGroups]
    }

    // Visible columns: prefer the selection saved in localStorage, keeping only
    // columns that exist in this CSV and are not hidden by default.
    const saved = readSavedVisibleColumns()
    if (saved) {
      globalState.visibleColumns = saved.filter(h => ordered.includes(h) && !globalState.DEFAULT_HIDDEN.has(h))
    }
    if (!globalState.visibleColumns || globalState.visibleColumns.length === 0) {
      // Fallback: show every selectable column.
      globalState.visibleColumns = ordered.filter(h => !globalState.DEFAULT_HIDDEN.has(h))
    }
  } catch (e) {
    console.error(e)
    globalState.error = e?.message || String(e)
  } finally {
    globalState.loading = false
  }
}

/** Show every selectable column. */
function selectAll() {
  // Copy, so that visibleColumns never aliases the computed array.
  globalState.visibleColumns = [...selectableColumns.value]
}

/** Hide every column. */
function clearAll() {
  globalState.visibleColumns = []
}

/** Select every model type. */
function selectAllModelTypes() {
  globalState.selectedModelType = [...modelTypeGroups.value]
}

/** Deselect every model type. */
function clearAllModelTypes() {
  // A special marker records that the user explicitly cleared the selection,
  // as opposed to an empty array, which the filters treat as "no filter".
  globalState.selectedModelType = ['__none__']
}

/**
 * Display value of one cell.
 * @param {string} h - column key
 * @param {Object} model - leaderboard row
 * @returns {*} the pre-formatted string when available, otherwise the raw value; '' without a row
 */
function formatCell(h, model) {
  if (!model) return ''
  if (model._formatted && model._formatted[h] !== undefined) return model._formatted[h]
  return model[h]
}

/**
 * Initialisation entry point, meant to be called when a component mounts.
 * The promise is intentionally not awaited: fetchAndLoadCsv handles its own errors.
 */
function init() {
  fetchAndLoadCsv()
}

/**
 * Composable exposing the shared leaderboard state, derived views and actions.
 * Every caller receives wrappers around the same module-level state.
 * @returns {Object} refs/computeds for state, plus the action functions
 */
export function useLeaderboardData() {
  return {
    // State
    leaderboard: computed(() => globalState.leaderboard),
    csvHeaders: computed(() => globalState.csvHeaders),
    loading: computed(() => globalState.loading),
    error: computed(() => globalState.error),
    visibleColumns: computed({
      get: () => globalState.visibleColumns,
      set: (v) => { globalState.visibleColumns = v }
    }),
    selectedMode: computed({
      get: () => selectedMode.value,
      set: (v) => { selectedMode.value = v }
    }),
    selectableColumns,
    autoShowSeries,
    strtSymbolSeries,
    headerDisplayMap: computed(() => headerDisplayMap),
    dataNameDisplayMap: computed(() => dataNameDisplayMap),
    dataGroups: computed(() => globalState.dataGroups),
    selectedDataName: computed({
      get: () => globalState.selectedDataName,
      set: (v) => { globalState.selectedDataName = v }
    }),
    selectedDataNameChart: computed({
      get: () => globalState.selectedDataNameChart,
      set: (v) => { globalState.selectedDataNameChart = v }
    }),
    modelTypeGroups: computed(() => globalState.modelTypeGroups),
    selectedModelType: computed({
      get: () => globalState.selectedModelType,
      set: (v) => {
        // Once the user ticks a real model type in the UI, drop the '__none__' marker.
        if (Array.isArray(v) && v.some(x => x !== '__none__')) {
          globalState.selectedModelType = v.filter(x => x !== '__none__')
        } else {
          globalState.selectedModelType = v
        }
      }
    }),
    filteredLeaderboard,
    displayedColumns,
    modelSeriesICAvg,

    // Actions
    fetchAndLoadCsv,
    selectAll,
    clearAll,
    selectAllModelTypes,
    clearAllModelTypes,
    // Sorting
    sortKey: computed(() => globalState.sortKey),
    sortDesc: computed(() => globalState.sortDesc),
    setSortKey,
    formatCell,
    init
  }
}