import { ref, computed, reactive } from 'vue'
import { parseCsvContent, processModelNames } from '@/utils/csvUtils.js'

// Module-level state shared by every component that calls useLeaderboardData().
const globalState = reactive({
  leaderboard: [],
  csvHeaders: [],
  loading: true,
  error: null,
  // Sort state: the column key currently sorted on, and its direction (true = descending).
  sortKey: '',
  sortDesc: true,
  visibleColumns: [],
  dataGroups: [],
  selectedDataName: '',
  selectedDataNameChart: '',
  modelTypeGroups: [],
  selectedModelType: [], // an array so that several model types can be selected at once
  DEFAULT_CSV_PATH: '/filtered.csv',
  // Columns hidden by default.
  DEFAULT_HIDDEN: new Set(['seq_len', 'uniform_entropy', 'entropy_gain', 'information_capacity', 'data_name', 'model_size (B)'])
})

// Model-type mapping: key = model type shown in the UI, value = the raw model_series
// names that belong to it. fetchAndLoadCsv() adds entries for series found in the CSV
// that are not listed here.
// NOTE(review): 'GLM-4' lists the same member twice; possibly one entry was meant to be
// a different spelling — confirm against the data.
const modelTypeMapping = {
  'Qwen3': ['Qwen3'],
  'Qwen2.5': ['Qwen2.5'],
  'Qwen2': ['Qwen2'],
  'Qwen1.5': ['Qwen1.5'],
  'Llama-3': ['Llama-3.1', 'Llama-3.2'],
  'InternLM2.5': ['Internlm2.5'],
  'GLM-4': ['GLM-4', 'GLM-4'],
  'Seed-OSS': ['Seed-OSS'],
  'Gemma-3': ['Gemma-3'],
  'Hunyuan': ['Hunyuan'],
  'Qwen2 (MoE)': ['Qwen2 (MoE)'],
  'Qwen1.5 (MoE)': ['Qwen1.5 (MoE)'],
  'DeepSeek-V3.1': ['DeepSeek-V3.1-Base'],
  'DeepSeek-V2': ['DeepSeek-V2'],
  'GLM-4.5': ['GLM-4.5-Air-Base', 'GLM-4.5-Base'],
  'Llama-4': ['Llama-4']
}

// Model-name prefixes whose "-A<n>B" suffixed variants are re-labelled "<prefix> (MoE)".
const MoEModelSeries = ['Qwen2', 'Qwen1.5']
const strtSymbolSeries = ['Qwen2 (MoE)', 'Qwen1.5 (MoE)', 'DeepSeek-V3.1', 'DeepSeek-V2', 'GLM-4.5', 'Llama-4']
// const autoShowSeries = ['Qwen3', 'Llama-3', 'InternLM2.5', 'GLM-4', 'Seed-OSS', 'Gemma-3', 'Hunyuan', 'DeepSeek-V3.1', 'DeepSeek-V2', 'GLM-4.5']
const autoShowSeries = ['*']

// Header display names (raw CSV header -> label). Entries can be added or edited here.
const headerDisplayMap = reactive({
  'rank': 'Rank',
  'model_name': 'Model Name',
  'model_series': 'Model Series',
  'model_size (B)': 'Model Size (B)',
  'constant': 'Text Size',
  'conditional_entropy': 'Negative Log-Likelihood',
  'BF16_TFLOPs': 'FLOPs (G)',
  'ic': ' Information Capacity',
  'model_source': 'Tested by'
})

// Dataset display names (raw data_name -> label).
const dataNameDisplayMap = reactive({
  'data_part_0000': 'Mixed text',
  'eng_Latn_000_00027_long': 'FinePDFs-en',
  'IndustryCorpus_batch_aa_long': 'Ch-FineWeb-Edu',
  'CC-MAIN-2013-20_train-00000-of-00014_long': 'FineWeb-Edu',
  'NextCoderDataset_v1_long': 'NextCoder'
})

// The selection mode defaults to 'Model'.
const selectedMode = ref('Model')

// Trailing "[[source]]" tag on a model name; capture group 1 is the source.
const SOURCE_TAG_RE = /\[\[(.+?)\]\]$/
// Trailing "-A<...>B" suffix that marks a MoE model name.
const MOE_SUFFIX_RE = /-A(.+)B$/

// Columns rendered as rounded integers / fixed-precision floats by formatValue().
const INT_COLUMNS = new Set(['seq_len'])
const FLOAT_COLUMNS = new Set(['uniform_entropy', 'conditional_entropy', 'entropy_gain', 'information_capacity', 'ic', 'constant', 'BF16_TFLOPs'])
// Per-column decimal places; float columns not listed here use DEFAULT_FLOAT_DIGITS.
const FLOAT_DIGITS = new Map([['ic', 4], ['constant', 2]])
const DEFAULT_FLOAT_DIGITS = 3

// Sort key (raw column name) -> property name on the rows produced by modelSeriesICAvg.
const SERIES_SORT_FIELDS = new Map([
  ['ic', 'IC'],
  ['constant', 'Constant'],
  ['model_series', 'ModelSeries'],
  ['model_source', 'ModelSource']
])

/**
 * Own-property check that is safe for dictionary-style plain objects.
 * @param {object} obj
 * @param {string} key
 * @returns {boolean}
 */
function hasOwn(obj, key) {
  return Object.prototype.hasOwnProperty.call(obj, key)
}

/**
 * Comparator shared by the table and series views.
 * null/undefined always sort last; two finite numbers compare numerically;
 * anything else compares as strings via localeCompare.
 * @param {*} va
 * @param {*} vb
 * @param {boolean} desc - true for descending order
 * @returns {number}
 */
function compareValues(va, vb, desc) {
  if (va == null && vb == null) return 0
  if (va == null) return 1
  if (vb == null) return -1
  const na = Number(va)
  const nb = Number(vb)
  if (Number.isFinite(na) && Number.isFinite(nb)) {
    return desc ? nb - na : na - nb
  }
  const sa = String(va)
  const sb = String(vb)
  return desc ? sb.localeCompare(sa) : sa.localeCompare(sb)
}

/**
 * Render one raw cell value for display.
 * @param {string} column - raw column name
 * @param {*} raw - raw cell value
 * @returns {*} '' for missing values, a fixed-precision string for numeric columns,
 *   otherwise the raw value unchanged
 */
function formatValue(column, raw) {
  if (raw == null || raw === '') return ''
  const isInt = INT_COLUMNS.has(column)
  if (!isInt && !FLOAT_COLUMNS.has(column)) return raw
  const n = Number(raw)
  if (!Number.isFinite(n)) return raw
  if (isInt) return String(Math.round(n))
  return n.toFixed(FLOAT_DIGITS.get(column) ?? DEFAULT_FLOAT_DIGITS)
}

// Columns the user may toggle: every CSV header that is not hidden by default.
const selectableColumns = computed(() => {
  if (!globalState.csvHeaders || globalState.csvHeaders.length === 0) return []
  return globalState.csvHeaders.filter(h => !globalState.DEFAULT_HIDDEN.has(h))
})

// Model-type groups. Read from the reactive state so the value tracks series registered
// while loading a CSV; modelTypeMapping is a plain object, so a computed that only read
// it would be cached forever. Falls back to the static mapping keys before the first load.
const modelTypeGroups = computed(() => {
  if (globalState.modelTypeGroups.length > 0) return globalState.modelTypeGroups
  return Object.keys(modelTypeMapping)
})

// Sorted copy of the leaderboard according to sortKey/sortDesc.
// globalState.leaderboard itself is never reordered.
const sortedLeaderboard = computed(() => {
  if (!globalState.leaderboard || globalState.leaderboard.length === 0) return []
  const key = globalState.sortKey
  const desc = !!globalState.sortDesc
  const copy = [...globalState.leaderboard]
  if (!key) return copy
  copy.sort((a, b) => compareValues(a[key], b[key], desc))
  return copy
})

// Rows for the table: the sorted view filtered by selectedDataName and selectedModelType,
// with rank re-assigned to match the resulting order.
const filteredLeaderboard = computed(() => {
  if (!globalState.leaderboard || globalState.leaderboard.length === 0) return []

  let filtered = sortedLeaderboard.value

  // Dataset filter.
  const dataName = globalState.selectedDataName
  if (dataName && dataName !== 'all') {
    const wanted = String(dataName)
    filtered = filtered.filter(r => String(r['data_name'] ?? '') === wanted)
  }

  // Model-type filter (multi-select). The sentinel '__none__' means the user explicitly
  // cleared the selection, which must yield an empty result.
  const sel = globalState.selectedModelType
  if (Array.isArray(sel)) {
    if (sel.includes('__none__')) return []
    if (sel.length > 0) {
      filtered = filtered.filter(r => sel.includes(String(r['model_type'] ?? '')))
    }
  }

  return filtered.map((item, index) => ({ ...item, rank: index + 1 }))
})

// Average IC per model type. Returns an array of
// { ModelSeries, IC, Constant, ModelSource }, where Constant and ModelSource come from
// the last contributing row of each group.
const modelSeriesICAvg = computed(() => {
  const rows = globalState.leaderboard || []
  const selData = globalState.selectedDataName
  const selModelTypes = globalState.selectedModelType
  const hasTypeFilter = Array.isArray(selModelTypes) && selModelTypes.length > 0

  // The user explicitly cleared the selection.
  if (hasTypeFilter && selModelTypes.includes('__none__')) return []

  const datasetFilter = selData && selData !== 'all' ? String(selData) : null
  const out = []

  // One output row per key of modelTypeMapping (the curated model types).
  for (const key of Object.keys(modelTypeMapping)) {
    // With an active selection, only the selected types are reported.
    if (hasTypeFilter && !selModelTypes.includes(key)) continue

    // Accept every mapped series name plus the key itself, because loaded rows carry
    // the model type in their model_series field.
    const mappedSeries = new Set(modelTypeMapping[key] || [])
    mappedSeries.add(key)

    let sum = 0
    let count = 0
    let constant = 0
    let modelSource = ''
    for (const r of rows) {
      if (datasetFilter !== null && String(r['data_name'] ?? '') !== datasetFilter) continue

      const seriesName = String(r['model_series'] ?? '').trim()
      if (!seriesName || !mappedSeries.has(seriesName)) continue

      // Number(null) and Number('') are both 0, so a missing IC must be skipped
      // explicitly or it would drag the average towards zero.
      const icRaw = r['ic']
      if (icRaw == null || icRaw === '') continue
      const ic = Number(icRaw)
      if (!Number.isFinite(ic)) continue

      constant = Number(r['constant']) || 0
      sum += ic
      count += 1
      modelSource = r['model_source']
    }
    if (count === 0) continue

    out.push({
      ModelSeries: key,
      IC: Number((sum / count).toFixed(4)),
      Constant: constant,
      ModelSource: modelSource
    })
  }

  const sortKey = globalState.sortKey
  const desc = !!globalState.sortDesc
  if (sortKey) {
    // Translate raw column keys to this view's property names; unknown keys are used as-is.
    const field = SERIES_SORT_FIELDS.get(sortKey) ?? sortKey
    out.sort((a, b) => compareValues(a[field], b[field], desc))
  } else {
    // Default: IC descending.
    out.sort((a, b) => b.IC - a.IC)
  }
  return out
})

/**
 * Header click handler: a new column starts descending, clicking the same column
 * again toggles between descending and ascending.
 * @param {string} h - raw column key
 */
function setSortKey(h) {
  if (!h) return
  if (globalState.sortKey === h) {
    globalState.sortDesc = !globalState.sortDesc
    return
  }
  globalState.sortKey = h
  globalState.sortDesc = true
}

// Columns currently shown, in csvHeaders order (csvHeaders starts with 'rank').
const displayedColumns = computed(() => {
  if (!globalState.csvHeaders || globalState.csvHeaders.length === 0) return []
  return globalState.csvHeaders.filter(h => globalState.visibleColumns.includes(h))
})

/**
 * Fetch a CSV, preprocess its rows and populate the shared state.
 * Failures are logged and stored in globalState.error rather than thrown.
 * @param {string} [path] - URL of the CSV; defaults to globalState.DEFAULT_CSV_PATH
 * @returns {Promise<void>}
 */
async function fetchAndLoadCsv(path = globalState.DEFAULT_CSV_PATH) {
  globalState.loading = true
  globalState.error = null
  try {
    const res = await fetch(path)
    if (!res.ok) throw new Error(`Failed to fetch CSV (${res.status})`)
    const txt = await res.text()
    const { headers, rows } = parseCsvContent(txt)
    processModelNames(rows)
    if (!headers || headers.length === 0) {
      globalState.leaderboard = []
      return
    }

    // Default sort: the last CSV column, descending. The rows are not pre-sorted here.
    globalState.sortKey = headers[headers.length - 1] || ''
    globalState.sortDesc = true

    // Every series name already covered by the mapping; built once and kept in sync
    // below instead of re-flattening the mapping for each row.
    const knownSeries = new Set(Object.values(modelTypeMapping).flat())

    for (const r of rows) {
      // 1. Register series that the curated mapping does not cover, so each row resolves
      //    to a model type. If the name is already a mapping key, extend that key's
      //    member list rather than replacing it, which would drop its curated members.
      const seriesName = String(r['model_series'] ?? '').trim()
      if (!knownSeries.has(seriesName)) {
        if (hasOwn(modelTypeMapping, seriesName)) {
          modelTypeMapping[seriesName].push(seriesName)
        } else {
          modelTypeMapping[seriesName] = [seriesName]
        }
        knownSeries.add(seriesName)
      }

      // 2. Model source: a model name ending in "[[xxxx]]" was tested by xxxx and the tag
      //    is removed from the name; every other row is attributed to 'TeleAI'.
      const rawName = String(r['model_name'] || '')
      let cleanName = rawName
      const sourceMatch = rawName.match(SOURCE_TAG_RE)
      if (sourceMatch) {
        cleanName = rawName.replace(SOURCE_TAG_RE, '').trim()
        r['model_source'] = sourceMatch[1]
        r['model_name'] = cleanName
      } else {
        r['model_source'] = 'TeleAI'
      }

      // 3. MoE detection: a name starting with one of MoEModelSeries and ending in
      //    "-A<...>B" belongs to "<prefix> (MoE)". The check uses the cleaned name so a
      //    trailing source tag cannot hide the suffix.
      let series = seriesName
      if (MOE_SUFFIX_RE.test(cleanName)) {
        const moePrefix = MoEModelSeries.find(prefix => cleanName.startsWith(prefix))
        if (moePrefix !== undefined) series = `${moePrefix} (MoE)`
      }

      // 4. Resolve the model type: the first mapping key whose members contain the series.
      let modelType = ''
      for (const [type, members] of Object.entries(modelTypeMapping)) {
        if (members.includes(series)) {
          modelType = type
          break
        }
      }
      r['model_type'] = modelType
      // 5. The displayed series is the model type.
      r['model_series'] = modelType
    }

    // Column order: the preferred columns first (when present), then any remaining headers.
    const preferred = ['model_name', 'model_series', 'model_size (B)', 'seq_len', 'uniform_entropy', 'constant', 'conditional_entropy', 'entropy_gain', 'BF16_TFLOPs', 'information_capacity', 'ic', 'model_source']
    const headerSet = new Set(headers)
    const ordered = Array.from(new Set([...preferred.filter(p => headerSet.has(p)), ...headers]))

    // 'rank' and 'model_source' are derived columns; de-duplicate in case the CSV
    // already contains a column with either name.
    globalState.csvHeaders = Array.from(new Set(['rank', ...ordered, 'model_source']))

    globalState.leaderboard = rows.map((r, idx) => {
      // BF16_TFLOPs is rescaled: divided by 1024, then multiplied by 1000.
      const originalTFLOPs = Number(r['BF16_TFLOPs']) || 0
      const row = {
        rank: idx + 1,
        model_type: r['model_type'],
        ...r,
        BF16_TFLOPs: (originalTFLOPs / 1024) * 1000
      }
      // Pre-rendered display strings per column, read back by formatCell().
      row._formatted = {}
      for (const h of ordered) row._formatted[h] = formatValue(h, row[h])
      return row
    })

    // data_name groups, in order of first appearance, skipping blank values.
    const seenDataNames = new Set()
    const groups = []
    for (const r of rows) {
      const dn = r['data_name']
      if (dn == null) continue
      const s = String(dn)
      if (s.trim() === '' || seenDataNames.has(s)) continue
      seenDataNames.add(s)
      groups.push(s)
    }
    globalState.dataGroups = groups

    // model_type groups.
    globalState.modelTypeGroups = Object.keys(modelTypeMapping)

    // The first dataset is selected by default.
    if (groups.length > 0) {
      globalState.selectedDataName = groups[0]
      globalState.selectedDataNameChart = groups[0]
    }

    // All model types are selected by default so the filter UI starts fully checked.
    if (globalState.modelTypeGroups.length > 0) {
      globalState.selectedModelType = [...globalState.modelTypeGroups]
    }

    // Initially visible columns: 'rank', every column not hidden by default, and 'model_source'.
    const shownByDefault = ordered.filter(h => !globalState.DEFAULT_HIDDEN.has(h))
    globalState.visibleColumns = Array.from(new Set(['rank', ...shownByDefault, 'model_source']))
  } catch (e) {
    console.error(e)
    globalState.error = e?.message || String(e)
  } finally {
    globalState.loading = false
  }
}

/** Show every selectable column. */
function selectAll() {
  globalState.visibleColumns = [...selectableColumns.value]
}

/** Hide every column. */
function clearAll() {
  globalState.visibleColumns = []
}

/** Select every model type. */
function selectAllModelTypes() {
  globalState.selectedModelType = [...modelTypeGroups.value]
}

/**
 * Clear the model-type selection. The '__none__' sentinel records an explicit clear,
 * as opposed to an empty array, which applies no model-type filter.
 */
function clearAllModelTypes() {
  globalState.selectedModelType = ['__none__']
}

/**
 * Display value of one cell.
 * @param {string} h - raw column key
 * @param {object} model - leaderboard row
 * @returns {*} the pre-formatted value when one exists, otherwise the raw field;
 *   '' when no row is given
 */
function formatCell(h, model) {
  if (!model) return ''
  const formatted = model._formatted
  if (formatted && formatted[h] !== undefined) return formatted[h]
  return model[h]
}

/**
 * Load the default CSV; intended to be called when a component mounts.
 * @returns {Promise<void>} settles once loading has finished
 */
function init() {
  return fetchAndLoadCsv()
}

/**
 * Composable exposing the shared leaderboard state, derived views and actions.
 * State is module-level, so every caller observes the same data.
 */
export function useLeaderboardData() {
  return {
    // State
    leaderboard: computed(() => globalState.leaderboard),
    csvHeaders: computed(() => globalState.csvHeaders),
    loading: computed(() => globalState.loading),
    error: computed(() => globalState.error),
    visibleColumns: computed({
      get: () => globalState.visibleColumns,
      set: (v) => { globalState.visibleColumns = v }
    }),
    selectedMode: computed({
      get: () => selectedMode.value,
      set: (v) => { selectedMode.value = v }
    }),
    selectableColumns,
    autoShowSeries,
    strtSymbolSeries,
    headerDisplayMap: computed(() => headerDisplayMap),
    dataNameDisplayMap: computed(() => dataNameDisplayMap),
    dataGroups: computed(() => globalState.dataGroups),
    selectedDataName: computed({
      get: () => globalState.selectedDataName,
      set: (v) => { globalState.selectedDataName = v }
    }),
    selectedDataNameChart: computed({
      get: () => globalState.selectedDataNameChart,
      set: (v) => { globalState.selectedDataNameChart = v }
    }),
    modelTypeGroups: computed(() => globalState.modelTypeGroups),
    selectedModelType: computed({
      get: () => globalState.selectedModelType,
      set: (v) => {
        // Once the user ticks a real model type, drop the '__none__' sentinel.
        if (Array.isArray(v) && v.some(x => x !== '__none__')) {
          globalState.selectedModelType = v.filter(x => x !== '__none__')
        } else {
          globalState.selectedModelType = v
        }
      }
    }),
    filteredLeaderboard,
    displayedColumns,
    modelSeriesICAvg,
    // Actions
    fetchAndLoadCsv,
    selectAll,
    clearAll,
    selectAllModelTypes,
    clearAllModelTypes,
    // Sorting
    sortKey: computed(() => globalState.sortKey),
    sortDesc: computed(() => globalState.sortDesc),
    setSortKey,
    formatCell,
    init
  }
}