TeleAI-AI-Flow's picture
Upload 32 files
309320b verified
import { ref, computed, reactive } from 'vue'
import { parseCsvContent, processModelNames } from '@/utils/csvUtils.js'
// Module-level reactive state object, shared by every component that uses this module.
// fetchAndLoadCsv() fills it in; useLeaderboardData() exposes it through computed refs.
const globalState = reactive({
// Leaderboard rows produced by fetchAndLoadCsv().
leaderboard: [],
// Column keys in display order, as built by fetchAndLoadCsv().
csvHeaders: [],
// True while a CSV load is in progress; reset in fetchAndLoadCsv()'s `finally`.
loading: true,
// Message of the last load failure, or null.
error: null,
// Sort state: the column key currently sorted on, and the direction (true = descending).
sortKey: '',
sortDesc: true,
// Column keys that are currently shown.
visibleColumns: [],
// Distinct non-blank data_name values, in order of first appearance.
dataGroups: [],
selectedDataName: '',
selectedDataNameChart: '',
// Keys of modelTypeMapping, captured after a CSV load.
modelTypeGroups: [],
selectedModelType: [], // an array so that several model types can be selected at once
DEFAULT_CSV_PATH: '/filtered.csv',
// Columns that are hidden by default.
DEFAULT_HIDDEN: new Set(['seq_len', 'uniform_entropy', 'entropy_gain', 'information_capacity', 'data_name', 'model_size (B)'])
})
// Model type mapping: each key is a model type, each value is the array of
// model_series names that belong to that type.
// This is a plain (non-reactive) object; fetchAndLoadCsv() adds entries to it
// at load time for series that are not listed here.
const modelTypeMapping = {
'Qwen3': ['Qwen3'],
'Qwen2.5': ['Qwen2.5'],
'Qwen2': ['Qwen2'],
'Qwen1.5': ['Qwen1.5'],
'Llama-3': ['Llama-3.1', 'Llama-3.2'],
'InternLM2.5': ['Internlm2.5'],
// NOTE(review): 'GLM-4' is listed twice; the duplicate has no effect on the
// membership checks that read this array — confirm whether another series name was intended.
'GLM-4': ['GLM-4', 'GLM-4'],
'Seed-OSS': ['Seed-OSS'],
'Gemma-3': ['Gemma-3'],
'Hunyuan': ['Hunyuan'],
'Qwen2 (MoE)': ['Qwen2 (MoE)'],
'Qwen1.5 (MoE)': ['Qwen1.5 (MoE)'],
'DeepSeek-V3.1': ['DeepSeek-V3.1-Base'],
'DeepSeek-V2': ['DeepSeek-V2'],
'GLM-4.5': ['GLM-4.5-Air-Base', 'GLM-4.5-Base'],
'Llama-4': ['Llama-4']
}
// Model-name prefixes that fetchAndLoadCsv() inspects for a trailing "-A…B" suffix;
// rows that match get their model_series renamed to "<prefix> (MoE)".
const MoEModelSeries = ['Qwen2', 'Qwen1.5']
// Exposed unchanged through useLeaderboardData(); how consumers use it is not visible in this file.
const strtSymbolSeries = ['Qwen2 (MoE)', 'Qwen1.5 (MoE)', 'DeepSeek-V3.1', 'DeepSeek-V2', 'GLM-4.5', "Llama-4"]
// Previous explicit list, kept for reference:
// const autoShowSeries = ['Qwen3', 'Llama-3', 'InternLM2.5', 'GLM-4', 'Seed-OSS', 'Gemma-3', 'Hunyuan', 'DeepSeek-V3.1', 'DeepSeek-V2', 'GLM-4.5']
// Exposed unchanged through useLeaderboardData(); presumably "*" means "every series" — confirm with the consumer.
const autoShowSeries = ["*"]
// Display names for table headers (raw header key -> label shown).
// Entries can be added here, or changed at runtime since the object is reactive.
const headerDisplayMap = reactive({
'rank': 'Rank',
'model_name': 'Model Name',
'model_series': 'Model Series',
'model_size (B)': 'Model Size (B)',
'constant': 'Text Size',
'conditional_entropy': 'Negative Log-Likelihood',
'BF16_TFLOPs': 'FLOPs (G)',
'ic': ' Information Capacity', // NOTE(review): the label starts with a space — confirm this is intentional
'model_source': 'Tested by'
})
// Display names for datasets (raw data_name value -> label shown).
// Reactive, so entries can be changed at runtime.
const dataNameDisplayMap = reactive({
'data_part_0000': 'Mixed text',
'eng_Latn_000_00027_long': 'FinePDFs-en',
'IndustryCorpus_batch_aa_long': 'Ch-FineWeb-Edu',
'CC-MAIN-2013-20_train-00000-of-00014_long': 'FineWeb-Edu',
'NextCoderDataset_v1_long': 'NextCoder',
})
// Currently selected mode; defaults to 'Model'.
// Shared at module level and exposed as a writable computed by useLeaderboardData().
const selectedMode = ref('Model')
// Columns the user may toggle: every CSV header that is not listed in
// DEFAULT_HIDDEN ('rank' is included when it is among the headers).
const selectableColumns = computed(() => {
  const headers = globalState.csvHeaders
  const hasHeaders = Boolean(headers) && headers.length !== 0
  if (!hasHeaders) return []
  return headers.filter((header) => globalState.DEFAULT_HIDDEN.has(header) === false)
})
// Model type groups.
// modelTypeMapping is a plain object, so a computed that reads only it has no
// reactive dependency and would keep returning whatever keys existed the first
// time it was evaluated. Reading globalState.modelTypeGroups (refreshed by
// fetchAndLoadCsv() after it registers new series) keeps this value current;
// the static mapping keys are used until a CSV has been loaded.
const modelTypeGroups = computed(() => {
  const loadedGroups = globalState.modelTypeGroups
  if (Array.isArray(loadedGroups) && loadedGroups.length > 0) {
    // Return a copy so callers cannot mutate the shared state array by accident.
    return [...loadedGroups]
  }
  return Object.keys(modelTypeMapping)
})
// Sorted view of the leaderboard driven by globalState.sortKey / sortDesc.
// Always returns a new array; globalState.leaderboard itself is never reordered.
// Ordering rules:
//   - missing cells (null, undefined, or blank strings) go last in both directions;
//   - when both cells are finite numbers they are compared numerically;
//   - otherwise they are compared as strings with localeCompare.
const sortedLeaderboard = computed(() => {
  const rows = globalState.leaderboard
  if (!rows || rows.length === 0) return []
  const sorted = [...rows]
  const key = globalState.sortKey
  if (!key) return sorted
  const direction = globalState.sortDesc ? -1 : 1
  // Number('') is 0, so blank cells must be detected before numeric coercion,
  // otherwise they would be ranked as if they held the value 0.
  const isMissing = (value) => value == null || (typeof value === 'string' && value.trim() === '')
  sorted.sort((a, b) => {
    const va = a[key]
    const vb = b[key]
    const missingA = isMissing(va)
    const missingB = isMissing(vb)
    if (missingA && missingB) return 0
    if (missingA) return 1
    if (missingB) return -1
    const na = Number(va)
    const nb = Number(vb)
    if (Number.isFinite(na) && Number.isFinite(nb)) {
      return direction * (na - nb)
    }
    return direction * String(va).localeCompare(String(vb))
  })
  return sorted
})
// Rows rendered by the table: the sorted leaderboard narrowed by the selected
// dataset and the selected model types, with `rank` renumbered from 1 in the
// resulting order.
const filteredLeaderboard = computed(() => {
  const source = globalState.leaderboard
  if (!source || source.length === 0) return []

  // Start from the already-sorted view.
  const sortedRows = sortedLeaderboard.value

  const wantedDataset = globalState.selectedDataName
  const restrictDataset = Boolean(wantedDataset) && wantedDataset !== 'all'

  // Multi-select model type filter. The special entry '__none__' means the
  // user explicitly cleared the selection, in which case nothing is shown.
  const chosenTypes = globalState.selectedModelType
  const typesListed = Array.isArray(chosenTypes)
  if (typesListed && chosenTypes.includes('__none__')) return []
  const restrictTypes = typesListed && chosenTypes.length > 0

  const ranked = []
  for (const row of sortedRows) {
    if (restrictDataset && String(row['data_name'] ?? '') !== String(wantedDataset)) continue
    if (restrictTypes && !chosenTypes.includes(String(row['model_type'] ?? ''))) continue
    // Copy the row so the renumbered rank does not leak into the source data.
    ranked.push({ ...row, rank: ranked.length + 1 })
  }
  return ranked
})
// Average `ic` per model type (the keys of modelTypeMapping).
// Returns an array of { ModelSeries, IC, Constant, ModelSource } where:
//   - ModelSeries is the modelTypeMapping key;
//   - IC is the mean of the finite `ic` values of the matching rows, rounded to 4 decimals;
//   - Constant and ModelSource come from the last matching row.
// Rows are restricted to the selected dataset and the selected model types.
// The result follows globalState.sortKey / sortDesc, defaulting to IC descending.
const modelSeriesICAvg = computed(() => {
  const rows = globalState.leaderboard || []
  const selData = globalState.selectedDataName
  const selModelTypes = globalState.selectedModelType
  const typeFilter = Array.isArray(selModelTypes) ? selModelTypes : []
  // '__none__' marks an explicit "clear all" by the user: show nothing.
  if (typeFilter.includes('__none__')) return []
  const filterByDataset = Boolean(selData) && selData !== 'all'
  // Number(null) and Number('') are both 0, so blank cells have to be rejected
  // explicitly or they would be averaged in as zeros.
  const isBlank = (value) => value == null || (typeof value === 'string' && value.trim() === '')

  const out = []
  for (const type of Object.keys(modelTypeMapping)) {
    // With an active selection, only the selected types are aggregated.
    if (typeFilter.length > 0 && !typeFilter.includes(type)) continue
    const mappedSeries = new Set(modelTypeMapping[type] || [])
    // Also accept rows whose series is the type name itself.
    mappedSeries.add(type)
    let sum = 0
    let count = 0
    let constant = 0
    let modelSource = ''
    for (const r of rows) {
      if (filterByDataset && String(r['data_name'] ?? '') !== String(selData)) continue
      const seriesName = String(r['model_series'] ?? '').trim()
      if (!seriesName || !mappedSeries.has(seriesName)) continue
      const icRaw = r['ic']
      if (isBlank(icRaw)) continue
      const n = Number(icRaw)
      if (!Number.isFinite(n)) continue
      constant = Number(r['constant']) || 0
      sum += n
      count += 1
      modelSource = r['model_source']
    }
    if (count === 0) continue
    const avg = sum / count
    out.push({ ModelSeries: type, IC: Number(avg.toFixed(4)), Constant: constant, ModelSource: modelSource })
  }

  const key = globalState.sortKey
  if (!key) {
    // No explicit sort column: IC, highest first.
    out.sort((a, b) => b.IC - a.IC)
    return out
  }
  // The aggregated objects use different property names than the table columns,
  // so translate the column key before reading values.
  const sortFields = { ic: 'IC', constant: 'Constant', model_series: 'ModelSeries', model_source: 'ModelSource' }
  const field = Object.prototype.hasOwnProperty.call(sortFields, key) ? sortFields[key] : key
  const direction = globalState.sortDesc ? -1 : 1
  out.sort((a, b) => {
    const va = a[field]
    const vb = b[field]
    // null/undefined go last in both directions.
    if (va == null && vb == null) return 0
    if (va == null) return 1
    if (vb == null) return -1
    const na = Number(va)
    const nb = Number(vb)
    if (Number.isFinite(na) && Number.isFinite(nb)) {
      return direction * (na - nb)
    }
    return direction * String(va).localeCompare(String(vb))
  })
  return out
})
// Header click handler for sorting.
// Choosing a new column sorts it descending; choosing the current column again
// flips the direction (descending <-> ascending). A falsy key is ignored.
function setSortKey(h) {
  if (!h) return
  const isCurrentColumn = globalState.sortKey === h
  if (isCurrentColumn) {
    globalState.sortDesc = !globalState.sortDesc
  } else {
    globalState.sortKey = h
    globalState.sortDesc = true
  }
}
// Columns that are currently rendered: the CSV headers, in header order, that
// are present in globalState.visibleColumns. 'rank' is a header like any other
// here, so it appears whenever it is marked visible.
const displayedColumns = computed(() => {
  const headers = globalState.csvHeaders
  if (!headers || headers.length === 0) return []
  // A Set gives constant-time membership checks and tolerates a missing list.
  const visible = new Set(globalState.visibleColumns ?? [])
  return headers.filter((header) => visible.has(header))
})
// Trailing "[[source]]" tag on a model name; capture group 1 is the source label.
const MODEL_SOURCE_TAG_RE = /\[\[(.+?)\]\]$/
// Trailing activated-parameter suffix of a MoE model name, e.g. "-A14B" or "-A2.7B".
// Requiring digits keeps names such as "Qwen2-Audio-7B" from being mistaken for MoE models.
const MOE_ACTIVE_PARAMS_RE = /-A\d+(?:\.\d+)?B$/
// Columns that are listed first, in this order, when they exist in the CSV.
const PREFERRED_COLUMN_ORDER = ['model_name', 'model_series', 'model_size (B)', 'seq_len', 'uniform_entropy', 'constant', 'conditional_entropy', 'entropy_gain', 'BF16_TFLOPs', 'information_capacity', 'ic', 'model_source']
// Number of decimals used when formatting each floating-point column.
const FLOAT_COLUMN_DECIMALS = new Map([
  ['uniform_entropy', 3],
  ['conditional_entropy', 3],
  ['entropy_gain', 3],
  ['information_capacity', 3],
  ['ic', 4],
  ['constant', 2],
  ['BF16_TFLOPs', 3]
])
// Columns that are formatted as rounded integers.
const INTEGER_COLUMNS = new Set(['seq_len'])

// Builds a series-name -> model-type lookup from modelTypeMapping.
// When a series is listed under several types, the first type wins.
function buildSeriesTypeIndex() {
  const index = new Map()
  for (const [type, seriesList] of Object.entries(modelTypeMapping)) {
    for (const series of seriesList) {
      if (!index.has(series)) index.set(series, type)
    }
  }
  return index
}

// Makes sure `series` belongs to a model type, registering it under its own name if it is unknown.
function registerSeries(series, seriesTypeIndex) {
  if (seriesTypeIndex.has(series)) return
  if (Object.prototype.hasOwnProperty.call(modelTypeMapping, series)) {
    // The name is already a model-type key: extend its list rather than
    // replacing it, so the series already grouped under that type stay grouped.
    modelTypeMapping[series].push(series)
  } else {
    modelTypeMapping[series] = [series]
  }
  seriesTypeIndex.set(series, series)
}

// Returns "<prefix> (MoE)" when `modelName` is a MoE variant of one of the MoEModelSeries prefixes, otherwise null.
function detectMoeSeries(modelName) {
  if (!MOE_ACTIVE_PARAMS_RE.test(modelName)) return null
  for (const prefix of MoEModelSeries) {
    if (!modelName.startsWith(prefix)) continue
    // Skip longer version numbers: the prefix "Qwen2" must not claim "Qwen2.5-…".
    const next = modelName.charAt(prefix.length)
    if (next === '.' || (next >= '0' && next <= '9')) continue
    return `${prefix} (MoE)`
  }
  return null
}

// Derives model_source, model_type and the displayed model_series of one parsed CSV row (mutates the row).
function annotateLeaderboardRow(row, seriesTypeIndex) {
  // 1. Series that are not covered by modelTypeMapping become their own type.
  let series = String(row['model_series'] ?? '').trim()
  registerSeries(series, seriesTypeIndex)

  // 2. A model name ending in "[[xxx]]" was tested by "xxx"; everything else by TeleAI.
  const rawName = String(row['model_name'] || '')
  const sourceTag = rawName.match(MODEL_SOURCE_TAG_RE)
  let cleanName = rawName.trim()
  if (sourceTag) {
    cleanName = rawName.replace(MODEL_SOURCE_TAG_RE, '').trim()
    row['model_source'] = sourceTag[1]
    row['model_name'] = cleanName
  } else {
    row['model_source'] = 'TeleAI'
  }

  // 3. MoE detection runs on the name without the source tag, otherwise the
  //    end-anchored suffix pattern could never match a tagged name.
  const moeSeries = detectMoeSeries(cleanName)
  if (moeSeries) {
    series = moeSeries
    registerSeries(series, seriesTypeIndex)
  }

  // 4./5. The model type is looked up from the series and is also what the
  //       model_series column displays.
  const modelType = seriesTypeIndex.get(series) ?? ''
  row['model_type'] = modelType
  row['model_series'] = modelType
}

// Formats one cell for display: '' for blank cells, rounded integers / fixed
// decimals for known numeric columns, and the raw value for everything else.
function formatLeaderboardValue(header, raw) {
  if (raw == null || raw === '') return ''
  const isInteger = INTEGER_COLUMNS.has(header)
  if (!isInteger && !FLOAT_COLUMN_DECIMALS.has(header)) return raw
  const n = Number(raw)
  if (!Number.isFinite(n)) return raw
  return isInteger ? String(Math.round(n)) : n.toFixed(FLOAT_COLUMN_DECIMALS.get(header))
}

// Fetches the CSV at `path`, parses it and (re)populates globalState:
// leaderboard rows, headers, dataset groups, model type groups, default
// selections, visible columns and the default sort (last CSV column, descending).
// Never rejects: failures are logged and stored in globalState.error, and
// globalState.loading is always reset when the call settles.
async function fetchAndLoadCsv(path = globalState.DEFAULT_CSV_PATH) {
  globalState.loading = true
  globalState.error = null
  try {
    const res = await fetch(path)
    if (!res.ok) throw new Error(`Failed to fetch CSV (${res.status})`)
    const txt = await res.text()
    const { headers, rows } = parseCsvContent(txt)
    processModelNames(rows)
    if (!headers || headers.length === 0) {
      globalState.leaderboard = []
      return
    }

    // Default sort: the last CSV column, descending. Rows are not pre-sorted here.
    globalState.sortKey = headers[headers.length - 1] || ''
    globalState.sortDesc = true

    // Built once per load instead of flattening modelTypeMapping for every row.
    const seriesTypeIndex = buildSeriesTypeIndex()
    for (const row of rows) {
      annotateLeaderboardRow(row, seriesTypeIndex)
    }

    // Preferred columns first, then the remaining headers; the Set drops repeats.
    const preferredPresent = PREFERRED_COLUMN_ORDER.filter((column) => headers.includes(column))
    const ordered = [...new Set([...preferredPresent, ...headers])]
    // 'rank' and 'model_source' are derived columns; de-duplicate in case the CSV already has them.
    globalState.csvHeaders = [...new Set(['rank', ...ordered, 'model_source'])]

    // Rows are completed (including their display strings) before being handed
    // to the reactive state, so the state is assigned exactly once.
    globalState.leaderboard = rows.map((row, idx) => {
      // BF16_TFLOPs is rescaled: divided by 1024, then multiplied by 1000.
      const scaledFlops = ((Number(row['BF16_TFLOPs']) || 0) / 1024) * 1000
      const entry = { rank: idx + 1, model_type: row['model_type'], ...row, BF16_TFLOPs: scaledFlops }
      const formatted = {}
      for (const header of ordered) {
        formatted[header] = formatLeaderboardValue(header, entry[header])
      }
      entry._formatted = formatted
      return entry
    })

    // Dataset groups: distinct non-blank data_name values in order of first appearance.
    const seenGroups = new Set()
    const groups = []
    for (const row of rows) {
      const dataName = row['data_name']
      if (dataName == null) continue
      const label = String(dataName)
      if (label.trim() === '' || seenGroups.has(label)) continue
      seenGroups.add(label)
      groups.push(label)
    }
    globalState.dataGroups = groups
    globalState.modelTypeGroups = Object.keys(modelTypeMapping)

    // Default selections: first dataset, every model type.
    if (groups.length > 0) {
      globalState.selectedDataName = groups[0]
      globalState.selectedDataNameChart = groups[0]
    }
    if (globalState.modelTypeGroups.length > 0) {
      globalState.selectedModelType = [...globalState.modelTypeGroups]
    }

    // Visible by default: rank, every column that is not hidden by default, and model_source.
    const shownByDefault = ordered.filter((header) => !globalState.DEFAULT_HIDDEN.has(header))
    globalState.visibleColumns = [...new Set(['rank', ...shownByDefault, 'model_source'])]
  } catch (e) {
    console.error(e)
    globalState.error = e && e.message ? e.message : String(e)
  } finally {
    globalState.loading = false
  }
}
// Makes every selectable column visible.
function selectAll() {
  // Store a copy so later edits to visibleColumns never touch the computed list.
  const everyColumn = Array.from(selectableColumns.value)
  globalState.visibleColumns = everyColumn
}
// Hides every column by replacing the visible list with a fresh empty array.
function clearAll() {
  const noColumns = []
  globalState.visibleColumns = noColumns
}
// Selects every model type reported by the modelTypeGroups computed.
function selectAllModelTypes() {
  // Copy the list so the selection can be edited independently afterwards.
  const everyType = Array.from(modelTypeGroups.value)
  globalState.selectedModelType = everyType
}
// Clears the model type selection.
// The selection becomes the single marker entry '__none__', which records that
// the user explicitly cleared it (as opposed to an empty list, which applies no filter).
function clearAllModelTypes() {
  const explicitlyCleared = ['__none__']
  globalState.selectedModelType = explicitlyCleared
}
// Returns the text to render for column `h` of row `model`.
// Uses the pre-formatted string in model._formatted when one exists for the
// column, otherwise the raw cell value; a missing row renders as ''.
function formatCell(h, model) {
  if (!model) return ''
  const preformatted = model._formatted ? model._formatted[h] : undefined
  return preformatted !== undefined ? preformatted : model[h]
}
// Initialisation entry point, meant to be called when a component mounts.
// Starts loading the default CSV and returns the promise from fetchAndLoadCsv()
// so callers that need to wait for the data can await it; callers that ignore
// the return value behave exactly as before.
function init() {
  return fetchAndLoadCsv()
}
// Composable that exposes the shared leaderboard state and its actions.
// Every call returns fresh computed wrappers around the same module-level
// state, so all callers observe and modify the same data.
export function useLeaderboardData() {
  // Read-only view of one globalState field.
  const readOnly = (field) => computed(() => globalState[field])
  // Two-way binding to one globalState field.
  const twoWay = (field) => computed({
    get: () => globalState[field],
    set: (next) => {
      globalState[field] = next
    }
  })

  const selectedModeBinding = computed({
    get: () => selectedMode.value,
    set: (next) => {
      selectedMode.value = next
    }
  })

  const selectedModelTypeBinding = computed({
    get: () => globalState.selectedModelType,
    set: (next) => {
      // As soon as the UI selects at least one real model type, the
      // '__none__' marker is dropped from the stored selection.
      const hasRealType = Array.isArray(next) && next.some((entry) => entry !== '__none__')
      globalState.selectedModelType = hasRealType
        ? next.filter((entry) => entry !== '__none__')
        : next
    }
  })

  return {
    // State
    leaderboard: readOnly('leaderboard'),
    csvHeaders: readOnly('csvHeaders'),
    loading: readOnly('loading'),
    error: readOnly('error'),
    visibleColumns: twoWay('visibleColumns'),
    selectedMode: selectedModeBinding,
    selectableColumns,
    autoShowSeries,
    strtSymbolSeries,
    headerDisplayMap: computed(() => headerDisplayMap),
    dataNameDisplayMap: computed(() => dataNameDisplayMap),
    dataGroups: readOnly('dataGroups'),
    selectedDataName: twoWay('selectedDataName'),
    selectedDataNameChart: twoWay('selectedDataNameChart'),
    modelTypeGroups: readOnly('modelTypeGroups'),
    selectedModelType: selectedModelTypeBinding,
    filteredLeaderboard,
    displayedColumns,
    modelSeriesICAvg,
    // Actions
    fetchAndLoadCsv,
    selectAll,
    clearAll,
    selectAllModelTypes,
    clearAllModelTypes,
    // Sorting
    sortKey: readOnly('sortKey'),
    sortDesc: readOnly('sortDesc'),
    setSortKey,
    formatCell,
    init
  }
}