| import Papa from 'papaparse'; |
| import { DataTable } from 'simple-datatables'; |
|
|
/**
 * Language label -> CSV file name.
 *
 * The keys are the entries offered in the language dropdown; the value is the
 * file fetched from `data/os_models/<version>/` when that entry is selected.
 */
const languageMap = {
  'All Languages': 'final_rankings.csv',
  Arabic: 'results_ar.csv',
  Turkish: 'results_tr.csv',
  Swahili: 'results_sw.csv',
  Russian: 'results_ru.csv',
  Telugu: 'results_te.csv',
  Thai: 'results_th.csv',
  Chinese: 'results_zh.csv',
  French: 'results_fr.csv',
  Hindi: 'results_hi.csv',
};
|
|
/**
 * Known leaderboard data versions.
 *
 * Only the keys are read in this file: they populate the version dropdown.
 */
const versionMap = {
  v1: 'v1',
  v2: 'v2',
};
|
|
/**
 * Version -> changelog text shown in the collapsible changelog panel.
 *
 * Multi-line entries are joined with "\n"; the text is assigned through
 * `textContent`, so line breaks are only visible if the stylesheet preserves
 * whitespace for the changelog content.
 */
const versionChangelog = {
  v1: 'Initial release of FineTasks Leaderboard',
  v2: [
    'Changes in v2:',
    '• Fixed a bug in the rescaling of scores',
    '• Switched to using Native choice prefixes for Thai/Telugu/Hindi/Arabic',
    '• Added Options: anchors before showing options for continuation tasks (e.g. HellaSwag) - consistent improvement in scores',
    '• Removed openai/gpt-4o-mini',
  ].join('\n'),
};
|
|
/**
 * CSV column key -> human-readable table header.
 *
 * Columns missing from this map fall back to `processTaskName(column)` when
 * the header row is built.
 */
const columnNameMap = {
  runname: 'Model',
  agg_score_macro: 'Score',
  agg_score_RES: 'RES Score',
  agg_score_RC: 'RC Score',
  agg_score_GK: 'GK Score',
  agg_score_NLU: 'NLU Score',
  avg_rank_macro: 'Multilingual Score',
  rank: 'Rank',
};
|
|
/**
 * Build a `<select>` element with one `<option>` per entry of `options`.
 *
 * @param {string[]} options - Each entry is used as both the option value and its label.
 * @param {Function} onChange - Listener registered for the select's `change` event.
 * @param {?string} [initialValue=null] - Entry to mark as selected; ignored when falsy.
 * @returns {HTMLSelectElement} The populated select, not yet attached to the document.
 */
function createDropdown(options, onChange, initialValue = null) {
  const dropdown = document.createElement('select');

  for (const entry of options) {
    const item = document.createElement('option');
    item.value = entry;
    item.textContent = entry;
    // Only touch `selected` on the matching entry; all others keep their default.
    if (initialValue && entry === initialValue) {
      item.selected = true;
    }
    dropdown.appendChild(item);
  }

  dropdown.addEventListener('change', onChange);
  return dropdown;
}
|
|
/**
 * Derive a short display name from a raw task/column identifier.
 *
 * If the identifier contains `|`, the second `|`-separated segment is used;
 * otherwise the whole identifier is used. Everything from the first `_mcf`
 * onwards is then dropped, followed by everything from the first `_cf` onwards.
 *
 * @param {string} taskName - Raw identifier, e.g. a CSV column key.
 * @returns {string} The shortened name.
 */
function processTaskName(taskName) {
  const segments = taskName.split('|');
  const baseName = segments.length > 1 ? segments[1] : taskName;
  const [withoutMcf] = baseName.split('_mcf');
  const [withoutCf] = withoutMcf.split('_cf');
  return withoutCf;
}
|
|
/**
 * Make a column key usable inside a CSS class name.
 *
 * @param {string} name - Raw column key.
 * @returns {string} `name` with every character other than ASCII letters,
 *   digits, `-` and `_` replaced by `_`.
 */
function sanitizeColumnName(name) {
  const disallowedCharacters = /[^a-zA-Z0-9-_]/g;
  return name.replace(disallowedCharacters, '_');
}
|
|
/**
 * Build the leaderboard table for the given result rows.
 *
 * Columns shown:
 *  - `extraColumn === 'All Languages'`: rank, model, `avg_rank_macro`;
 *  - otherwise: rank, model, `agg_score_macro`, plus `extraColumn` when truthy.
 *
 * Rows without a `runname` are dropped before rendering, so the rank number
 * and the top-3 highlighting always follow the position of the displayed row.
 * The rank is the 1-based position in `data`; the caller is expected to pass
 * rows already sorted best-first.
 *
 * Model names come from CSV data, so they are written through `textContent`
 * and DOM properties, never through `innerHTML`.
 *
 * @param {Object[]} data - Parsed CSV rows keyed by column name.
 * @param {?string} extraColumn - `'All Languages'`, an additional column key, or a falsy value.
 * @returns {HTMLDivElement} Wrapper `<div>` containing the generated `<table>`.
 */
function createResultsTable(data, extraColumn) {
  // Models whose name contains one of these identifiers get the chat marker.
  const chatModels = [
    'CohereForAI/c4ai-command-r-plus-08-2024',
    'openai/gpt-4o-mini',
    'silma-ai/SILMA-9B-Instruct-v1.0',
    'microsoft/Phi-3.5-mini-instruct',
    'TURKCELL/Turkcell-LLM-7b-v1',
  ];
  // Rank-cell colours for the first, second and third displayed rows.
  const podiumColors = ['#FFB800', '#C0C0C0', '#CD7F32'];

  const tableWrapper = document.createElement('div');
  tableWrapper.className = 'table-wrapper leaderboard-table-wrapper';

  const table = document.createElement('table');
  table.className = 'results-table leaderboard-results-table';

  const columns = extraColumn === 'All Languages'
    ? ['rank', 'runname', 'avg_rank_macro']
    : ['rank', 'runname', 'agg_score_macro', extraColumn].filter(Boolean);

  const headerRow = table.createTHead().insertRow();
  columns.forEach((column) => {
    const th = document.createElement('th');
    th.textContent = columnNameMap[column] || processTaskName(column);
    th.className = `column-${sanitizeColumnName(column)}`;
    headerRow.appendChild(th);
  });

  // Filter first: using the index of the unfiltered array would leave gaps in
  // the rank column and mis-assign the podium styling whenever a blank row
  // sits between real rows.
  const rows = data.filter((row) => row && row.runname);

  const body = table.createTBody();
  rows.forEach((row, index) => {
    const tr = body.insertRow();
    const isPodium = index < podiumColors.length;

    if (isPodium) {
      // The highlight fades with rank: 100%, 75%, 50% of the base tint.
      const opacity = 1 - (index * 0.25);
      tr.style.backgroundColor = `rgba(255, 165, 0, ${opacity * 0.2})`;
      tr.style.fontWeight = 600;
    }

    columns.forEach((column) => {
      const td = tr.insertCell();
      td.className = `column-${sanitizeColumnName(column)}`;

      if (column === 'rank') {
        td.textContent = index + 1;
        if (isPodium) {
          td.style.fontWeight = 'bold';
          td.style.color = podiumColors[index];
        }
        return;
      }

      if (column === 'runname') {
        // dynamicTyping may turn a purely numeric run name into a number.
        const modelName = String(row[column]);
        const isChatModel = chatModels.some((chatModel) => modelName.includes(chatModel));
        const displayName = `${isChatModel ? '💬' : '🟢'} ${modelName}`;

        if (modelName.split('/')[0] !== 'openai') {
          // Every organisation except "openai" is linked to huggingface.co.
          const link = document.createElement('a');
          link.href = `https://huggingface.co/${modelName}`;
          link.textContent = displayName;
          td.appendChild(link);
        } else {
          td.textContent = displayName;
        }
        td.title = modelName;
        td.style.cursor = 'help';
        return;
      }

      const value = row[column];
      td.textContent = typeof value === 'number' ? value.toFixed(2) : value;
    });
  });

  tableWrapper.appendChild(table);
  return tableWrapper;
}
|
|
/**
 * Build the collapsible changelog panel.
 *
 * The panel consists of a clickable header (arrow + "Changelog" label) and a
 * content area that starts hidden. Clicking the header toggles the content
 * between hidden and shown and flips the arrow accordingly.
 *
 * @returns {{container: HTMLDivElement, content: HTMLDivElement}} The panel
 *   root and the content element whose text the caller fills in.
 */
function createChangelog() {
  const COLLAPSED_ARROW = '▶';
  const EXPANDED_ARROW = '▼';

  // Small local factory: create an element and tag it with its class name.
  const makeElement = (tag, className) => {
    const element = document.createElement(tag);
    element.className = className;
    return element;
  };

  const changelogContainer = makeElement('div', 'changelog-container');
  const changelogHeader = makeElement('div', 'changelog-header');

  const arrow = makeElement('span', 'changelog-arrow');
  arrow.textContent = COLLAPSED_ARROW;

  const label = makeElement('span', 'changelog-label');
  label.textContent = 'Changelog';

  const content = makeElement('div', 'changelog-content');
  content.style.display = 'none';

  changelogHeader.appendChild(arrow);
  changelogHeader.appendChild(label);
  changelogContainer.appendChild(changelogHeader);
  changelogContainer.appendChild(content);

  changelogHeader.addEventListener('click', () => {
    const wasHidden = content.style.display === 'none';
    content.style.display = wasHidden ? 'block' : 'none';
    arrow.textContent = wasHidden ? EXPANDED_ARROW : COLLAPSED_ARROW;
  });

  return { container: changelogContainer, content };
}
|
|
/**
 * Render the FineTasks leaderboard widget inside the element with id `containerId`.
 *
 * The widget consists of a title, the results table, a caption (taken from the
 * container's `data-caption` attribute), three dropdowns (version, language,
 * extra task column) and a collapsible changelog. Results are fetched from
 * `data/os_models/<version>/<csv file>` and parsed with Papa Parse.
 *
 * The initially selected language is read from the `language` URL query
 * parameter (falling back to "All Languages"); changing the language pushes a
 * new history entry carrying the updated parameter.
 *
 * Does nothing when no element with the given id exists.
 *
 * @param {string} containerId - Id of the element that receives the widget.
 * @returns {void}
 */
export function initLeaderboardResults(containerId) {
  const container = document.getElementById(containerId);
  if (!container) return;

  const titleElement = document.createElement('h3');
  titleElement.textContent = 'FineTasks Leaderboard';
  titleElement.className = 'leaderboard-title';

  const { container: changelogContainer, content: changelogContent } = createChangelog();

  const tableContainer = document.createElement('div');
  tableContainer.className = 'table-container';

  let leaderboardDataTable;
  let currentData = [];
  // Bumped for every fetch so that a slow response which has been superseded
  // by a newer dropdown selection can be recognised and discarded.
  let latestRequestId = 0;

  const captionElement = document.createElement('figcaption');
  captionElement.className = 'table-caption';
  captionElement.textContent = container.dataset.caption || '';

  /**
   * Fetch and parse the CSV for the selected version/language, refresh the
   * task dropdown, and re-render the table. Errors are logged and shown in
   * place of the table.
   */
  async function updateLanguageTable() {
    const requestId = ++latestRequestId;
    const selectedVersion = versionDropdown.value;
    const selectedLanguage = languageDropdown.value;
    const csvFile = languageMap[selectedLanguage];

    // The changelog depends only on the selected version, so refresh it even
    // if loading the data fails below.
    updateChangelog();

    try {
      const response = await fetch(`data/os_models/${selectedVersion}/${csvFile}`);
      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }
      const csvText = await response.text();

      // A newer selection was made while this request was in flight.
      if (requestId !== latestRequestId) return;

      const parsed = Papa.parse(csvText, {
        header: true,
        dynamicTyping: true,
        skipEmptyLines: true,
      });
      // Rows without a model name are never displayed; removing them before
      // sorting keeps the comparators from producing NaN for blank rows.
      const results = parsed.data.filter(row => row && row.runname);
      if (results.length === 0) {
        throw new Error(`No results found in ${csvFile}`);
      }

      currentData = selectedLanguage === 'All Languages'
        ? results.sort((a, b) => a.avg_rank_macro - b.avg_rank_macro)
        : results.sort((a, b) => b.agg_score_macro - a.agg_score_macro);

      if (selectedLanguage !== 'All Languages') {
        // Offer every column of the first row except the bookkeeping ones.
        const columnOptions = ['None'].concat(Object.keys(currentData[0]).filter(key =>
          !['runname', 'seed', 'steps', 'agg_score_micro', 'rank', 'avg_rank_macro', ''].includes(key)
        ));
        extraColumnDropdown.innerHTML = '';
        columnOptions.forEach(option => {
          const optionElement = document.createElement('option');
          optionElement.value = option;
          optionElement.textContent = option === 'None' ? 'None' : processTaskName(option);
          extraColumnDropdown.appendChild(optionElement);
        });

        extraColumnDropdown.value = 'None';
        extraColumnLabel.style.display = 'inline';
        extraColumnDropdown.style.display = 'inline';
      } else {
        extraColumnLabel.style.display = 'none';
        extraColumnDropdown.style.display = 'none';
      }

      updateTable();
    } catch (error) {
      console.error('Error fetching CSV:', error);
      // Do not let the failure of a superseded request replace newer content.
      if (requestId !== latestRequestId) return;
      tableContainer.innerHTML = '';
      const errorMessage = document.createElement('p');
      // textContent: the message may contain arbitrary text.
      errorMessage.textContent = `Error loading data: ${error.message}`;
      tableContainer.appendChild(errorMessage);
    }
  }

  /** Rebuild the table from `currentData` and the current dropdown state. */
  function updateTable() {
    const extraColumn = languageDropdown.value === 'All Languages' ? 'All Languages' :
      (extraColumnDropdown.value === 'None' ? null : extraColumnDropdown.value);

    tableContainer.innerHTML = '';
    const tableWrapper = createResultsTable(currentData, extraColumn);
    tableContainer.appendChild(tableWrapper);

    if (leaderboardDataTable) {
      leaderboardDataTable.destroy();
    }

    // Hand over the table that was just created rather than a document-wide
    // selector, which would bind to whichever matching table comes first.
    const tableElement = tableWrapper.querySelector('.leaderboard-results-table');
    leaderboardDataTable = new DataTable(tableElement, {
      perPage: 10,
      perPageSelect: false,
      searchable: false,
      sortable: true,
      fixedHeight: true,
      labels: {
        info: ''
      }
    });

    // Deferred so the widths are adjusted after the current call stack finishes.
    setTimeout(adjustColumnWidths, 0);
  }

  /** Show the changelog text of the currently selected version. */
  function updateChangelog() {
    const selectedVersion = versionDropdown.value;
    changelogContent.textContent = versionChangelog[selectedVersion];
  }

  /** Read a single query parameter from the current URL (null when absent). */
  function getUrlParameter(name) {
    const urlParams = new URLSearchParams(window.location.search);
    return urlParams.get(name);
  }

  /**
   * Set (or, for a falsy value, remove) a query parameter and push the
   * resulting URL as a new history entry.
   */
  function updateUrlParameter(key, value) {
    const urlParams = new URLSearchParams(window.location.search);
    if (value) {
      urlParams.set(key, value);
    } else {
      urlParams.delete(key);
    }
    const newUrl = `${window.location.pathname}${urlParams.toString() ? '?' + urlParams.toString() : ''}`;
    window.history.pushState({ path: newUrl }, '', newUrl);
  }

  // Only accept a `language` parameter that names a known language.
  const urlLanguage = getUrlParameter('language');
  const initialLanguage = urlLanguage && Object.keys(languageMap).includes(urlLanguage)
    ? urlLanguage
    : 'All Languages';

  const languageLabel = document.createElement('label');
  languageLabel.textContent = 'Language: ';
  const languageDropdown = createDropdown(
    Object.keys(languageMap),
    (e) => {
      updateLanguageTable();
      // "All Languages" is the default, so it is represented by no parameter.
      updateUrlParameter('language', e.target.value === 'All Languages' ? null : e.target.value);
    },
    initialLanguage
  );

  const extraColumnLabel = document.createElement('label');
  extraColumnLabel.textContent = 'Task: ';
  const extraColumnDropdown = createDropdown(['None'], updateTable);

  const versionLabel = document.createElement('label');
  versionLabel.textContent = 'Version: ';
  const versionDropdown = createDropdown(Object.keys(versionMap), updateLanguageTable);

  const controls = document.createElement('div');
  controls.className = 'controls leaderboard-controls fine-tasks-controls';

  const versionControlGroup = document.createElement('div');
  versionControlGroup.className = 'control-group';
  versionControlGroup.appendChild(versionLabel);
  versionControlGroup.appendChild(versionDropdown);

  const languageControlGroup = document.createElement('div');
  languageControlGroup.className = 'control-group';
  languageControlGroup.appendChild(languageLabel);
  languageControlGroup.appendChild(languageDropdown);

  const extraColumnControlGroup = document.createElement('div');
  extraColumnControlGroup.className = 'control-group';
  extraColumnControlGroup.appendChild(extraColumnLabel);
  extraColumnControlGroup.appendChild(extraColumnDropdown);

  controls.appendChild(versionControlGroup);
  controls.appendChild(languageControlGroup);
  controls.appendChild(extraColumnControlGroup);

  container.appendChild(titleElement);
  container.appendChild(tableContainer);
  container.appendChild(captionElement);
  container.appendChild(controls);
  container.appendChild(changelogContainer);

  // Start on v2 with the language taken from the URL, then load the data.
  versionDropdown.value = 'v2';
  languageDropdown.value = initialLanguage;
  updateLanguageTable();
}
|
|
/**
 * Apply pixel widths to the columns of the first `.leaderboard-results-table`
 * in the document.
 *
 * The first three columns get fixed widths (50, 200 and 100 px). Every further
 * column gets the width of its widest header/cell, capped at 150 px. The
 * width is written to the header and to every body cell of the column.
 *
 * Cells are matched through the header's `column-*` class (the class that
 * `createResultsTable` puts on both headers and cells). Only that single class
 * token is used, so additional classes on the header do not break the
 * selector. When a header carries no `column-*` class, cells are matched by
 * position instead.
 *
 * Does nothing when no matching table exists.
 *
 * @returns {void}
 */
function adjustColumnWidths() {
  const table = document.querySelector('.leaderboard-results-table');
  if (!table) return;

  const fixedWidths = [50, 200, 100];
  const maxFlexibleWidth = 150;

  table.querySelectorAll('th').forEach((header, index) => {
    // Building the selector from the full `className` yields an invalid
    // selector for an empty class and a descendant selector for several
    // classes, so pick out just the column class.
    const columnClass = Array.from(header.classList)
      .find((className) => className.startsWith('column-'));
    const cellSelector = columnClass
      ? `td.${columnClass}`
      : `td:nth-child(${index + 1})`;
    const cells = Array.from(table.querySelectorAll(cellSelector));

    let adjustedWidth;
    if (index < fixedWidths.length) {
      adjustedWidth = fixedWidths[index];
    } else {
      const widest = cells.reduce(
        (maxWidth, cell) => Math.max(maxWidth, cell.offsetWidth),
        header.offsetWidth
      );
      adjustedWidth = Math.min(widest, maxFlexibleWidth);
    }

    header.style.width = `${adjustedWidth}px`;
    cells.forEach((cell) => {
      cell.style.width = `${adjustedWidth}px`;
    });
  });
}
|
|