Spaces:
Runtime error
Runtime error
// ---------------------------------------------------------------------------
// Shared page state
// ---------------------------------------------------------------------------

// Leaderboard records; remains undefined until the data fetch assigns it.
let rawData;

// Chart instance behind the bar chart (null until the first render).
let chart = null;

// Chart instance behind the per-subtask line graph (null until the first render).
let lineGraph = null;

// Sorted rows currently shown in the bar chart; the hover handler uses them to
// translate a bar index into a model name.
let taskScores_save = null;

// Key into taskSubtaskMapping for the task group currently displayed.
let currentTask = 'Avg';

// Available sort modes. Each value doubles as the DOM id of the button that
// selects it.
const sortby_options = {
  BY_REWARD_SCORE: 'sort-by-reward-score',
  BY_SUCCESS_RATE: 'sort-by-success-rate',
  BY_GROUNDING_ACC: 'sort-by-grounding-acc',
};

// Sort mode currently in effect.
let cur_sortby_option = sortby_options.BY_REWARD_SCORE;
// Task group -> ordered list of sub-tasks plotted for that group.
// 'Avg' lists every sub-task, in category order.
const taskSubtaskMapping = (() => {
  const categories = {
    Embodied: ['AlfWorld', 'ScienceWorld', 'BabyAI'],
    Game: ['PDDL', 'Jericho'],
    Web: ['WebShop', 'WebArena'],
    Tools: ['Tool-Query', 'Tool-Operation'],
  };
  const everySubtask = [].concat(...Object.values(categories));
  return { Avg: everySubtask, ...categories };
})();

// Sub-task name -> short abbreviation used in the annotation label of the
// 'Avg' view.
const SubtaskNameMapping = {
  AlfWorld: 'ALF',
  ScienceWorld: 'SW',
  BabyAI: 'BA',
  PDDL: 'PL',
  Jericho: 'JC',
  WebShop: 'WS',
  WebArena: 'WA',
  'Tool-Query': 'T-Q',
  'Tool-Operation': 'T-O',
};
// Per-model line colour and border style, filled in by
// generateModelColorsAndStyles once the model list is known.
const modelColors = {};
const borderStyles = {};

// Line colour palette; models cycle through it by position.
const colors = [
  'rgba(255, 99, 132, 1)',
  'rgba(54, 162, 235, 1)',
  'rgba(255, 206, 86, 1)',
  'rgba(75, 192, 192, 1)',
  'rgba(153, 102, 255, 1)',
  'rgba(255, 159, 64, 1)',
  'rgba(199, 199, 199, 1)',
  'rgba(83, 102, 255, 1)',
  'rgba(40, 159, 64, 1)',
  'rgba(143, 162, 235, 1)',
  'rgba(255, 99, 75, 1)',
  'rgba(71 ,150 ,87, 1)',
  'rgba(210 ,102 ,95, 1)',
  'rgba(51 ,47 ,180, 1)',
];

// Border styles; every entry is 2px wide and differs only in its dash pattern
// (an empty pattern means a solid line).
const borders = [
  [],
  [5, 5],
  [10, 5],
  [2, 2],
  [8, 4],
  [5, 10],
  [15, 5],
  [5, 15],
  [10, 10],
  [11, 4],
  [10, 2],
  [4, 8],
  [3, 5],
  [5, 3],
].map((borderDash) => ({ borderWidth: 2, borderDash }));
/**
 * Give every model a line colour and a border style.
 *
 * Entries are written into the shared `modelColors` and `borderStyles`
 * lookups. Both palettes are indexed by the model's position and wrap around
 * when there are more models than palette entries.
 *
 * @param {string[]} models - Model names, in the order they should be assigned.
 */
function generateModelColorsAndStyles(models) {
  for (const [position, name] of models.entries()) {
    modelColors[name] = colors[position % colors.length];
    borderStyles[name] = borders[position % borders.length];
  }
}
/**
 * Collect the three metrics of one task for every model that reports it.
 *
 * Models without an entry for `task` are left out of the result, and an error
 * is logged for each of them.
 *
 * @param {Array<{model: string, tasks: Object}>} rawData - Leaderboard records.
 * @param {string} task - Key to look up in each record's `tasks`.
 * @returns {Array<{model: string, score: number, accuracy: number, grounding: number}>}
 *   One row per model that has the task, in input order; metric values are
 *   passed through parseFloat.
 */
function getScoresForTask(rawData, task) {
  console.log("Current task:", task);
  const rows = [];
  for (const record of rawData) {
    const metrics = record.tasks[task];
    if (!metrics) {
      console.error("Task not found:", task);
      continue;
    }
    rows.push({
      model: record.model,
      score: parseFloat(metrics.score),
      accuracy: parseFloat(metrics.accuracy),
      grounding: parseFloat(metrics.grounding),
    });
  }
  return rows;
}
// Index of the bar currently hovered in the bar chart, or null when none is.
let selectedModelIndexInBar = null;
// Dataset index highlighted in the line graph, as returned by highlightModel.
let selectedModelIndexInLine = null;
// Canvas that already carries the hover listener; prevents re-registration.
let barHoverCanvas = null;

/** Build the bold 'Noto Sans' font descriptor used for axis and legend text. */
function boldChartFont(size) {
  return {
    size: size,
    family: "'Noto Sans', sans-serif",
    weight: 'bold'
  };
}

/**
 * Map a sort option to the metric field it sorts/plots and the matching
 * line-graph axis title. Returns null for an unrecognised option.
 */
function resolveSortMetric(sortOption) {
  switch (sortOption) {
    case sortby_options.BY_SUCCESS_RATE:
      return { field: 'accuracy', axisTitle: 'Success Rate (%)' };
    case sortby_options.BY_REWARD_SCORE:
      return { field: 'score', axisTitle: 'Progress Rate (%)' };
    case sortby_options.BY_GROUNDING_ACC:
      return { field: 'grounding', axisTitle: 'Grounding accuracy (%)' };
    default:
      return null;
  }
}

/**
 * Build one line-graph dataset per model in rawData. Each dataset holds the
 * chosen metric for every sub-task label, with null where the model has no
 * entry for that sub-task.
 */
function buildLineGraphDatasets(subTaskLabels, field) {
  return rawData.map(modelData => {
    return {
      label: modelData.model,
      data: subTaskLabels.map(subtask => modelData.tasks[subtask] ?
        parseFloat(modelData.tasks[subtask][field]) : null),
      borderColor: modelColors[modelData.model] || '#4CAF50',
      fill: false,
      ...borderStyles[modelData.model]
    };
  });
}

/**
 * Register the bar-chart hover handler on the canvas, once per canvas.
 *
 * The handler reads the current `chart`, `lineGraph` and `taskScores_save`
 * globals at event time, so a single listener keeps working after the charts
 * are rebuilt.
 */
function attachBarHoverHandler(canvas) {
  if (canvas === barHoverCanvas) {
    return;
  }
  barHoverCanvas = canvas;
  canvas.addEventListener('mousemove', function (event) {
    if (!chart || !lineGraph) {
      return;
    }
    const activePoints = chart.getElementsAtEventForMode(event, 'nearest', {intersect: true}, true);
    if (activePoints.length > 0) {
      const selectedIndex = activePoints[0].index;
      if (selectedIndex !== selectedModelIndexInBar) {
        selectedModelIndexInBar = selectedIndex;
        selectedModelIndexInLine = highlightModel(taskScores_save.map(item => item.model), selectedModelIndexInBar);
      }
    } else if (selectedModelIndexInBar !== null) {
      // Only clear when something was highlighted, so moving the pointer over
      // empty chart area does not redraw the line graph on every event.
      selectedModelIndexInBar = null;
      removeHighlight();
    }
  });
}

/**
 * (Re)build both charts for the current task and sort option.
 *
 * Reads `rawData`, `currentTask` and `cur_sortby_option`; destroys any
 * existing chart instances; then creates
 *   - the horizontal bar chart (three metrics per model, sorted by the
 *     selected metric), and
 *   - the line graph (selected metric per sub-task, one line per model).
 * Also stores the sorted rows in `taskScores_save` and makes sure the hover
 * handler is attached to the bar-chart canvas.
 *
 * Does nothing when the data has not been loaded yet or when either canvas
 * element is missing.
 */
function createMainResultChart() {
  // A filter/sort button may be clicked before the data fetch has completed.
  if (!Array.isArray(rawData)) {
    return;
  }
  const ctx = document.getElementById('chart-success-reward-rate');
  const lineGraphCanvas = document.getElementById('line-graph');
  if (!ctx || !lineGraphCanvas) {
    console.error('createMainResultChart: chart canvas element not found');
    return;
  }

  const metric = resolveSortMetric(cur_sortby_option);
  const taskScores = getScoresForTask(rawData, currentTask);
  if (metric) {
    taskScores.sort((a, b) => b[metric.field] - a[metric.field]);
  }
  taskScores_save = taskScores;

  // Bar indices refer to the previous ordering; forget them so that hovering
  // the same index on the rebuilt chart highlights again.
  selectedModelIndexInBar = null;
  selectedModelIndexInLine = null;

  const labels = taskScores.map(item => item.model);
  const scores = taskScores.map(item => item.score);
  const accuracies = taskScores.map(item => item.accuracy);
  const groundings = taskScores.map(item => item.grounding);

  if (chart) {
    chart.destroy();
  }
  chart = new Chart(ctx, {
    plugins: [ChartDataLabels],
    type: 'bar',
    data: {
      labels: labels,
      datasets: [
        {
          label: 'Progress Rate',
          data: scores,
          backgroundColor: '#f398ae',
        },
        {
          label: 'Success Rate',
          data: accuracies,
          backgroundColor: '#78b5f1',
        },
        {
          label: 'Grounding Accuracy',
          data: groundings,
          backgroundColor: '#f3e276',
        }
      ]
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      interaction: {
        mode: 'index',
      },
      indexAxis: 'y',
      scales: {
        x: {
          // NOTE(review): Chart.js v3+ reads beginAtZero/min/max from the
          // scale itself rather than from `ticks`, so these three entries are
          // probably ignored. Left as-is to keep the current rendering
          // (moving min/max up would disable `grace`) - confirm against the
          // Chart.js version in use.
          ticks: {
            beginAtZero: true,
            min: 0,
            max: 100,
            font: boldChartFont(12)
          },
          grace: 10,
          title: {
            display: true,
            text: 'Value (%)',
            font: boldChartFont(14)
          }
        },
        y: {
          ticks: {
            font: boldChartFont(12)
          },
          title: {
            display: true,
            text: 'Model',
            font: boldChartFont(14)
          }
        }
      },
      plugins: {
        legend: {
          display: true,
          labels: {
            usePointStyle: true,
            font: boldChartFont(10)
          },
          align: 'center',
          position: 'bottom'
        },
        tooltip: {
          callbacks: {
            label: function (context) {
              let label = context.dataset.label || '';
              if (label) {
                label += ': ';
              }
              label += context.formattedValue;
              return label;
            }
          }
        },
        datalabels: {
          align: 'end',
          anchor: 'end',
          color: 'black',
          padding: 0,
          formatter: function (value, context) {
            return value.toFixed(2);
          },
          // Scale the label font with the chart width, capped at 10px.
          font: function (context) {
            const width = context.chart.width;
            const size = Math.min(Math.round(width / 48), 10);
            return {
              size: size,
              family: "'Noto Sans', sans-serif",
            };
          }
        }
      }
    }
  });

  const subTaskLabels = taskSubtaskMapping[currentTask] || [];
  if (lineGraph) {
    lineGraph.destroy();
  }
  const datasets = metric ? buildLineGraphDatasets(subTaskLabels, metric.field) : [];
  const yAxisTitle = metric ? metric.axisTitle : '';
  const lineGraphCtx = lineGraphCanvas.getContext('2d');
  lineGraph = new Chart(lineGraphCtx, {
    type: 'line',
    data: {
      labels: subTaskLabels,
      datasets: datasets
    },
    options: {
      responsive: true,
      maintainAspectRatio: false,
      scales: {
        x: {
          ticks: {
            font: boldChartFont(12)
          },
          title: {
            display: true,
            text: 'Sub-Task',
            font: boldChartFont(14),
          },
        },
        y: {
          ticks: {
            font: boldChartFont(12)
          },
          title: {
            display: true,
            text: yAxisTitle,
            font: boldChartFont(14),
          },
          min: 0,
          max: 100,
        },
      },
      plugins: {
        legend: {
          display: true,
          labels: {
            usePointStyle: true,
            font: boldChartFont(10),
          },
          align: 'center',
          position: 'bottom',
        },
        // Filled in by highlightModel / cleared by removeHighlight.
        annotation: {
          annotations: []
        }
      }
    }
  });

  attachBarHoverHandler(ctx);
}
// Task-group filter buttons: clicking one makes it the only active button,
// derives the task key from its id (the part after "filter-by-") and redraws
// both charts.
document.querySelectorAll('.btn-group.task-filter-selector .btn').forEach((taskButton) => {
  taskButton.addEventListener('click', function onTaskFilterClick() {
    currentTask = taskButton.id.replace('filter-by-', '');

    const activeButtons = document.querySelectorAll('.btn-group.task-filter-selector .btn.active');
    activeButtons.forEach((activeButton) => activeButton.classList.remove('active'));
    taskButton.classList.add('active');

    createMainResultChart();
  });
});
/**
 * Emphasise one model's line in the line graph.
 *
 * The dataset whose label equals `labels[index]` is drawn black and 4px wide;
 * every other dataset gets its colour from `modelColors` and a 2px width. The
 * graph's annotations are replaced with those from createAnnotations for the
 * model, and the graph is redrawn.
 *
 * @param {string[]} labels - Model names in bar-chart order.
 * @param {number} index - Position in `labels` of the model to highlight.
 * @returns {number} Index of the highlighted dataset, or -1 if no dataset
 *   carries that label.
 */
function highlightModel(labels, index) {
  const targetName = labels[index];
  const series = lineGraph.data.datasets;
  let matchedPosition = -1;

  for (let position = 0; position < series.length; position += 1) {
    const entry = series[position];
    const isTarget = entry.label === targetName;
    entry.borderColor = isTarget ? '#000000' : modelColors[entry.label];
    entry.borderWidth = isTarget ? 4 : 2;
    if (isTarget) {
      matchedPosition = position;
    }
  }

  lineGraph.options.plugins.annotation.annotations = createAnnotations(targetName);
  lineGraph.update();
  return matchedPosition;
}
/**
 * Undo any highlight in the line graph: every dataset gets its colour from
 * `modelColors` and a 2px border, all annotations are dropped, and the graph
 * is redrawn.
 */
function removeHighlight() {
  for (const series of lineGraph.data.datasets) {
    Object.assign(series, {
      borderColor: modelColors[series.label],
      borderWidth: 2,
    });
  }
  lineGraph.options.plugins.annotation.annotations = [];
  lineGraph.update();
}
/**
 * Build the label annotation shown in the line graph for a highlighted model.
 *
 * The label lists the model's value of the currently selected metric for each
 * sub-task of the current task group. In the 'Avg' view sub-task names are
 * abbreviated via SubtaskNameMapping and the header is `<model>(%):`;
 * otherwise the header names the metric and each value carries a `%` suffix.
 * The label is centred horizontally across the sub-task axis and placed
 * vertically midway between the smallest and largest listed value.
 *
 * Sub-tasks the model has no data for (or whose value is not a number) are
 * skipped rather than repeated from the previous sub-task.
 *
 * @param {string} modelName - Value of `model` to look up in rawData.
 * @returns {Object[]} An array holding a single label annotation, or an empty
 *   array when the model is unknown or has no values to list.
 */
function createAnnotations(modelName) {
  const subtasks = taskSubtaskMapping[currentTask] || [];
  const modelData = rawData.find(data => data.model === modelName);
  if (!modelData || !modelData.tasks) {
    return [];
  }

  // Metric field and header wording follow the active sort option; success
  // rate is the fallback, matching the default header text.
  let metricField = 'accuracy';
  let metricLabel = 'success rate';
  if (cur_sortby_option === sortby_options.BY_REWARD_SCORE) {
    metricField = 'score';
    metricLabel = 'progress rate';
  } else if (cur_sortby_option === sortby_options.BY_GROUNDING_ACC) {
    metricField = 'grounding';
    metricLabel = 'grounding acc';
  }

  const isAverageView = currentTask === 'Avg';
  let content = isAverageView
    ? `${modelData.model}(%):\n`
    : `${modelData.model} (${metricLabel}):\n`;
  let minYValue = 100;
  let maxYValue = 0;
  let listedCount = 0;

  subtasks.forEach(subtask => {
    const entry = modelData.tasks[subtask];
    if (!entry) {
      return;
    }
    const value = parseFloat(entry[metricField]);
    if (Number.isNaN(value)) {
      return;
    }
    listedCount += 1;
    minYValue = Math.min(minYValue, value);
    maxYValue = Math.max(maxYValue, value);
    // Kept from the original logic: a running maximum of exactly 0 is bumped
    // to 0.6 (presumably to lift the label off the axis - confirm intent).
    if (maxYValue === 0) {
      maxYValue = 0.6;
    }
    if (isAverageView) {
      const shortName = SubtaskNameMapping[subtask] || subtask;
      content += `${shortName}: ${value.toFixed(1)}\n`;
    } else {
      content += `${subtask}: ${value.toFixed(1)}%\n`;
    }
  });

  if (listedCount === 0) {
    return [];
  }

  // One annotation is enough: the label text already covers every sub-task.
  return [{
    type: 'label',
    content: content,
    xValue: (subtasks.length - 1) / 2,
    yValue: (maxYValue + minYValue) / 2,
    backgroundColor: 'rgba(255,255,255,0.8)',
    font: {
      size: 9,
      weight: 'bold',
      color: 'black',
      family: "'Noto Sans', sans-serif"
    },
    xPadding: 6,
    yPadding: 6,
    borderColor: 'black',
    borderWidth: 1,
    borderRadius: 6,
    position: 'center',
    adjustScaleRange: true
  }];
}
/**
 * Zoom the line graph's y axis onto the value range of one dataset.
 *
 * The axis is set to [min - buffer, max + buffer] of the dataset's numeric
 * values, with the lower bound clamped at 0. The buffer is 0.5 when the
 * values span more than 2 and 1.5 otherwise. The graph is then redrawn.
 *
 * Nothing happens when `selectedIndex` does not refer to a dataset (for
 * example the -1 that highlightModel returns for an unknown model) or when
 * the dataset contains no numeric values.
 *
 * @param {*} labels - Unused; kept so existing call sites stay valid.
 * @param {number} selectedIndex - Index into lineGraph.data.datasets.
 */
function updateLineGraphScale(labels, selectedIndex) {
  const dataset = lineGraph.data.datasets[selectedIndex];
  if (!dataset) {
    return;
  }
  // Missing sub-tasks are stored as null; ignore them (and NaN) for both ends.
  const values = dataset.data.filter(v => typeof v === 'number' && Number.isFinite(v));
  if (values.length === 0) {
    return;
  }
  const maxValue = Math.max(...values);
  const minValue = Math.min(...values);
  const buffer = (maxValue - minValue) > 2 ? 0.5 : 1.5;
  lineGraph.options.scales.y.min = Math.max(0, minValue - buffer);
  lineGraph.options.scales.y.max = maxValue + buffer;
  lineGraph.update();
}
/**
 * Put the line graph's y axis back to the fixed 0-100 range and redraw.
 */
function resetLineGraphScale() {
  const yAxis = lineGraph.options.scales.y;
  yAxis.min = 0;
  yAxis.max = 100;
  lineGraph.update();
}
// Sort-metric buttons: each sort option's value is the id of its button.
// Clicking a button makes it the only active one in the metric selector,
// switches the sort option and redraws both charts.
Object.values(sortby_options).forEach(sortby_option => {
  const btn = document.getElementById(sortby_option);
  if (!btn) {
    // A missing button must not abort the script: the statements after this
    // loop (including the data-loading handler) still need to run.
    console.warn('Sort button not found:', sortby_option);
    return;
  }
  btn.addEventListener('click', () => {
    document.querySelectorAll('.btn-group.metric-filter-selector .btn.active').forEach(active => {
      active.classList.remove('active');
    });
    btn.classList.add('active');
    cur_sortby_option = sortby_option;
    createMainResultChart();
  });
});
// Once the DOM is ready, download the leaderboard data, assign per-model
// colours and border styles, and draw the charts for the first time.
document.addEventListener('DOMContentLoaded', function () {
  fetch('/agentboard/data/To_Release/main_data_new.json')
    .then((response) => {
      // fetch only rejects on network failure; an HTTP error status still
      // resolves, so it has to be checked before parsing the body.
      if (!response.ok) {
        throw new Error(`Failed to load leaderboard data: HTTP ${response.status}`);
      }
      return response.json();
    })
    .then((loadedData) => {
      rawData = loadedData;
      generateModelColorsAndStyles(rawData.map(data => data.model));
      createMainResultChart();
    })
    .catch((error) => {
      console.error('Unable to initialise the leaderboard charts:', error);
    });
});