import { Column } from 'primereact/column'
import ScoreField from './ScoreField'
/**
 * Builds a per-row render callback for the score stored under `field`.
 *
 * @param {string} field - Row property key holding the score; also used to
 *   derive the per-row flag key `${field}_is_machine`.
 * @param {Object} [options={}] - Optional settings.
 * @param {number} [options.minScore=0] - Lower bound of the score range.
 * @param {number} [options.maxScore=1] - Upper bound of the score range.
 * @param {Array} [options.machineTranslatedMetrics=[]] - Fallback list checked
 *   with `includes(field)` when the row carries no `_is_machine` flag.
 * @param {?string} [options.ciLowerField=null] - Row key read for the lower
 *   confidence bound; when falsy the bound is `null`.
 * @param {?string} [options.ciUpperField=null] - Row key read for the upper
 *   confidence bound; when falsy the bound is `null`.
 * @returns {Function} Callback that accepts `rowData`.
 *   NOTE(review): what the callback returns is not visible in this view —
 *   presumably a rendered ScoreField element; confirm against the full file.
 */
const scoreBodyTemplate = (field, options = {}) => {
const {
minScore = 0,
maxScore = 1,
machineTranslatedMetrics = [],
ciLowerField = null,
ciUpperField = null
} = options
return rowData => {
const score = rowData[field]
// A row-level `<field>_is_machine` own property, when present, takes
// precedence over the list-based fallback; `!!` coerces it to a boolean.
const rowFlagKey = `${field}_is_machine`
const hasRowFlag = Object.prototype.hasOwnProperty.call(rowData, rowFlagKey)
const isMachineTranslated = hasRowFlag
? !!rowData[rowFlagKey]
: machineTranslatedMetrics.includes(field)
// Confidence bounds are only read when a field name was configured.
const ciLower = ciLowerField ? rowData[ciLowerField] : null
const ciUpper = ciUpperField ? rowData[ciUpperField] : null
// NOTE(review): the returned markup is missing from this view. Presumably it
// renders ScoreField from score, minScore, maxScore, isMachineTranslated,
// ciLower and ciUpper — confirm against the full file.
return (
)
}
}
/**
 * Creates a single score column entry for the list built by `ScoreColumns`.
 *
 * NOTE(review): the body of this function is not visible in this view.
 * Presumably it returns a PrimeReact `Column` whose body renders the score —
 * confirm against the full file. The parameter notes below are taken from the
 * call sites in this file.
 *
 * @param {string} field - Metric key, e.g. 'average' or 'mmlu_accuracy'.
 * @param {string} header - Short column title, e.g. 'Overall'.
 * @param {string} tooltip - Longer description of the metric.
 * @param {number} minScore - Lower score bound; every caller here passes 0.
 * @param {number} maxScore - Upper score bound; every caller here passes 1.
 * @param {Array} machineTranslatedMetrics - List forwarded from `ScoreColumns`;
 *   presumably metric keys flagged as machine translated — TODO confirm.
 */
const createScoreColumn = (
field,
header,
tooltip,
minScore,
maxScore,
machineTranslatedMetrics
) => (
)
/**
 * Builds the ordered list of score columns.
 *
 * Each entry is the result of calling `createScoreColumn` with a metric's
 * field, header and tooltip, a fixed score range of 0 to 1, and the same
 * `machineTranslatedMetrics` value.
 *
 * @param {Array} [machineTranslatedMetrics=[]] - Passed through unchanged to
 *   every `createScoreColumn` call.
 * @returns {Array} One `createScoreColumn` result per metric, in the order
 *   the metrics are listed below.
 */
const ScoreColumns = (machineTranslatedMetrics = []) => {
  // One [field, header, tooltip] triple per column, in output order.
  const columnSpecs = [
    [
      'average',
      'Overall',
      'Overall Score (average of the scores for each task)'
    ],
    [
      'translation_from_bleu',
      'Translation (from)',
      'Translation performance from a language to all other languages (spBLEU score on a sample of the FLORES+ benchmark)'
    ],
    [
      'translation_to_bleu',
      'Translation (to)',
      'Translation performance from all other languages to a language (spBLEU score on a sample of the FLORES+ benchmark)'
    ],
    [
      'classification_accuracy',
      'Classification',
      'Classification performance (accuracy on a sample of the SIB-200 / FLORES+ classification benchmark)'
    ],
    [
      'mmlu_accuracy',
      'Q&A',
      'Question Answering performance (accuracy on a sample of multilingual versions of the MMLU benchmark)'
    ],
    [
      'arc_accuracy',
      'Advanced Q&A',
      'Advanced Question Answering performance (accuracy on a sample of multilingual versions of the ARC-Easy benchmark)'
    ],
    [
      'mgsm_accuracy',
      'Math',
      'Math Problem Solving performance (accuracy on a sample of multilingual versions of the GSM8K benchmark)'
    ]
  ]

  return columnSpecs.map(([metricField, title, description]) =>
    createScoreColumn(
      metricField,
      title,
      description,
      0,
      1,
      machineTranslatedMetrics
    )
  )
}
export default ScoreColumns