import { Column } from 'primereact/column' import ScoreField from './ScoreField' const scoreBodyTemplate = (field, options = {}) => { const { minScore = 0, maxScore = 1, machineTranslatedMetrics = [], ciLowerField = null, ciUpperField = null } = options return rowData => { const score = rowData[field] const rowFlagKey = `${field}_is_machine` const hasRowFlag = Object.prototype.hasOwnProperty.call(rowData, rowFlagKey) const isMachineTranslated = hasRowFlag ? !!rowData[rowFlagKey] : machineTranslatedMetrics.includes(field) const ciLower = ciLowerField ? rowData[ciLowerField] : null const ciUpper = ciUpperField ? rowData[ciUpperField] : null return ( ) } } const createScoreColumn = ( field, header, tooltip, minScore, maxScore, machineTranslatedMetrics ) => ( ) const ScoreColumns = (machineTranslatedMetrics = []) => [ createScoreColumn( 'average', 'Overall', 'Overall Score (average of the scores for each task)', 0, 1, machineTranslatedMetrics ), createScoreColumn( 'translation_from_bleu', 'Translation (from)', 'Translation performance from a language to all other languages (spBLEU score on a sample of the FLORES+ benchmark)', 0, 1, machineTranslatedMetrics ), createScoreColumn( 'translation_to_bleu', 'Translation (to)', 'Translation performance from all other languages to a language (spBLEU score on a sample of the FLORES+ benchmark)', 0, 1, machineTranslatedMetrics ), createScoreColumn( 'classification_accuracy', 'Classification', 'Classification performance (accuracy on a sample of the SIB-200 / FLORES+ classification benchmark)', 0, 1, machineTranslatedMetrics ), createScoreColumn( 'mmlu_accuracy', 'Q&A', 'Question Answering performance (accuracy on a sample of multilingual versions of the MMLU benchmark)', 0, 1, machineTranslatedMetrics ), createScoreColumn( 'arc_accuracy', 'Advanced Q&A', 'Advanced Question Answering performance (accuracy on a sample of multilingual versions of the ARC-Easy benchmark)', 0, 1, machineTranslatedMetrics ), createScoreColumn( 'mgsm_accuracy', 'Math', 'Math Problem Solving performance (accuracy on a sample of multilingual versions of the GSM8K benchmark)', 0, 1, machineTranslatedMetrics ) ] export default ScoreColumns