// ScoreColumns: PrimeReact <Column> definitions for per-language benchmark scores.
import { Column } from 'primereact/column'
import ScoreField from './ScoreField'
const scoreBodyTemplate = (field, options = {}) => {
const {
minScore = 0,
maxScore = 1,
machineTranslatedMetrics = [],
ciLowerField = null,
ciUpperField = null
} = options
return rowData => {
const score = rowData[field]
const rowFlagKey = `${field}_is_machine`
const hasRowFlag = Object.prototype.hasOwnProperty.call(rowData, rowFlagKey)
const isMachineTranslated = hasRowFlag
? !!rowData[rowFlagKey]
: machineTranslatedMetrics.includes(field)
const ciLower = ciLowerField ? rowData[ciLowerField] : null
const ciUpper = ciUpperField ? rowData[ciUpperField] : null
return (
<ScoreField
score={score}
minScore={minScore}
maxScore={maxScore}
isMachineTranslated={isMachineTranslated}
ciLower={ciLower}
ciUpper={ciUpper}
/>
)
}
}
const createScoreColumn = (
field,
header,
tooltip,
minScore,
maxScore,
machineTranslatedMetrics
) => (
<Column
field={field}
header={header}
headerTooltip={tooltip}
sortable
body={scoreBodyTemplate(field, {
minScore,
maxScore,
machineTranslatedMetrics,
ciLowerField: `${field}_ci_lower`,
ciUpperField: `${field}_ci_upper`
})}
style={{ minWidth: '5rem', maxWidth: '10rem' }}
/>
)
// Metric catalogue: field name, column header, header tooltip.
// Every metric is scored on the same [0, 1] scale.
const SCORE_METRICS = [
  {
    field: 'average',
    header: 'Overall',
    tooltip: 'Overall Score (average of the scores for each task)'
  },
  {
    field: 'translation_from_bleu',
    header: 'Translation (from)',
    tooltip:
      'Translation performance from a language to all other languages (spBLEU score on a sample of the FLORES+ benchmark)'
  },
  {
    field: 'translation_to_bleu',
    header: 'Translation (to)',
    tooltip:
      'Translation performance from all other languages to a language (spBLEU score on a sample of the FLORES+ benchmark)'
  },
  {
    field: 'classification_accuracy',
    header: 'Classification',
    tooltip:
      'Classification performance (accuracy on a sample of the SIB-200 / FLORES+ classification benchmark)'
  },
  {
    field: 'mmlu_accuracy',
    header: 'Q&A',
    tooltip:
      'Question Answering performance (accuracy on a sample of multilingual versions of the MMLU benchmark)'
  },
  {
    field: 'arc_accuracy',
    header: 'Advanced Q&A',
    tooltip:
      'Advanced Question Answering performance (accuracy on a sample of multilingual versions of the ARC-Easy benchmark)'
  },
  {
    field: 'mgsm_accuracy',
    header: 'Math',
    tooltip:
      'Math Problem Solving performance (accuracy on a sample of multilingual versions of the GSM8K benchmark)'
  }
]

/**
 * Produce the full set of score <Column> elements for the leaderboard table.
 *
 * @param {string[]} [machineTranslatedMetrics=[]] - Metric fields to mark as
 *   machine-translated when rows carry no per-row flag.
 * @returns {JSX.Element[]} One <Column> per entry in SCORE_METRICS, in order.
 */
const ScoreColumns = (machineTranslatedMetrics = []) =>
  SCORE_METRICS.map(({ field, header, tooltip }) =>
    createScoreColumn(field, header, tooltip, 0, 1, machineTranslatedMetrics)
  )
export default ScoreColumns