File size: 3,014 Bytes
4106f13
 
 
 
aa92add
 
 
 
 
 
 
4106f13
 
 
2cdada4
 
 
 
 
aa92add
 
 
 
 
 
 
 
 
 
 
 
4106f13
 
 
aa92add
 
 
 
 
 
 
 
b0aa389
aa92add
 
 
b0aa389
aa92add
 
 
 
 
 
b0aa389
 
aa92add
 
 
 
 
 
34b05c6
 
aa92add
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4106f13
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import { Column } from 'primereact/column'
import ScoreField from './ScoreField'

const scoreBodyTemplate = (field, options = {}) => {
  const {
    minScore = 0,
    maxScore = 1,
    machineTranslatedMetrics = [],
    ciLowerField = null,
    ciUpperField = null
  } = options

  return rowData => {
    const score = rowData[field]
    const rowFlagKey = `${field}_is_machine`
    const hasRowFlag = Object.prototype.hasOwnProperty.call(rowData, rowFlagKey)
    const isMachineTranslated = hasRowFlag
      ? !!rowData[rowFlagKey]
      : machineTranslatedMetrics.includes(field)
    const ciLower = ciLowerField ? rowData[ciLowerField] : null
    const ciUpper = ciUpperField ? rowData[ciUpperField] : null
    return (
      <ScoreField
        score={score}
        minScore={minScore}
        maxScore={maxScore}
        isMachineTranslated={isMachineTranslated}
        ciLower={ciLower}
        ciUpper={ciUpper}
      />
    )
  }
}

const createScoreColumn = (
  field,
  header,
  tooltip,
  minScore,
  maxScore,
  machineTranslatedMetrics
) => (
  <Column
    field={field}
    header={header}
    headerTooltip={tooltip}
    sortable
    body={scoreBodyTemplate(field, {
      minScore,
      maxScore,
      machineTranslatedMetrics,
      ciLowerField: `${field}_ci_lower`,
      ciUpperField: `${field}_ci_upper`
    })}
    style={{ minWidth: '5rem', maxWidth: '10rem' }}
  />
)

/**
 * Returns the array of score <Column> elements, one per metric, in the order
 * listed below. Every column uses a score range of 0 to 1.
 *
 * @param {string[]} [machineTranslatedMetrics=[]] - Passed unchanged to each
 *   column via `createScoreColumn`.
 * @returns {Array} The column elements produced by `createScoreColumn`.
 */
const ScoreColumns = (machineTranslatedMetrics = []) => {
  // One entry per column: [row field, header text, header tooltip].
  const columnSpecs = [
    [
      'average',
      'Overall',
      'Overall Score (average of the scores for each task)'
    ],
    [
      'translation_from_bleu',
      'Translation (from)',
      'Translation performance from a language to all other languages (spBLEU score on a sample of the FLORES+ benchmark)'
    ],
    [
      'translation_to_bleu',
      'Translation (to)',
      'Translation performance from all other languages to a language (spBLEU score on a sample of the FLORES+ benchmark)'
    ],
    [
      'classification_accuracy',
      'Classification',
      'Classification performance (accuracy on a sample of the SIB-200 / FLORES+ classification benchmark)'
    ],
    [
      'mmlu_accuracy',
      'Q&A',
      'Question Answering performance (accuracy on a sample of multilingual versions of the MMLU benchmark)'
    ],
    [
      'arc_accuracy',
      'Advanced Q&A',
      'Advanced Question Answering performance (accuracy on a sample of multilingual versions of the ARC-Easy benchmark)'
    ],
    [
      'mgsm_accuracy',
      'Math',
      'Math Problem Solving performance (accuracy on a sample of multilingual versions of the GSM8K benchmark)'
    ]
  ]

  return columnSpecs.map(([field, header, tooltip]) =>
    createScoreColumn(field, header, tooltip, 0, 1, machineTranslatedMetrics)
  )
}

export default ScoreColumns