File size: 5,842 Bytes
366a59f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | {
"step_100": {
"step": 100,
"scores": {
"math_reasoning": 0.345,
"code_generation": 0.367,
"text_classification": 0.413,
"sentiment_analysis": 0.394,
"question_answering": 0.351,
"logical_reasoning": 0.444,
"common_sense": 0.353,
"reading_comprehension": 0.371,
"dialogue_generation": 0.323,
"summarization": 0.456,
"translation": 0.503,
"knowledge_retrieval": 0.311,
"creative_writing": 0.302,
"instruction_following": 0.386,
"safety_evaluation": 0.33
},
"overall": 0.377
},
"step_200": {
"step": 200,
"scores": {
"math_reasoning": 0.383,
"code_generation": 0.383,
"text_classification": 0.425,
"sentiment_analysis": 0.406,
"question_answering": 0.371,
"logical_reasoning": 0.465,
"common_sense": 0.365,
"reading_comprehension": 0.381,
"dialogue_generation": 0.335,
"summarization": 0.461,
"translation": 0.506,
"knowledge_retrieval": 0.321,
"creative_writing": 0.323,
"instruction_following": 0.4,
"safety_evaluation": 0.34
},
"overall": 0.392
},
"step_300": {
"step": 300,
"scores": {
"math_reasoning": 0.415,
"code_generation": 0.398,
"text_classification": 0.436,
"sentiment_analysis": 0.418,
"question_answering": 0.388,
"logical_reasoning": 0.484,
"common_sense": 0.377,
"reading_comprehension": 0.39,
"dialogue_generation": 0.346,
"summarization": 0.467,
"translation": 0.509,
"knowledge_retrieval": 0.331,
"creative_writing": 0.341,
"instruction_following": 0.414,
"safety_evaluation": 0.35
},
"overall": 0.405
},
"step_400": {
"step": 400,
"scores": {
"math_reasoning": 0.443,
"code_generation": 0.412,
"text_classification": 0.447,
"sentiment_analysis": 0.429,
"question_answering": 0.405,
"logical_reasoning": 0.501,
"common_sense": 0.388,
"reading_comprehension": 0.399,
"dialogue_generation": 0.357,
"summarization": 0.472,
"translation": 0.512,
"knowledge_retrieval": 0.34,
"creative_writing": 0.358,
"instruction_following": 0.427,
"safety_evaluation": 0.359
},
"overall": 0.418
},
"step_500": {
"step": 500,
"scores": {
"math_reasoning": 0.467,
"code_generation": 0.425,
"text_classification": 0.457,
"sentiment_analysis": 0.44,
"question_answering": 0.42,
"logical_reasoning": 0.517,
"common_sense": 0.398,
"reading_comprehension": 0.408,
"dialogue_generation": 0.368,
"summarization": 0.477,
"translation": 0.515,
"knowledge_retrieval": 0.348,
"creative_writing": 0.373,
"instruction_following": 0.439,
"safety_evaluation": 0.367
},
"overall": 0.429
},
"step_600": {
"step": 600,
"scores": {
"math_reasoning": 0.487,
"code_generation": 0.437,
"text_classification": 0.467,
"sentiment_analysis": 0.45,
"question_answering": 0.434,
"logical_reasoning": 0.531,
"common_sense": 0.407,
"reading_comprehension": 0.416,
"dialogue_generation": 0.378,
"summarization": 0.482,
"translation": 0.518,
"knowledge_retrieval": 0.356,
"creative_writing": 0.387,
"instruction_following": 0.45,
"safety_evaluation": 0.375
},
"overall": 0.44
},
"step_700": {
"step": 700,
"scores": {
"math_reasoning": 0.506,
"code_generation": 0.448,
"text_classification": 0.476,
"sentiment_analysis": 0.459,
"question_answering": 0.447,
"logical_reasoning": 0.543,
"common_sense": 0.416,
"reading_comprehension": 0.424,
"dialogue_generation": 0.387,
"summarization": 0.487,
"translation": 0.521,
"knowledge_retrieval": 0.364,
"creative_writing": 0.4,
"instruction_following": 0.461,
"safety_evaluation": 0.383
},
"overall": 0.45
},
"step_800": {
"step": 800,
"scores": {
"math_reasoning": 0.522,
"code_generation": 0.459,
"text_classification": 0.484,
"sentiment_analysis": 0.468,
"question_answering": 0.459,
"logical_reasoning": 0.555,
"common_sense": 0.424,
"reading_comprehension": 0.432,
"dialogue_generation": 0.396,
"summarization": 0.491,
"translation": 0.523,
"knowledge_retrieval": 0.371,
"creative_writing": 0.413,
"instruction_following": 0.471,
"safety_evaluation": 0.391
},
"overall": 0.459
},
"step_900": {
"step": 900,
"scores": {
"math_reasoning": 0.537,
"code_generation": 0.469,
"text_classification": 0.492,
"sentiment_analysis": 0.477,
"question_answering": 0.471,
"logical_reasoning": 0.566,
"common_sense": 0.432,
"reading_comprehension": 0.439,
"dialogue_generation": 0.404,
"summarization": 0.496,
"translation": 0.526,
"knowledge_retrieval": 0.378,
"creative_writing": 0.424,
"instruction_following": 0.48,
"safety_evaluation": 0.398
},
"overall": 0.468
},
"step_1000": {
"step": 1000,
"scores": {
"math_reasoning": 0.55,
"code_generation": 0.479,
"text_classification": 0.5,
"sentiment_analysis": 0.485,
"question_answering": 0.482,
"logical_reasoning": 0.576,
"common_sense": 0.44,
"reading_comprehension": 0.446,
"dialogue_generation": 0.412,
"summarization": 0.5,
"translation": 0.529,
"knowledge_retrieval": 0.385,
"creative_writing": 0.434,
"instruction_following": 0.489,
"safety_evaluation": 0.404
},
"overall": 0.476
}
} |