| { | |
| "step_100": { | |
| "step": 100, | |
| "scores": { | |
| "math_reasoning": 0.345, | |
| "code_generation": 0.367, | |
| "text_classification": 0.413, | |
| "sentiment_analysis": 0.394, | |
| "question_answering": 0.351, | |
| "logical_reasoning": 0.444, | |
| "common_sense": 0.353, | |
| "reading_comprehension": 0.371, | |
| "dialogue_generation": 0.323, | |
| "summarization": 0.456, | |
| "translation": 0.503, | |
| "knowledge_retrieval": 0.311, | |
| "creative_writing": 0.302, | |
| "instruction_following": 0.386, | |
| "safety_evaluation": 0.33 | |
| }, | |
| "overall": 0.377 | |
| }, | |
| "step_200": { | |
| "step": 200, | |
| "scores": { | |
| "math_reasoning": 0.383, | |
| "code_generation": 0.383, | |
| "text_classification": 0.425, | |
| "sentiment_analysis": 0.406, | |
| "question_answering": 0.371, | |
| "logical_reasoning": 0.465, | |
| "common_sense": 0.365, | |
| "reading_comprehension": 0.381, | |
| "dialogue_generation": 0.335, | |
| "summarization": 0.461, | |
| "translation": 0.506, | |
| "knowledge_retrieval": 0.321, | |
| "creative_writing": 0.323, | |
| "instruction_following": 0.4, | |
| "safety_evaluation": 0.34 | |
| }, | |
| "overall": 0.392 | |
| }, | |
| "step_300": { | |
| "step": 300, | |
| "scores": { | |
| "math_reasoning": 0.415, | |
| "code_generation": 0.398, | |
| "text_classification": 0.436, | |
| "sentiment_analysis": 0.418, | |
| "question_answering": 0.388, | |
| "logical_reasoning": 0.484, | |
| "common_sense": 0.377, | |
| "reading_comprehension": 0.39, | |
| "dialogue_generation": 0.346, | |
| "summarization": 0.467, | |
| "translation": 0.509, | |
| "knowledge_retrieval": 0.331, | |
| "creative_writing": 0.341, | |
| "instruction_following": 0.414, | |
| "safety_evaluation": 0.35 | |
| }, | |
| "overall": 0.405 | |
| }, | |
| "step_400": { | |
| "step": 400, | |
| "scores": { | |
| "math_reasoning": 0.443, | |
| "code_generation": 0.412, | |
| "text_classification": 0.447, | |
| "sentiment_analysis": 0.429, | |
| "question_answering": 0.405, | |
| "logical_reasoning": 0.501, | |
| "common_sense": 0.388, | |
| "reading_comprehension": 0.399, | |
| "dialogue_generation": 0.357, | |
| "summarization": 0.472, | |
| "translation": 0.512, | |
| "knowledge_retrieval": 0.34, | |
| "creative_writing": 0.358, | |
| "instruction_following": 0.427, | |
| "safety_evaluation": 0.359 | |
| }, | |
| "overall": 0.418 | |
| }, | |
| "step_500": { | |
| "step": 500, | |
| "scores": { | |
| "math_reasoning": 0.467, | |
| "code_generation": 0.425, | |
| "text_classification": 0.457, | |
| "sentiment_analysis": 0.44, | |
| "question_answering": 0.42, | |
| "logical_reasoning": 0.517, | |
| "common_sense": 0.398, | |
| "reading_comprehension": 0.408, | |
| "dialogue_generation": 0.368, | |
| "summarization": 0.477, | |
| "translation": 0.515, | |
| "knowledge_retrieval": 0.348, | |
| "creative_writing": 0.373, | |
| "instruction_following": 0.439, | |
| "safety_evaluation": 0.367 | |
| }, | |
| "overall": 0.429 | |
| }, | |
| "step_600": { | |
| "step": 600, | |
| "scores": { | |
| "math_reasoning": 0.487, | |
| "code_generation": 0.437, | |
| "text_classification": 0.467, | |
| "sentiment_analysis": 0.45, | |
| "question_answering": 0.434, | |
| "logical_reasoning": 0.531, | |
| "common_sense": 0.407, | |
| "reading_comprehension": 0.416, | |
| "dialogue_generation": 0.378, | |
| "summarization": 0.482, | |
| "translation": 0.518, | |
| "knowledge_retrieval": 0.356, | |
| "creative_writing": 0.387, | |
| "instruction_following": 0.45, | |
| "safety_evaluation": 0.375 | |
| }, | |
| "overall": 0.44 | |
| }, | |
| "step_700": { | |
| "step": 700, | |
| "scores": { | |
| "math_reasoning": 0.506, | |
| "code_generation": 0.448, | |
| "text_classification": 0.476, | |
| "sentiment_analysis": 0.459, | |
| "question_answering": 0.447, | |
| "logical_reasoning": 0.543, | |
| "common_sense": 0.416, | |
| "reading_comprehension": 0.424, | |
| "dialogue_generation": 0.387, | |
| "summarization": 0.487, | |
| "translation": 0.521, | |
| "knowledge_retrieval": 0.364, | |
| "creative_writing": 0.4, | |
| "instruction_following": 0.461, | |
| "safety_evaluation": 0.383 | |
| }, | |
| "overall": 0.45 | |
| }, | |
| "step_800": { | |
| "step": 800, | |
| "scores": { | |
| "math_reasoning": 0.522, | |
| "code_generation": 0.459, | |
| "text_classification": 0.484, | |
| "sentiment_analysis": 0.468, | |
| "question_answering": 0.459, | |
| "logical_reasoning": 0.555, | |
| "common_sense": 0.424, | |
| "reading_comprehension": 0.432, | |
| "dialogue_generation": 0.396, | |
| "summarization": 0.491, | |
| "translation": 0.523, | |
| "knowledge_retrieval": 0.371, | |
| "creative_writing": 0.413, | |
| "instruction_following": 0.471, | |
| "safety_evaluation": 0.391 | |
| }, | |
| "overall": 0.459 | |
| }, | |
| "step_900": { | |
| "step": 900, | |
| "scores": { | |
| "math_reasoning": 0.537, | |
| "code_generation": 0.469, | |
| "text_classification": 0.492, | |
| "sentiment_analysis": 0.477, | |
| "question_answering": 0.471, | |
| "logical_reasoning": 0.566, | |
| "common_sense": 0.432, | |
| "reading_comprehension": 0.439, | |
| "dialogue_generation": 0.404, | |
| "summarization": 0.496, | |
| "translation": 0.526, | |
| "knowledge_retrieval": 0.378, | |
| "creative_writing": 0.424, | |
| "instruction_following": 0.48, | |
| "safety_evaluation": 0.398 | |
| }, | |
| "overall": 0.468 | |
| }, | |
| "step_1000": { | |
| "step": 1000, | |
| "scores": { | |
| "math_reasoning": 0.55, | |
| "code_generation": 0.479, | |
| "text_classification": 0.5, | |
| "sentiment_analysis": 0.485, | |
| "question_answering": 0.482, | |
| "logical_reasoning": 0.576, | |
| "common_sense": 0.44, | |
| "reading_comprehension": 0.446, | |
| "dialogue_generation": 0.412, | |
| "summarization": 0.5, | |
| "translation": 0.529, | |
| "knowledge_retrieval": 0.385, | |
| "creative_writing": 0.434, | |
| "instruction_following": 0.489, | |
| "safety_evaluation": 0.404 | |
| }, | |
| "overall": 0.476 | |
| } | |
| } |