{ "step_100": { "step": 100, "scores": { "math_reasoning": 0.345, "code_generation": 0.367, "text_classification": 0.413, "sentiment_analysis": 0.394, "question_answering": 0.351, "logical_reasoning": 0.444, "common_sense": 0.353, "reading_comprehension": 0.371, "dialogue_generation": 0.323, "summarization": 0.456, "translation": 0.503, "knowledge_retrieval": 0.311, "creative_writing": 0.302, "instruction_following": 0.386, "safety_evaluation": 0.33 }, "overall": 0.377 }, "step_200": { "step": 200, "scores": { "math_reasoning": 0.383, "code_generation": 0.383, "text_classification": 0.425, "sentiment_analysis": 0.406, "question_answering": 0.371, "logical_reasoning": 0.465, "common_sense": 0.365, "reading_comprehension": 0.381, "dialogue_generation": 0.335, "summarization": 0.461, "translation": 0.506, "knowledge_retrieval": 0.321, "creative_writing": 0.323, "instruction_following": 0.4, "safety_evaluation": 0.34 }, "overall": 0.392 }, "step_300": { "step": 300, "scores": { "math_reasoning": 0.415, "code_generation": 0.398, "text_classification": 0.436, "sentiment_analysis": 0.418, "question_answering": 0.388, "logical_reasoning": 0.484, "common_sense": 0.377, "reading_comprehension": 0.39, "dialogue_generation": 0.346, "summarization": 0.467, "translation": 0.509, "knowledge_retrieval": 0.331, "creative_writing": 0.341, "instruction_following": 0.414, "safety_evaluation": 0.35 }, "overall": 0.405 }, "step_400": { "step": 400, "scores": { "math_reasoning": 0.443, "code_generation": 0.412, "text_classification": 0.447, "sentiment_analysis": 0.429, "question_answering": 0.405, "logical_reasoning": 0.501, "common_sense": 0.388, "reading_comprehension": 0.399, "dialogue_generation": 0.357, "summarization": 0.472, "translation": 0.512, "knowledge_retrieval": 0.34, "creative_writing": 0.358, "instruction_following": 0.427, "safety_evaluation": 0.359 }, "overall": 0.418 }, "step_500": { "step": 500, "scores": { "math_reasoning": 0.467, "code_generation": 0.425, "text_classification": 0.457, "sentiment_analysis": 0.44, "question_answering": 0.42, "logical_reasoning": 0.517, "common_sense": 0.398, "reading_comprehension": 0.408, "dialogue_generation": 0.368, "summarization": 0.477, "translation": 0.515, "knowledge_retrieval": 0.348, "creative_writing": 0.373, "instruction_following": 0.439, "safety_evaluation": 0.367 }, "overall": 0.429 }, "step_600": { "step": 600, "scores": { "math_reasoning": 0.487, "code_generation": 0.437, "text_classification": 0.467, "sentiment_analysis": 0.45, "question_answering": 0.434, "logical_reasoning": 0.531, "common_sense": 0.407, "reading_comprehension": 0.416, "dialogue_generation": 0.378, "summarization": 0.482, "translation": 0.518, "knowledge_retrieval": 0.356, "creative_writing": 0.387, "instruction_following": 0.45, "safety_evaluation": 0.375 }, "overall": 0.44 }, "step_700": { "step": 700, "scores": { "math_reasoning": 0.506, "code_generation": 0.448, "text_classification": 0.476, "sentiment_analysis": 0.459, "question_answering": 0.447, "logical_reasoning": 0.543, "common_sense": 0.416, "reading_comprehension": 0.424, "dialogue_generation": 0.387, "summarization": 0.487, "translation": 0.521, "knowledge_retrieval": 0.364, "creative_writing": 0.4, "instruction_following": 0.461, "safety_evaluation": 0.383 }, "overall": 0.45 }, "step_800": { "step": 800, "scores": { "math_reasoning": 0.522, "code_generation": 0.459, "text_classification": 0.484, "sentiment_analysis": 0.468, "question_answering": 0.459, "logical_reasoning": 0.555, "common_sense": 0.424, "reading_comprehension": 0.432, "dialogue_generation": 0.396, "summarization": 0.491, "translation": 0.523, "knowledge_retrieval": 0.371, "creative_writing": 0.413, "instruction_following": 0.471, "safety_evaluation": 0.391 }, "overall": 0.459 }, "step_900": { "step": 900, "scores": { "math_reasoning": 0.537, "code_generation": 0.469, "text_classification": 0.492, "sentiment_analysis": 0.477, "question_answering": 0.471, "logical_reasoning": 0.566, "common_sense": 0.432, "reading_comprehension": 0.439, "dialogue_generation": 0.404, "summarization": 0.496, "translation": 0.526, "knowledge_retrieval": 0.378, "creative_writing": 0.424, "instruction_following": 0.48, "safety_evaluation": 0.398 }, "overall": 0.468 }, "step_1000": { "step": 1000, "scores": { "math_reasoning": 0.55, "code_generation": 0.479, "text_classification": 0.5, "sentiment_analysis": 0.485, "question_answering": 0.482, "logical_reasoning": 0.576, "common_sense": 0.44, "reading_comprehension": 0.446, "dialogue_generation": 0.412, "summarization": 0.5, "translation": 0.529, "knowledge_retrieval": 0.385, "creative_writing": 0.434, "instruction_following": 0.489, "safety_evaluation": 0.404 }, "overall": 0.476 } }