| { |
| "step_100": { |
| "step": 100, |
| "scores": { |
| "math_reasoning": 0.345, |
| "code_generation": 0.367, |
| "text_classification": 0.413, |
| "sentiment_analysis": 0.394, |
| "question_answering": 0.351, |
| "logical_reasoning": 0.444, |
| "common_sense": 0.353, |
| "reading_comprehension": 0.371, |
| "dialogue_generation": 0.323, |
| "summarization": 0.456, |
| "translation": 0.503, |
| "knowledge_retrieval": 0.311, |
| "creative_writing": 0.302, |
| "instruction_following": 0.386, |
| "safety_evaluation": 0.33 |
| }, |
| "overall": 0.377 |
| }, |
| "step_200": { |
| "step": 200, |
| "scores": { |
| "math_reasoning": 0.383, |
| "code_generation": 0.383, |
| "text_classification": 0.425, |
| "sentiment_analysis": 0.406, |
| "question_answering": 0.371, |
| "logical_reasoning": 0.465, |
| "common_sense": 0.365, |
| "reading_comprehension": 0.381, |
| "dialogue_generation": 0.335, |
| "summarization": 0.461, |
| "translation": 0.506, |
| "knowledge_retrieval": 0.321, |
| "creative_writing": 0.323, |
| "instruction_following": 0.4, |
| "safety_evaluation": 0.34 |
| }, |
| "overall": 0.392 |
| }, |
| "step_300": { |
| "step": 300, |
| "scores": { |
| "math_reasoning": 0.415, |
| "code_generation": 0.398, |
| "text_classification": 0.436, |
| "sentiment_analysis": 0.418, |
| "question_answering": 0.388, |
| "logical_reasoning": 0.484, |
| "common_sense": 0.377, |
| "reading_comprehension": 0.39, |
| "dialogue_generation": 0.346, |
| "summarization": 0.467, |
| "translation": 0.509, |
| "knowledge_retrieval": 0.331, |
| "creative_writing": 0.341, |
| "instruction_following": 0.414, |
| "safety_evaluation": 0.35 |
| }, |
| "overall": 0.405 |
| }, |
| "step_400": { |
| "step": 400, |
| "scores": { |
| "math_reasoning": 0.443, |
| "code_generation": 0.412, |
| "text_classification": 0.447, |
| "sentiment_analysis": 0.429, |
| "question_answering": 0.405, |
| "logical_reasoning": 0.501, |
| "common_sense": 0.388, |
| "reading_comprehension": 0.399, |
| "dialogue_generation": 0.357, |
| "summarization": 0.472, |
| "translation": 0.512, |
| "knowledge_retrieval": 0.34, |
| "creative_writing": 0.358, |
| "instruction_following": 0.427, |
| "safety_evaluation": 0.359 |
| }, |
| "overall": 0.418 |
| }, |
| "step_500": { |
| "step": 500, |
| "scores": { |
| "math_reasoning": 0.467, |
| "code_generation": 0.425, |
| "text_classification": 0.457, |
| "sentiment_analysis": 0.44, |
| "question_answering": 0.42, |
| "logical_reasoning": 0.517, |
| "common_sense": 0.398, |
| "reading_comprehension": 0.408, |
| "dialogue_generation": 0.368, |
| "summarization": 0.477, |
| "translation": 0.515, |
| "knowledge_retrieval": 0.348, |
| "creative_writing": 0.373, |
| "instruction_following": 0.439, |
| "safety_evaluation": 0.367 |
| }, |
| "overall": 0.429 |
| }, |
| "step_600": { |
| "step": 600, |
| "scores": { |
| "math_reasoning": 0.487, |
| "code_generation": 0.437, |
| "text_classification": 0.467, |
| "sentiment_analysis": 0.45, |
| "question_answering": 0.434, |
| "logical_reasoning": 0.531, |
| "common_sense": 0.407, |
| "reading_comprehension": 0.416, |
| "dialogue_generation": 0.378, |
| "summarization": 0.482, |
| "translation": 0.518, |
| "knowledge_retrieval": 0.356, |
| "creative_writing": 0.387, |
| "instruction_following": 0.45, |
| "safety_evaluation": 0.375 |
| }, |
| "overall": 0.44 |
| }, |
| "step_700": { |
| "step": 700, |
| "scores": { |
| "math_reasoning": 0.506, |
| "code_generation": 0.448, |
| "text_classification": 0.476, |
| "sentiment_analysis": 0.459, |
| "question_answering": 0.447, |
| "logical_reasoning": 0.543, |
| "common_sense": 0.416, |
| "reading_comprehension": 0.424, |
| "dialogue_generation": 0.387, |
| "summarization": 0.487, |
| "translation": 0.521, |
| "knowledge_retrieval": 0.364, |
| "creative_writing": 0.4, |
| "instruction_following": 0.461, |
| "safety_evaluation": 0.383 |
| }, |
| "overall": 0.45 |
| }, |
| "step_800": { |
| "step": 800, |
| "scores": { |
| "math_reasoning": 0.522, |
| "code_generation": 0.459, |
| "text_classification": 0.484, |
| "sentiment_analysis": 0.468, |
| "question_answering": 0.459, |
| "logical_reasoning": 0.555, |
| "common_sense": 0.424, |
| "reading_comprehension": 0.432, |
| "dialogue_generation": 0.396, |
| "summarization": 0.491, |
| "translation": 0.523, |
| "knowledge_retrieval": 0.371, |
| "creative_writing": 0.413, |
| "instruction_following": 0.471, |
| "safety_evaluation": 0.391 |
| }, |
| "overall": 0.459 |
| }, |
| "step_900": { |
| "step": 900, |
| "scores": { |
| "math_reasoning": 0.537, |
| "code_generation": 0.469, |
| "text_classification": 0.492, |
| "sentiment_analysis": 0.477, |
| "question_answering": 0.471, |
| "logical_reasoning": 0.566, |
| "common_sense": 0.432, |
| "reading_comprehension": 0.439, |
| "dialogue_generation": 0.404, |
| "summarization": 0.496, |
| "translation": 0.526, |
| "knowledge_retrieval": 0.378, |
| "creative_writing": 0.424, |
| "instruction_following": 0.48, |
| "safety_evaluation": 0.398 |
| }, |
| "overall": 0.468 |
| }, |
| "step_1000": { |
| "step": 1000, |
| "scores": { |
| "math_reasoning": 0.55, |
| "code_generation": 0.479, |
| "text_classification": 0.5, |
| "sentiment_analysis": 0.485, |
| "question_answering": 0.482, |
| "logical_reasoning": 0.576, |
| "common_sense": 0.44, |
| "reading_comprehension": 0.446, |
| "dialogue_generation": 0.412, |
| "summarization": 0.5, |
| "translation": 0.529, |
| "knowledge_retrieval": 0.385, |
| "creative_writing": 0.434, |
| "instruction_following": 0.489, |
| "safety_evaluation": 0.404 |
| }, |
| "overall": 0.476 |
| } |
| } |