{ "benchmarks": [ "code_generation", "common_sense", "creative_writing", "dialogue_generation", "instruction_following", "knowledge_retrieval", "logical_reasoning", "math_reasoning", "question_answering", "reading_comprehension", "safety_evaluation", "sentiment_analysis", "summarization", "text_classification", "translation" ], "results": [ { "step": 100, "eval_accuracy": 0.705, "benchmarks": { "common_sense": 0.703, "creative_writing": 0.603, "instruction_following": 0.753, "knowledge_retrieval": 0.673, "logical_reasoning": 0.723, "math_reasoning": 0.623, "question_answering": 0.613, "reading_comprehension": 0.683, "safety_evaluation": 0.733, "sentiment_analysis": 0.803, "summarization": 0.763, "translation": 0.803, "code_generation": null, "dialogue_generation": null, "text_classification": null } }, { "step": 200, "eval_accuracy": 0.709, "benchmarks": { "common_sense": 0.707, "creative_writing": 0.607, "instruction_following": 0.757, "knowledge_retrieval": 0.677, "logical_reasoning": 0.727, "math_reasoning": 0.627, "question_answering": 0.617, "reading_comprehension": 0.687, "safety_evaluation": 0.737, "sentiment_analysis": 0.807, "summarization": 0.767, "translation": 0.807, "code_generation": null, "dialogue_generation": null, "text_classification": null } }, { "step": 300, "eval_accuracy": 0.711, "benchmarks": { "common_sense": 0.709, "creative_writing": 0.609, "instruction_following": 0.759, "knowledge_retrieval": 0.679, "logical_reasoning": 0.729, "math_reasoning": 0.629, "question_answering": 0.619, "reading_comprehension": 0.689, "safety_evaluation": 0.739, "sentiment_analysis": 0.809, "summarization": 0.769, "translation": 0.809, "code_generation": null, "dialogue_generation": null, "text_classification": null } }, { "step": 400, "eval_accuracy": 0.712, "benchmarks": { "common_sense": 0.71, "creative_writing": 0.61, "instruction_following": 0.76, "knowledge_retrieval": 0.68, "logical_reasoning": 0.73, "math_reasoning": 0.63, "question_answering": 0.62, "reading_comprehension": 0.69, "safety_evaluation": 0.74, "sentiment_analysis": 0.81, "summarization": 0.77, "translation": 0.81, "code_generation": null, "dialogue_generation": null, "text_classification": null } }, { "step": 500, "eval_accuracy": 0.713, "benchmarks": { "common_sense": 0.711, "creative_writing": 0.611, "instruction_following": 0.761, "knowledge_retrieval": 0.681, "logical_reasoning": 0.731, "math_reasoning": 0.631, "question_answering": 0.621, "reading_comprehension": 0.691, "safety_evaluation": 0.741, "sentiment_analysis": 0.811, "summarization": 0.771, "translation": 0.811, "code_generation": null, "dialogue_generation": null, "text_classification": null } }, { "step": 600, "eval_accuracy": 0.714, "benchmarks": { "common_sense": 0.712, "creative_writing": 0.612, "instruction_following": 0.762, "knowledge_retrieval": 0.682, "logical_reasoning": 0.732, "math_reasoning": 0.632, "question_answering": 0.622, "reading_comprehension": 0.692, "safety_evaluation": 0.742, "sentiment_analysis": 0.812, "summarization": 0.772, "translation": 0.812, "code_generation": null, "dialogue_generation": null, "text_classification": null } }, { "step": 700, "eval_accuracy": 0.715, "benchmarks": { "common_sense": 0.713, "creative_writing": 0.613, "instruction_following": 0.763, "knowledge_retrieval": 0.683, "logical_reasoning": 0.733, "math_reasoning": 0.633, "question_answering": 0.623, "reading_comprehension": 0.693, "safety_evaluation": 0.743, "sentiment_analysis": 0.813, "summarization": 0.773, "translation": 0.813, "code_generation": null, "dialogue_generation": null, "text_classification": null } }, { "step": 800, "eval_accuracy": 0.715, "benchmarks": { "common_sense": 0.713, "creative_writing": 0.613, "instruction_following": 0.763, "knowledge_retrieval": 0.683, "logical_reasoning": 0.733, "math_reasoning": 0.633, "question_answering": 0.623, "reading_comprehension": 0.693, "safety_evaluation": 0.743, "sentiment_analysis": 0.813, "summarization": 0.773, "translation": 0.813, "code_generation": null, "dialogue_generation": null, "text_classification": null } }, { "step": 900, "eval_accuracy": 0.716, "benchmarks": { "common_sense": 0.714, "creative_writing": 0.614, "instruction_following": 0.764, "knowledge_retrieval": 0.684, "logical_reasoning": 0.734, "math_reasoning": 0.634, "question_answering": 0.624, "reading_comprehension": 0.694, "safety_evaluation": 0.744, "sentiment_analysis": 0.814, "summarization": 0.774, "translation": 0.814, "code_generation": null, "dialogue_generation": null, "text_classification": null } }, { "step": 1000, "eval_accuracy": 0.717, "benchmarks": { "common_sense": 0.715, "creative_writing": 0.615, "instruction_following": 0.765, "knowledge_retrieval": 0.685, "logical_reasoning": 0.735, "math_reasoning": 0.635, "question_answering": 0.625, "reading_comprehension": 0.695, "safety_evaluation": 0.745, "sentiment_analysis": 0.815, "summarization": 0.775, "translation": 0.815, "code_generation": null, "dialogue_generation": null, "text_classification": null } } ] }