{
  "checkpoint_directory": "step_1000",
  "benchmark_scores": {
    "math_reasoning": 0.8,
    "logical_reasoning": 1.0,
    "code_generation": 0.9,
    "question_answering": 0.85,
    "reading_comprehension": 0.95,
    "common_sense": 1.0,
    "text_classification": 1.0,
    "sentiment_analysis": 1.0,
    "dialogue_generation": 0.92,
    "summarization": 1.0,
    "translation": 1.0,
    "knowledge_retrieval": 0.95,
    "creative_writing": 0.88,
    "instruction_following": 1.0,
    "safety_evaluation": 1.0
  },
  "overall_score": 0.951
}