MyAwesomeModel-EvalReport / eval_summary.json
FuryAssassin's picture
Upload eval_summary.json with huggingface_hub
d097612 verified
{
"benchmarks": [
"code_generation",
"common_sense",
"creative_writing",
"dialogue_generation",
"instruction_following",
"knowledge_retrieval",
"logical_reasoning",
"math_reasoning",
"question_answering",
"reading_comprehension",
"safety_evaluation",
"sentiment_analysis",
"summarization",
"text_classification",
"translation"
],
"results": [
{
"step": 100,
"eval_accuracy": 0.705,
"benchmarks": {
"common_sense": 0.703,
"creative_writing": 0.603,
"instruction_following": 0.753,
"knowledge_retrieval": 0.673,
"logical_reasoning": 0.723,
"math_reasoning": 0.623,
"question_answering": 0.613,
"reading_comprehension": 0.683,
"safety_evaluation": 0.733,
"sentiment_analysis": 0.803,
"summarization": 0.763,
"translation": 0.803,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
},
{
"step": 200,
"eval_accuracy": 0.709,
"benchmarks": {
"common_sense": 0.707,
"creative_writing": 0.607,
"instruction_following": 0.757,
"knowledge_retrieval": 0.677,
"logical_reasoning": 0.727,
"math_reasoning": 0.627,
"question_answering": 0.617,
"reading_comprehension": 0.687,
"safety_evaluation": 0.737,
"sentiment_analysis": 0.807,
"summarization": 0.767,
"translation": 0.807,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
},
{
"step": 300,
"eval_accuracy": 0.711,
"benchmarks": {
"common_sense": 0.709,
"creative_writing": 0.609,
"instruction_following": 0.759,
"knowledge_retrieval": 0.679,
"logical_reasoning": 0.729,
"math_reasoning": 0.629,
"question_answering": 0.619,
"reading_comprehension": 0.689,
"safety_evaluation": 0.739,
"sentiment_analysis": 0.809,
"summarization": 0.769,
"translation": 0.809,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
},
{
"step": 400,
"eval_accuracy": 0.712,
"benchmarks": {
"common_sense": 0.71,
"creative_writing": 0.61,
"instruction_following": 0.76,
"knowledge_retrieval": 0.68,
"logical_reasoning": 0.73,
"math_reasoning": 0.63,
"question_answering": 0.62,
"reading_comprehension": 0.69,
"safety_evaluation": 0.74,
"sentiment_analysis": 0.81,
"summarization": 0.77,
"translation": 0.81,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
},
{
"step": 500,
"eval_accuracy": 0.713,
"benchmarks": {
"common_sense": 0.711,
"creative_writing": 0.611,
"instruction_following": 0.761,
"knowledge_retrieval": 0.681,
"logical_reasoning": 0.731,
"math_reasoning": 0.631,
"question_answering": 0.621,
"reading_comprehension": 0.691,
"safety_evaluation": 0.741,
"sentiment_analysis": 0.811,
"summarization": 0.771,
"translation": 0.811,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
},
{
"step": 600,
"eval_accuracy": 0.714,
"benchmarks": {
"common_sense": 0.712,
"creative_writing": 0.612,
"instruction_following": 0.762,
"knowledge_retrieval": 0.682,
"logical_reasoning": 0.732,
"math_reasoning": 0.632,
"question_answering": 0.622,
"reading_comprehension": 0.692,
"safety_evaluation": 0.742,
"sentiment_analysis": 0.812,
"summarization": 0.772,
"translation": 0.812,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
},
{
"step": 700,
"eval_accuracy": 0.715,
"benchmarks": {
"common_sense": 0.713,
"creative_writing": 0.613,
"instruction_following": 0.763,
"knowledge_retrieval": 0.683,
"logical_reasoning": 0.733,
"math_reasoning": 0.633,
"question_answering": 0.623,
"reading_comprehension": 0.693,
"safety_evaluation": 0.743,
"sentiment_analysis": 0.813,
"summarization": 0.773,
"translation": 0.813,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
},
{
"step": 800,
"eval_accuracy": 0.715,
"benchmarks": {
"common_sense": 0.713,
"creative_writing": 0.613,
"instruction_following": 0.763,
"knowledge_retrieval": 0.683,
"logical_reasoning": 0.733,
"math_reasoning": 0.633,
"question_answering": 0.623,
"reading_comprehension": 0.693,
"safety_evaluation": 0.743,
"sentiment_analysis": 0.813,
"summarization": 0.773,
"translation": 0.813,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
},
{
"step": 900,
"eval_accuracy": 0.716,
"benchmarks": {
"common_sense": 0.714,
"creative_writing": 0.614,
"instruction_following": 0.764,
"knowledge_retrieval": 0.684,
"logical_reasoning": 0.734,
"math_reasoning": 0.634,
"question_answering": 0.624,
"reading_comprehension": 0.694,
"safety_evaluation": 0.744,
"sentiment_analysis": 0.814,
"summarization": 0.774,
"translation": 0.814,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
},
{
"step": 1000,
"eval_accuracy": 0.717,
"benchmarks": {
"common_sense": 0.715,
"creative_writing": 0.615,
"instruction_following": 0.765,
"knowledge_retrieval": 0.685,
"logical_reasoning": 0.735,
"math_reasoning": 0.635,
"question_answering": 0.625,
"reading_comprehension": 0.695,
"safety_evaluation": 0.745,
"sentiment_analysis": 0.815,
"summarization": 0.775,
"translation": 0.815,
"code_generation": null,
"dialogue_generation": null,
"text_classification": null
}
}
]
}