Upload evaluation_results.json with huggingface_hub
Browse files- evaluation_results.json +21 -0
evaluation_results.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"checkpoint_directory": "step_1000",
|
| 3 |
+
"benchmark_scores": {
|
| 4 |
+
"math_reasoning": 0.8,
|
| 5 |
+
"logical_reasoning": 1.0,
|
| 6 |
+
"code_generation": 0.9,
|
| 7 |
+
"question_answering": 0.85,
|
| 8 |
+
"reading_comprehension": 0.95,
|
| 9 |
+
"common_sense": 1.0,
|
| 10 |
+
"text_classification": 1.0,
|
| 11 |
+
"sentiment_analysis": 1.0,
|
| 12 |
+
"dialogue_generation": 0.92,
|
| 13 |
+
"summarization": 1.0,
|
| 14 |
+
"translation": 1.0,
|
| 15 |
+
"knowledge_retrieval": 0.95,
|
| 16 |
+
"creative_writing": 0.88,
|
| 17 |
+
"instruction_following": 1.0,
|
| 18 |
+
"safety_evaluation": 1.0
|
| 19 |
+
},
|
| 20 |
+
"overall_score": 0.951
|
| 21 |
+
}
|