{
"input_file": "data/dataset/9_17/demo.json",
"models": [
"my_lora",
"/data/models/Qwen3-8B"
],
"baseline_model": "/data/models/Qwen3-8B",
"runs": {
"my_lora": {
"output_file": "evaluation/multi_0926_v12/result_my_lora.json",
"summary": {
"total_conversations": 1,
"total_pairs": 3,
"pair_metrics": {
"pair1": {
"total": 1,
"accuracy": 0.625,
"precision@1": 1.0
},
"pair2": {
"total": 1,
"accuracy": 0.667,
"precision@1": 1.0
},
"pair2_consider_recall": {
"total": 1,
"accuracy": 0.667,
"precision@1": 1.0
},
"pair2_recall_subset": {
"total": 1,
"accuracy": 0.667,
"precision@1": 1.0
},
"pair3": {
"total": 1,
"answer_score": 0.8
}
},
"recall_metrics": {
"total_pairs": 1,
"recall@5_1": 1,
"recall@5_0": 0,
"recall_rate": 1.0
},
"overall_metrics": {
"total": 3,
"accuracy": 0.646,
"precision@1": 1.0,
"answer_score": 0.8
},
"baseline": {
"enabled": true,
"is_baseline": false,
"baseline_model": "/data/models/Qwen3-8B",
        "current_model": "my_lora"
}
}
},
"/data/models/Qwen3-8B": {
"output_file": "evaluation/multi_0926_v12/result__data_models_Qwen3-8B.json",
"summary": {
"total_conversations": 1,
"total_pairs": 3,
"pair_metrics": {
"pair1": {
"total": 1,
"accuracy": 0.75,
"precision@1": 1.0
},
"pair2": {
"total": 1,
"accuracy": 0.667,
"precision@1": 1.0
},
"pair2_consider_recall": {
"total": 1,
"accuracy": 0.667,
"precision@1": 1.0
},
"pair2_recall_subset": {
"total": 1,
"accuracy": 0.667,
"precision@1": 1.0
},
"pair3": {
"total": 1,
"answer_score": 0.7
}
},
"recall_metrics": {
"total_pairs": 1,
"recall@5_1": 1,
"recall@5_0": 0,
"recall_rate": 1.0
},
"overall_metrics": {
"total": 3,
"accuracy": 0.708,
"precision@1": 1.0,
"answer_score": 0.7
},
      "baseline": {
        "enabled": true,
        "is_baseline": true,
        "baseline_model": "/data/models/Qwen3-8B",
        "current_model": "/data/models/Qwen3-8B"
      }
}
}
},
"comparison": {
"my_lora": {
"overall_metrics": {
"total": 3,
"accuracy": 0.646,
"precision@1": 1.0,
"answer_score": 0.8
},
"pair1": {
"total": 1,
"accuracy": 0.625,
"precision@1": 1.0
},
"pair2": {
"total": 1,
"accuracy": 0.667,
"precision@1": 1.0
},
"pair3": {
"total": 1,
"answer_score": 0.8
}
},
"/data/models/Qwen3-8B": {
"overall_metrics": {
"total": 3,
"accuracy": 0.708,
"precision@1": 1.0,
"answer_score": 0.7
},
"pair1": {
"total": 1,
"accuracy": 0.75,
"precision@1": 1.0
},
"pair2": {
"total": 1,
"accuracy": 0.667,
"precision@1": 1.0
},
"pair3": {
"total": 1,
"answer_score": 0.7
}
}
}
}