{ "input_file": "/home/ziqiang/LLaMA-Factory/data/dataset/9_17/9.17_evaluate_data_top5_final.json", "models": [ "/data/models/Qwen3-8B", "my_lora" ], "baseline_model": "/data/models/Qwen3-8B", "runs": { "/data/models/Qwen3-8B": { "output_file": "evaluation/multi_baseline_lora/result__data_models_Qwen3-8B.json", "summary": { "total_conversations": 397, "total_pairs": 1191, "pair_metrics": { "pair1": { "total": 397, "accuracy": 0.895, "precision@1": 1.0 }, "pair2": { "total": 397, "accuracy": 0.778, "precision@1": 0.872 }, "pair2_consider_recall": { "total": 332, "accuracy": 0.789, "precision@1": 0.895 }, "pair2_recall_subset": { "total": 332, "accuracy": 0.789, "precision@1": 0.895 }, "pair3": { "total": 397, "answer_score": 0.217 } }, "recall_metrics": { "total_pairs": 397, "recall@5_1": 332, "recall@5_0": 65, "recall_rate": 0.836 }, "overall_metrics": { "total": 1191, "accuracy": 0.836, "precision@1": 0.936, "answer_score": 0.217 }, "baseline": { "enabled": false, "is_baseline": true, "baseline_model": "/data/models/Qwen3-8B", "current_model": "/data/models/Qwen3-8B" } } }, "my_lora": { "output_file": "evaluation/multi_baseline_lora/result_my_lora.json", "summary": { "total_conversations": 397, "total_pairs": 1191, "pair_metrics": { "pair1": { "total": 397, "accuracy": 0.895, "precision@1": 1.0 }, "pair2": { "total": 397, "accuracy": 0.779, "precision@1": 0.872 }, "pair2_consider_recall": { "total": 333, "accuracy": 0.791, "precision@1": 0.895 }, "pair2_recall_subset": { "total": 333, "accuracy": 0.791, "precision@1": 0.895 }, "pair3": { "total": 397, "answer_score": 0.218 } }, "recall_metrics": { "total_pairs": 397, "recall@5_1": 333, "recall@5_0": 64, "recall_rate": 0.839 }, "overall_metrics": { "total": 1191, "accuracy": 0.837, "precision@1": 0.936, "answer_score": 0.218 }, "baseline": { "enabled": false, "is_baseline": false, "baseline_model": "/data/models/Qwen3-8B", "current_model": "my_lora" } } } }, "comparison": { "/data/models/Qwen3-8B": { "overall_metrics": { 
"total": 1191, "accuracy": 0.836, "precision@1": 0.936, "answer_score": 0.217 }, "pair1": { "total": 397, "accuracy": 0.895, "precision@1": 1.0 }, "pair2": { "total": 397, "accuracy": 0.778, "precision@1": 0.872 }, "pair3": { "total": 397, "answer_score": 0.217 } }, "my_lora": { "overall_metrics": { "total": 1191, "accuracy": 0.837, "precision@1": 0.936, "answer_score": 0.218 }, "pair1": { "total": 397, "accuracy": 0.895, "precision@1": 1.0 }, "pair2": { "total": 397, "accuracy": 0.779, "precision@1": 0.872 }, "pair3": { "total": 397, "answer_score": 0.218 } } } }