{
  "input_file": "data/dataset/9_17/demo.json",
  "models": [
    "my_lora",
    "/data/models/Qwen3-8B"
  ],
  "baseline_model": "/data/models/Qwen3-8B",
  "runs": {
    "my_lora": {
      "output_file": "evaluation/multi_0926_v12/result_my_lora.json",
      "summary": {
        "total_conversations": 1,
        "total_pairs": 3,
        "pair_metrics": {
          "pair1": {
            "total": 1,
            "accuracy": 0.625,
            "precision@1": 1.0
          },
          "pair2": {
            "total": 1,
            "accuracy": 0.667,
            "precision@1": 1.0
          },
          "pair2_consider_recall": {
            "total": 1,
            "accuracy": 0.667,
            "precision@1": 1.0
          },
          "pair2_recall_subset": {
            "total": 1,
            "accuracy": 0.667,
            "precision@1": 1.0
          },
          "pair3": {
            "total": 1,
            "answer_score": 0.8
          }
        },
        "recall_metrics": {
          "total_pairs": 1,
          "recall@5_1": 1,
          "recall@5_0": 0,
          "recall_rate": 1.0
        },
        "overall_metrics": {
          "total": 3,
          "accuracy": 0.646,
          "precision@1": 1.0,
          "answer_score": 0.8
        },
        "baseline": {
          "enabled": true,
          "is_baseline": false,
          "baseline_model": "/data/models/Qwen3-8B",
          "current_model": "my_lora"
        }
      }
    },
    "/data/models/Qwen3-8B": {
      "output_file": "evaluation/multi_0926_v12/result__data_models_Qwen3-8B.json",
      "summary": {
        "total_conversations": 1,
        "total_pairs": 3,
        "pair_metrics": {
          "pair1": {
            "total": 1,
            "accuracy": 0.75,
            "precision@1": 1.0
          },
          "pair2": {
            "total": 1,
            "accuracy": 0.667,
            "precision@1": 1.0
          },
          "pair2_consider_recall": {
            "total": 1,
            "accuracy": 0.667,
            "precision@1": 1.0
          },
          "pair2_recall_subset": {
            "total": 1,
            "accuracy": 0.667,
            "precision@1": 1.0
          },
          "pair3": {
            "total": 1,
            "answer_score": 0.7
          }
        },
        "recall_metrics": {
          "total_pairs": 1,
          "recall@5_1": 1,
          "recall@5_0": 0,
          "recall_rate": 1.0
        },
        "overall_metrics": {
          "total": 3,
          "accuracy": 0.708,
          "precision@1": 1.0,
          "answer_score": 0.7
        },
        "baseline": {
          "enabled": true,
          "is_baseline": true,
          "baseline_model": "/data/models/Qwen3-8B",
          "current_model": "/data/models/Qwen3-8B"
        }
      }
    }
  },
  "comparison": {
    "my_lora": {
      "overall_metrics": {
        "total": 3,
        "accuracy": 0.646,
        "precision@1": 1.0,
        "answer_score": 0.8
      },
      "pair1": {
        "total": 1,
        "accuracy": 0.625,
        "precision@1": 1.0
      },
      "pair2": {
        "total": 1,
        "accuracy": 0.667,
        "precision@1": 1.0
      },
      "pair3": {
        "total": 1,
        "answer_score": 0.8
      }
    },
    "/data/models/Qwen3-8B": {
      "overall_metrics": {
        "total": 3,
        "accuracy": 0.708,
        "precision@1": 1.0,
        "answer_score": 0.7
      },
      "pair1": {
        "total": 1,
        "accuracy": 0.75,
        "precision@1": 1.0
      },
      "pair2": {
        "total": 1,
        "accuracy": 0.667,
        "precision@1": 1.0
      },
      "pair3": {
        "total": 1,
        "answer_score": 0.7
      }
    }
  }
}