| { | |
| "input_file": "data/dataset/9_17/demo_step1.json", | |
| "models": [ | |
| "/data/models/Qwen3-8B", | |
| "my_lora" | |
| ], | |
| "baseline_model": "/data/models/Qwen3-8B", | |
| "runs": { | |
| "/data/models/Qwen3-8B": { | |
| "output_file": "evaluation/multi_0926_v13/result__data_models_Qwen3-8B.json", | |
| "summary": { | |
| "total_conversations": 10, | |
| "total_pairs": 10, | |
| "pair_metrics": { | |
| "pair1": { | |
| "total": 10, | |
| "accuracy": 0.6, | |
| "precision@1": 0.9 | |
| } | |
| }, | |
| "recall_metrics": { | |
| "total_pairs": 0, | |
| "recall@5_1": 0, | |
| "recall@5_0": 0, | |
| "recall_rate": 0.0 | |
| }, | |
| "overall_metrics": { | |
| "total": 10, | |
| "accuracy": 0.6, | |
| "precision@1": 0.9, | |
| "answer_score": 0.0 | |
| }, | |
| "baseline": { | |
| "enabled": true, | |
| "is_baseline": false, | |
| "baseline_model": "/data/models/Qwen3-8B", | |
| "current_model": "my_lora" | |
| } | |
| } | |
| }, | |
| "my_lora": { | |
| "output_file": "evaluation/multi_0926_v13/result_my_lora.json", | |
| "summary": { | |
| "total_conversations": 10, | |
| "total_pairs": 10, | |
| "pair_metrics": { | |
| "pair1": { | |
| "total": 10, | |
| "accuracy": 0.675, | |
| "precision@1": 1.0 | |
| } | |
| }, | |
| "recall_metrics": { | |
| "total_pairs": 0, | |
| "recall@5_1": 0, | |
| "recall@5_0": 0, | |
| "recall_rate": 0.0 | |
| }, | |
| "overall_metrics": { | |
| "total": 10, | |
| "accuracy": 0.675, | |
| "precision@1": 1.0, | |
| "answer_score": 0.0 | |
| }, | |
| "baseline": { | |
| "enabled": true, | |
| "is_baseline": false, | |
| "baseline_model": "/data/models/Qwen3-8B", | |
| "current_model": "my_lora" | |
| } | |
| } | |
| } | |
| }, | |
| "comparison": { | |
| "/data/models/Qwen3-8B": { | |
| "overall_metrics": { | |
| "total": 10, | |
| "accuracy": 0.6, | |
| "precision@1": 0.9, | |
| "answer_score": 0.0 | |
| }, | |
| "pair1": { | |
| "total": 10, | |
| "accuracy": 0.6, | |
| "precision@1": 0.9 | |
| }, | |
| "pair2": {}, | |
| "pair3": {} | |
| }, | |
| "my_lora": { | |
| "overall_metrics": { | |
| "total": 10, | |
| "accuracy": 0.675, | |
| "precision@1": 1.0, | |
| "answer_score": 0.0 | |
| }, | |
| "pair1": { | |
| "total": 10, | |
| "accuracy": 0.675, | |
| "precision@1": 1.0 | |
| }, | |
| "pair2": {}, | |
| "pair3": {} | |
| } | |
| } | |
| } |