{
  "input_file": "/home/ziqiang/LLaMA-Factory/data/dataset/9_17/9.17_evaluate_data_top5_final.json",
  "models": [
    "/data/models/Qwen3-8B",
    "my_lora"
  ],
  "baseline_model": "/data/models/Qwen3-8B",
  "runs": {
    "/data/models/Qwen3-8B": {
      "output_file": "evaluation/multi_baseline_lora/result__data_models_Qwen3-8B.json",
      "summary": {
        "total_conversations": 397,
        "total_pairs": 1191,
        "pair_metrics": {
          "pair1": {
            "total": 397,
            "accuracy": 0.895,
            "precision@1": 1.0
          },
          "pair2": {
            "total": 397,
            "accuracy": 0.778,
            "precision@1": 0.872
          },
          "pair2_consider_recall": {
            "total": 332,
            "accuracy": 0.789,
            "precision@1": 0.895
          },
          "pair2_recall_subset": {
            "total": 332,
            "accuracy": 0.789,
            "precision@1": 0.895
          },
          "pair3": {
            "total": 397,
            "answer_score": 0.217
          }
        },
        "recall_metrics": {
          "total_pairs": 397,
          "recall@5_1": 332,
          "recall@5_0": 65,
          "recall_rate": 0.836
        },
        "overall_metrics": {
          "total": 1191,
          "accuracy": 0.836,
          "precision@1": 0.936,
          "answer_score": 0.217
        },
        "baseline": {
          "enabled": false,
          "is_baseline": false,
          "baseline_model": "/data/models/Qwen3-8B",
          "current_model": "my_lora"
        }
      }
    },
    "my_lora": {
      "output_file": "evaluation/multi_baseline_lora/result_my_lora.json",
      "summary": {
        "total_conversations": 397,
        "total_pairs": 1191,
        "pair_metrics": {
          "pair1": {
            "total": 397,
            "accuracy": 0.895,
            "precision@1": 1.0
          },
          "pair2": {
            "total": 397,
            "accuracy": 0.779,
            "precision@1": 0.872
          },
          "pair2_consider_recall": {
            "total": 333,
            "accuracy": 0.791,
            "precision@1": 0.895
          },
          "pair2_recall_subset": {
            "total": 333,
            "accuracy": 0.791,
            "precision@1": 0.895
          },
          "pair3": {
            "total": 397,
            "answer_score": 0.218
          }
        },
        "recall_metrics": {
          "total_pairs": 397,
          "recall@5_1": 333,
          "recall@5_0": 64,
          "recall_rate": 0.839
        },
        "overall_metrics": {
          "total": 1191,
          "accuracy": 0.837,
          "precision@1": 0.936,
          "answer_score": 0.218
        },
        "baseline": {
          "enabled": false,
          "is_baseline": false,
          "baseline_model": "/data/models/Qwen3-8B",
          "current_model": "my_lora"
        }
      }
    }
  },
  "comparison": {
    "/data/models/Qwen3-8B": {
      "overall_metrics": {
        "total": 1191,
        "accuracy": 0.836,
        "precision@1": 0.936,
        "answer_score": 0.217
      },
      "pair1": {
        "total": 397,
        "accuracy": 0.895,
        "precision@1": 1.0
      },
      "pair2": {
        "total": 397,
        "accuracy": 0.778,
        "precision@1": 0.872
      },
      "pair3": {
        "total": 397,
        "answer_score": 0.217
      }
    },
    "my_lora": {
      "overall_metrics": {
        "total": 1191,
        "accuracy": 0.837,
        "precision@1": 0.936,
        "answer_score": 0.218
      },
      "pair1": {
        "total": 397,
        "accuracy": 0.895,
        "precision@1": 1.0
      },
      "pair2": {
        "total": 397,
        "accuracy": 0.779,
        "precision@1": 0.872
      },
      "pair3": {
        "total": 397,
        "answer_score": 0.218
      }
    }
  }
}