File size: 11,791 Bytes
30bedf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
{
  "settings": {
    "n": 100,
    "seed": 42,
    "caption_field": "caption_cogvlm",
    "probe_count": 35,
    "retries": 2,
    "temperature": 0.0,
    "max_tokens": 900,
    "model_env": "meta-llama/llama-3.1-8b-instruct"
  },
  "overall_metrics": {
    "explicit": {
      "tp": 333,
      "fp": 409,
      "fn": 281,
      "precision": 0.448787,
      "recall": 0.542345,
      "f1": 0.49115
    },
    "strong": {
      "tp": 348,
      "fp": 444,
      "fn": 266,
      "precision": 0.439394,
      "recall": 0.566775,
      "f1": 0.495021
    }
  },
  "diagnostics": {
    "samples_with_attempt_failures": 0,
    "samples_with_call_exhaustion": 0,
    "avg_attempt_failure_rate": 0.0,
    "avg_call_exhaustion_rate": 0.0
  },
  "top_tags_by_f1_strong": [
    {
      "tag": "wide_hips",
      "bundle": "body_shape_breasts",
      "needs_glossary": "0",
      "support_pos": "1",
      "support_neg": "99",
      "tp_explicit": "1",
      "fp_explicit": "0",
      "fn_explicit": "0",
      "precision_explicit": "1.000000",
      "recall_explicit": "1.000000",
      "f1_explicit": "1.000000",
      "tp_strong": "1",
      "fp_strong": "0",
      "fn_strong": "0",
      "precision_strong": "1.000000",
      "recall_strong": "1.000000",
      "f1_strong": "1.000000"
    },
    {
      "tag": "anthro",
      "bundle": "body_type_presence",
      "needs_glossary": "1",
      "support_pos": "68",
      "support_neg": "32",
      "tp_explicit": "63",
      "fp_explicit": "19",
      "fn_explicit": "5",
      "precision_explicit": "0.768293",
      "recall_explicit": "0.926471",
      "f1_explicit": "0.840000",
      "tp_strong": "67",
      "fp_strong": "25",
      "fn_strong": "1",
      "precision_strong": "0.728261",
      "recall_strong": "0.985294",
      "f1_strong": "0.837500"
    },
    {
      "tag": "felid",
      "bundle": "species_taxonomy",
      "needs_glossary": "1",
      "support_pos": "18",
      "support_neg": "82",
      "tp_explicit": "11",
      "fp_explicit": "0",
      "fn_explicit": "7",
      "precision_explicit": "1.000000",
      "recall_explicit": "0.611111",
      "f1_explicit": "0.758621",
      "tp_strong": "12",
      "fp_strong": "1",
      "fn_strong": "6",
      "precision_strong": "0.923077",
      "recall_strong": "0.666667",
      "f1_strong": "0.774194"
    },
    {
      "tag": "group",
      "bundle": "count_cardinality",
      "needs_glossary": "0",
      "support_pos": "16",
      "support_neg": "84",
      "tp_explicit": "10",
      "fp_explicit": "1",
      "fn_explicit": "6",
      "precision_explicit": "0.909091",
      "recall_explicit": "0.625000",
      "f1_explicit": "0.740741",
      "tp_strong": "10",
      "fp_strong": "1",
      "fn_strong": "6",
      "precision_strong": "0.909091",
      "recall_strong": "0.625000",
      "f1_strong": "0.740741"
    },
    {
      "tag": "blush",
      "bundle": "gaze_expression",
      "needs_glossary": "0",
      "support_pos": "13",
      "support_neg": "87",
      "tp_explicit": "10",
      "fp_explicit": "2",
      "fn_explicit": "3",
      "precision_explicit": "0.833333",
      "recall_explicit": "0.769231",
      "f1_explicit": "0.800000",
      "tp_strong": "10",
      "fp_strong": "4",
      "fn_strong": "3",
      "precision_strong": "0.714286",
      "recall_strong": "0.769231",
      "f1_strong": "0.740741"
    },
    {
      "tag": "clothing",
      "bundle": "clothing_state",
      "needs_glossary": "0",
      "support_pos": "59",
      "support_neg": "41",
      "tp_explicit": "42",
      "fp_explicit": "21",
      "fn_explicit": "17",
      "precision_explicit": "0.666667",
      "recall_explicit": "0.711864",
      "f1_explicit": "0.688525",
      "tp_strong": "42",
      "fp_strong": "21",
      "fn_strong": "17",
      "precision_strong": "0.666667",
      "recall_strong": "0.711864",
      "f1_strong": "0.688525"
    },
    {
      "tag": "canid",
      "bundle": "species_taxonomy",
      "needs_glossary": "1",
      "support_pos": "37",
      "support_neg": "63",
      "tp_explicit": "21",
      "fp_explicit": "7",
      "fn_explicit": "16",
      "precision_explicit": "0.750000",
      "recall_explicit": "0.567568",
      "f1_explicit": "0.646154",
      "tp_strong": "24",
      "fp_strong": "11",
      "fn_strong": "13",
      "precision_strong": "0.685714",
      "recall_strong": "0.648649",
      "f1_strong": "0.666667"
    },
    {
      "tag": "<3",
      "bundle": "text_symbols",
      "needs_glossary": "1",
      "support_pos": "6",
      "support_neg": "94",
      "tp_explicit": "3",
      "fp_explicit": "0",
      "fn_explicit": "3",
      "precision_explicit": "1.000000",
      "recall_explicit": "0.500000",
      "f1_explicit": "0.666667",
      "tp_strong": "3",
      "fp_strong": "0",
      "fn_strong": "3",
      "precision_strong": "1.000000",
      "recall_strong": "0.500000",
      "f1_strong": "0.666667"
    },
    {
      "tag": "thick_thighs",
      "bundle": "body_shape_breasts",
      "needs_glossary": "0",
      "support_pos": "1",
      "support_neg": "99",
      "tp_explicit": "1",
      "fp_explicit": "1",
      "fn_explicit": "0",
      "precision_explicit": "0.500000",
      "recall_explicit": "1.000000",
      "f1_explicit": "0.666667",
      "tp_strong": "1",
      "fp_strong": "1",
      "fn_strong": "0",
      "precision_strong": "0.500000",
      "recall_strong": "1.000000",
      "f1_strong": "0.666667"
    },
    {
      "tag": "bird",
      "bundle": "species_taxonomy",
      "needs_glossary": "0",
      "support_pos": "6",
      "support_neg": "94",
      "tp_explicit": "4",
      "fp_explicit": "3",
      "fn_explicit": "2",
      "precision_explicit": "0.571429",
      "recall_explicit": "0.666667",
      "f1_explicit": "0.615385",
      "tp_strong": "4",
      "fp_strong": "3",
      "fn_strong": "2",
      "precision_strong": "0.571429",
      "recall_strong": "0.666667",
      "f1_strong": "0.615385"
    },
    {
      "tag": "bear",
      "bundle": "species_taxonomy",
      "needs_glossary": "0",
      "support_pos": "5",
      "support_neg": "95",
      "tp_explicit": "3",
      "fp_explicit": "4",
      "fn_explicit": "2",
      "precision_explicit": "0.428571",
      "recall_explicit": "0.600000",
      "f1_explicit": "0.500000",
      "tp_strong": "4",
      "fp_strong": "4",
      "fn_strong": "1",
      "precision_strong": "0.500000",
      "recall_strong": "0.800000",
      "f1_strong": "0.615385"
    },
    {
      "tag": "text",
      "bundle": "text_symbols",
      "needs_glossary": "0",
      "support_pos": "23",
      "support_neg": "77",
      "tp_explicit": "15",
      "fp_explicit": "10",
      "fn_explicit": "8",
      "precision_explicit": "0.600000",
      "recall_explicit": "0.652174",
      "f1_explicit": "0.625000",
      "tp_strong": "15",
      "fp_strong": "11",
      "fn_strong": "8",
      "precision_strong": "0.576923",
      "recall_strong": "0.652174",
      "f1_strong": "0.612245"
    },
    {
      "tag": "simple_background",
      "bundle": "scene_pose",
      "needs_glossary": "0",
      "support_pos": "27",
      "support_neg": "73",
      "tp_explicit": "15",
      "fp_explicit": "8",
      "fn_explicit": "12",
      "precision_explicit": "0.652174",
      "recall_explicit": "0.555556",
      "f1_explicit": "0.600000",
      "tp_strong": "15",
      "fp_strong": "8",
      "fn_strong": "12",
      "precision_strong": "0.652174",
      "recall_strong": "0.555556",
      "f1_strong": "0.600000"
    },
    {
      "tag": "eyes_closed",
      "bundle": "gaze_expression",
      "needs_glossary": "0",
      "support_pos": "4",
      "support_neg": "96",
      "tp_explicit": "3",
      "fp_explicit": "3",
      "fn_explicit": "1",
      "precision_explicit": "0.500000",
      "recall_explicit": "0.750000",
      "f1_explicit": "0.600000",
      "tp_strong": "3",
      "fp_strong": "3",
      "fn_strong": "1",
      "precision_strong": "0.500000",
      "recall_strong": "0.750000",
      "f1_strong": "0.600000"
    },
    {
      "tag": "duo",
      "bundle": "count_cardinality",
      "needs_glossary": "1",
      "support_pos": "20",
      "support_neg": "80",
      "tp_explicit": "11",
      "fp_explicit": "9",
      "fn_explicit": "9",
      "precision_explicit": "0.550000",
      "recall_explicit": "0.550000",
      "f1_explicit": "0.550000",
      "tp_strong": "12",
      "fp_strong": "9",
      "fn_strong": "8",
      "precision_strong": "0.571429",
      "recall_strong": "0.600000",
      "f1_strong": "0.585366"
    },
    {
      "tag": "solo",
      "bundle": "count_cardinality",
      "needs_glossary": "1",
      "support_pos": "57",
      "support_neg": "43",
      "tp_explicit": "24",
      "fp_explicit": "3",
      "fn_explicit": "33",
      "precision_explicit": "0.888889",
      "recall_explicit": "0.421053",
      "f1_explicit": "0.571429",
      "tp_strong": "24",
      "fp_strong": "3",
      "fn_strong": "33",
      "precision_strong": "0.888889",
      "recall_strong": "0.421053",
      "f1_strong": "0.571429"
    },
    {
      "tag": "dialogue",
      "bundle": "text_symbols",
      "needs_glossary": "0",
      "support_pos": "11",
      "support_neg": "89",
      "tp_explicit": "10",
      "fp_explicit": "14",
      "fn_explicit": "1",
      "precision_explicit": "0.416667",
      "recall_explicit": "0.909091",
      "f1_explicit": "0.571429",
      "tp_strong": "10",
      "fp_strong": "14",
      "fn_strong": "1",
      "precision_strong": "0.416667",
      "recall_strong": "0.909091",
      "f1_strong": "0.571429"
    },
    {
      "tag": "clothed",
      "bundle": "clothing_state",
      "needs_glossary": "0",
      "support_pos": "32",
      "support_neg": "68",
      "tp_explicit": "29",
      "fp_explicit": "45",
      "fn_explicit": "3",
      "precision_explicit": "0.391892",
      "recall_explicit": "0.906250",
      "f1_explicit": "0.547170",
      "tp_strong": "29",
      "fp_strong": "45",
      "fn_strong": "3",
      "precision_strong": "0.391892",
      "recall_strong": "0.906250",
      "f1_strong": "0.547170"
    },
    {
      "tag": "sitting",
      "bundle": "scene_pose",
      "needs_glossary": "0",
      "support_pos": "9",
      "support_neg": "91",
      "tp_explicit": "8",
      "fp_explicit": "15",
      "fn_explicit": "1",
      "precision_explicit": "0.347826",
      "recall_explicit": "0.888889",
      "f1_explicit": "0.500000",
      "tp_strong": "8",
      "fp_strong": "15",
      "fn_strong": "1",
      "precision_strong": "0.347826",
      "recall_strong": "0.888889",
      "f1_strong": "0.500000"
    },
    {
      "tag": "outside",
      "bundle": "scene_pose",
      "needs_glossary": "0",
      "support_pos": "10",
      "support_neg": "90",
      "tp_explicit": "6",
      "fp_explicit": "13",
      "fn_explicit": "4",
      "precision_explicit": "0.315789",
      "recall_explicit": "0.600000",
      "f1_explicit": "0.413793",
      "tp_strong": "6",
      "fp_strong": "13",
      "fn_strong": "4",
      "precision_strong": "0.315789",
      "recall_strong": "0.600000",
      "f1_strong": "0.413793"
    }
  ],
  "outputs": {
    "csv": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_n100.csv",
    "json": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_n100.json"
  }
}