Prompt_Squirrel_RAG / data /analysis /probe_reliability_n100.json
Food Desert
Consolidate pending pipeline, structural, and analysis updates
30bedf0
{
"settings": {
"n": 100,
"seed": 42,
"caption_field": "caption_cogvlm",
"probe_count": 35,
"retries": 2,
"temperature": 0.0,
"max_tokens": 900,
"model_env": "meta-llama/llama-3.1-8b-instruct"
},
"overall_metrics": {
"explicit": {
"tp": 333,
"fp": 409,
"fn": 281,
"precision": 0.448787,
"recall": 0.542345,
"f1": 0.49115
},
"strong": {
"tp": 348,
"fp": 444,
"fn": 266,
"precision": 0.439394,
"recall": 0.566775,
"f1": 0.495021
}
},
"diagnostics": {
"samples_with_attempt_failures": 0,
"samples_with_call_exhaustion": 0,
"avg_attempt_failure_rate": 0.0,
"avg_call_exhaustion_rate": 0.0
},
"top_tags_by_f1_strong": [
{
"tag": "wide_hips",
"bundle": "body_shape_breasts",
"needs_glossary": "0",
"support_pos": "1",
"support_neg": "99",
"tp_explicit": "1",
"fp_explicit": "0",
"fn_explicit": "0",
"precision_explicit": "1.000000",
"recall_explicit": "1.000000",
"f1_explicit": "1.000000",
"tp_strong": "1",
"fp_strong": "0",
"fn_strong": "0",
"precision_strong": "1.000000",
"recall_strong": "1.000000",
"f1_strong": "1.000000"
},
{
"tag": "anthro",
"bundle": "body_type_presence",
"needs_glossary": "1",
"support_pos": "68",
"support_neg": "32",
"tp_explicit": "63",
"fp_explicit": "19",
"fn_explicit": "5",
"precision_explicit": "0.768293",
"recall_explicit": "0.926471",
"f1_explicit": "0.840000",
"tp_strong": "67",
"fp_strong": "25",
"fn_strong": "1",
"precision_strong": "0.728261",
"recall_strong": "0.985294",
"f1_strong": "0.837500"
},
{
"tag": "felid",
"bundle": "species_taxonomy",
"needs_glossary": "1",
"support_pos": "18",
"support_neg": "82",
"tp_explicit": "11",
"fp_explicit": "0",
"fn_explicit": "7",
"precision_explicit": "1.000000",
"recall_explicit": "0.611111",
"f1_explicit": "0.758621",
"tp_strong": "12",
"fp_strong": "1",
"fn_strong": "6",
"precision_strong": "0.923077",
"recall_strong": "0.666667",
"f1_strong": "0.774194"
},
{
"tag": "group",
"bundle": "count_cardinality",
"needs_glossary": "0",
"support_pos": "16",
"support_neg": "84",
"tp_explicit": "10",
"fp_explicit": "1",
"fn_explicit": "6",
"precision_explicit": "0.909091",
"recall_explicit": "0.625000",
"f1_explicit": "0.740741",
"tp_strong": "10",
"fp_strong": "1",
"fn_strong": "6",
"precision_strong": "0.909091",
"recall_strong": "0.625000",
"f1_strong": "0.740741"
},
{
"tag": "blush",
"bundle": "gaze_expression",
"needs_glossary": "0",
"support_pos": "13",
"support_neg": "87",
"tp_explicit": "10",
"fp_explicit": "2",
"fn_explicit": "3",
"precision_explicit": "0.833333",
"recall_explicit": "0.769231",
"f1_explicit": "0.800000",
"tp_strong": "10",
"fp_strong": "4",
"fn_strong": "3",
"precision_strong": "0.714286",
"recall_strong": "0.769231",
"f1_strong": "0.740741"
},
{
"tag": "clothing",
"bundle": "clothing_state",
"needs_glossary": "0",
"support_pos": "59",
"support_neg": "41",
"tp_explicit": "42",
"fp_explicit": "21",
"fn_explicit": "17",
"precision_explicit": "0.666667",
"recall_explicit": "0.711864",
"f1_explicit": "0.688525",
"tp_strong": "42",
"fp_strong": "21",
"fn_strong": "17",
"precision_strong": "0.666667",
"recall_strong": "0.711864",
"f1_strong": "0.688525"
},
{
"tag": "canid",
"bundle": "species_taxonomy",
"needs_glossary": "1",
"support_pos": "37",
"support_neg": "63",
"tp_explicit": "21",
"fp_explicit": "7",
"fn_explicit": "16",
"precision_explicit": "0.750000",
"recall_explicit": "0.567568",
"f1_explicit": "0.646154",
"tp_strong": "24",
"fp_strong": "11",
"fn_strong": "13",
"precision_strong": "0.685714",
"recall_strong": "0.648649",
"f1_strong": "0.666667"
},
{
"tag": "<3",
"bundle": "text_symbols",
"needs_glossary": "1",
"support_pos": "6",
"support_neg": "94",
"tp_explicit": "3",
"fp_explicit": "0",
"fn_explicit": "3",
"precision_explicit": "1.000000",
"recall_explicit": "0.500000",
"f1_explicit": "0.666667",
"tp_strong": "3",
"fp_strong": "0",
"fn_strong": "3",
"precision_strong": "1.000000",
"recall_strong": "0.500000",
"f1_strong": "0.666667"
},
{
"tag": "thick_thighs",
"bundle": "body_shape_breasts",
"needs_glossary": "0",
"support_pos": "1",
"support_neg": "99",
"tp_explicit": "1",
"fp_explicit": "1",
"fn_explicit": "0",
"precision_explicit": "0.500000",
"recall_explicit": "1.000000",
"f1_explicit": "0.666667",
"tp_strong": "1",
"fp_strong": "1",
"fn_strong": "0",
"precision_strong": "0.500000",
"recall_strong": "1.000000",
"f1_strong": "0.666667"
},
{
"tag": "bird",
"bundle": "species_taxonomy",
"needs_glossary": "0",
"support_pos": "6",
"support_neg": "94",
"tp_explicit": "4",
"fp_explicit": "3",
"fn_explicit": "2",
"precision_explicit": "0.571429",
"recall_explicit": "0.666667",
"f1_explicit": "0.615385",
"tp_strong": "4",
"fp_strong": "3",
"fn_strong": "2",
"precision_strong": "0.571429",
"recall_strong": "0.666667",
"f1_strong": "0.615385"
},
{
"tag": "bear",
"bundle": "species_taxonomy",
"needs_glossary": "0",
"support_pos": "5",
"support_neg": "95",
"tp_explicit": "3",
"fp_explicit": "4",
"fn_explicit": "2",
"precision_explicit": "0.428571",
"recall_explicit": "0.600000",
"f1_explicit": "0.500000",
"tp_strong": "4",
"fp_strong": "4",
"fn_strong": "1",
"precision_strong": "0.500000",
"recall_strong": "0.800000",
"f1_strong": "0.615385"
},
{
"tag": "text",
"bundle": "text_symbols",
"needs_glossary": "0",
"support_pos": "23",
"support_neg": "77",
"tp_explicit": "15",
"fp_explicit": "10",
"fn_explicit": "8",
"precision_explicit": "0.600000",
"recall_explicit": "0.652174",
"f1_explicit": "0.625000",
"tp_strong": "15",
"fp_strong": "11",
"fn_strong": "8",
"precision_strong": "0.576923",
"recall_strong": "0.652174",
"f1_strong": "0.612245"
},
{
"tag": "simple_background",
"bundle": "scene_pose",
"needs_glossary": "0",
"support_pos": "27",
"support_neg": "73",
"tp_explicit": "15",
"fp_explicit": "8",
"fn_explicit": "12",
"precision_explicit": "0.652174",
"recall_explicit": "0.555556",
"f1_explicit": "0.600000",
"tp_strong": "15",
"fp_strong": "8",
"fn_strong": "12",
"precision_strong": "0.652174",
"recall_strong": "0.555556",
"f1_strong": "0.600000"
},
{
"tag": "eyes_closed",
"bundle": "gaze_expression",
"needs_glossary": "0",
"support_pos": "4",
"support_neg": "96",
"tp_explicit": "3",
"fp_explicit": "3",
"fn_explicit": "1",
"precision_explicit": "0.500000",
"recall_explicit": "0.750000",
"f1_explicit": "0.600000",
"tp_strong": "3",
"fp_strong": "3",
"fn_strong": "1",
"precision_strong": "0.500000",
"recall_strong": "0.750000",
"f1_strong": "0.600000"
},
{
"tag": "duo",
"bundle": "count_cardinality",
"needs_glossary": "1",
"support_pos": "20",
"support_neg": "80",
"tp_explicit": "11",
"fp_explicit": "9",
"fn_explicit": "9",
"precision_explicit": "0.550000",
"recall_explicit": "0.550000",
"f1_explicit": "0.550000",
"tp_strong": "12",
"fp_strong": "9",
"fn_strong": "8",
"precision_strong": "0.571429",
"recall_strong": "0.600000",
"f1_strong": "0.585366"
},
{
"tag": "solo",
"bundle": "count_cardinality",
"needs_glossary": "1",
"support_pos": "57",
"support_neg": "43",
"tp_explicit": "24",
"fp_explicit": "3",
"fn_explicit": "33",
"precision_explicit": "0.888889",
"recall_explicit": "0.421053",
"f1_explicit": "0.571429",
"tp_strong": "24",
"fp_strong": "3",
"fn_strong": "33",
"precision_strong": "0.888889",
"recall_strong": "0.421053",
"f1_strong": "0.571429"
},
{
"tag": "dialogue",
"bundle": "text_symbols",
"needs_glossary": "0",
"support_pos": "11",
"support_neg": "89",
"tp_explicit": "10",
"fp_explicit": "14",
"fn_explicit": "1",
"precision_explicit": "0.416667",
"recall_explicit": "0.909091",
"f1_explicit": "0.571429",
"tp_strong": "10",
"fp_strong": "14",
"fn_strong": "1",
"precision_strong": "0.416667",
"recall_strong": "0.909091",
"f1_strong": "0.571429"
},
{
"tag": "clothed",
"bundle": "clothing_state",
"needs_glossary": "0",
"support_pos": "32",
"support_neg": "68",
"tp_explicit": "29",
"fp_explicit": "45",
"fn_explicit": "3",
"precision_explicit": "0.391892",
"recall_explicit": "0.906250",
"f1_explicit": "0.547170",
"tp_strong": "29",
"fp_strong": "45",
"fn_strong": "3",
"precision_strong": "0.391892",
"recall_strong": "0.906250",
"f1_strong": "0.547170"
},
{
"tag": "sitting",
"bundle": "scene_pose",
"needs_glossary": "0",
"support_pos": "9",
"support_neg": "91",
"tp_explicit": "8",
"fp_explicit": "15",
"fn_explicit": "1",
"precision_explicit": "0.347826",
"recall_explicit": "0.888889",
"f1_explicit": "0.500000",
"tp_strong": "8",
"fp_strong": "15",
"fn_strong": "1",
"precision_strong": "0.347826",
"recall_strong": "0.888889",
"f1_strong": "0.500000"
},
{
"tag": "outside",
"bundle": "scene_pose",
"needs_glossary": "0",
"support_pos": "10",
"support_neg": "90",
"tp_explicit": "6",
"fp_explicit": "13",
"fn_explicit": "4",
"precision_explicit": "0.315789",
"recall_explicit": "0.600000",
"f1_explicit": "0.413793",
"tp_strong": "6",
"fp_strong": "13",
"fn_strong": "4",
"precision_strong": "0.315789",
"recall_strong": "0.600000",
"f1_strong": "0.413793"
}
],
"outputs": {
"csv": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_n100.csv",
"json": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_n100.json"
}
}