Prompt_Squirrel_RAG / data /analysis /probe_reliability_sanity10.json
Food Desert
Consolidate pending pipeline, structural, and analysis updates
30bedf0
{
"settings": {
"n": 10,
"seed": 42,
"caption_field": "caption_cogvlm",
"probe_count": 35,
"retries": 2,
"temperature": 0.0,
"max_tokens": 900,
"model_env": "meta-llama/llama-3.1-8b-instruct"
},
"overall_metrics": {
"explicit": {
"tp": 49,
"fp": 56,
"fn": 19,
"precision": 0.466667,
"recall": 0.720588,
"f1": 0.566474
},
"strong": {
"tp": 49,
"fp": 56,
"fn": 19,
"precision": 0.466667,
"recall": 0.720588,
"f1": 0.566474
}
},
"diagnostics": {
"samples_with_attempt_failures": 0,
"samples_with_call_exhaustion": 0,
"avg_attempt_failure_rate": 0.0,
"avg_call_exhaustion_rate": 0.0
},
"top_tags_by_f1_strong": [
{
"tag": "outside",
"bundle": "scene_pose",
"needs_glossary": "0",
"support_pos": "2",
"support_neg": "8",
"tp_explicit": "2",
"fp_explicit": "0",
"fn_explicit": "0",
"precision_explicit": "1.000000",
"recall_explicit": "1.000000",
"f1_explicit": "1.000000",
"tp_strong": "2",
"fp_strong": "0",
"fn_strong": "0",
"precision_strong": "1.000000",
"recall_strong": "1.000000",
"f1_strong": "1.000000"
},
{
"tag": "eyes_closed",
"bundle": "gaze_expression",
"needs_glossary": "0",
"support_pos": "1",
"support_neg": "9",
"tp_explicit": "1",
"fp_explicit": "0",
"fn_explicit": "0",
"precision_explicit": "1.000000",
"recall_explicit": "1.000000",
"f1_explicit": "1.000000",
"tp_strong": "1",
"fp_strong": "0",
"fn_strong": "0",
"precision_strong": "1.000000",
"recall_strong": "1.000000",
"f1_strong": "1.000000"
},
{
"tag": "group",
"bundle": "count_cardinality",
"needs_glossary": "0",
"support_pos": "1",
"support_neg": "9",
"tp_explicit": "1",
"fp_explicit": "0",
"fn_explicit": "0",
"precision_explicit": "1.000000",
"recall_explicit": "1.000000",
"f1_explicit": "1.000000",
"tp_strong": "1",
"fp_strong": "0",
"fn_strong": "0",
"precision_strong": "1.000000",
"recall_strong": "1.000000",
"f1_strong": "1.000000"
},
{
"tag": "feral",
"bundle": "body_type_presence",
"needs_glossary": "1",
"support_pos": "1",
"support_neg": "9",
"tp_explicit": "1",
"fp_explicit": "0",
"fn_explicit": "0",
"precision_explicit": "1.000000",
"recall_explicit": "1.000000",
"f1_explicit": "1.000000",
"tp_strong": "1",
"fp_strong": "0",
"fn_strong": "0",
"precision_strong": "1.000000",
"recall_strong": "1.000000",
"f1_strong": "1.000000"
},
{
"tag": "<3",
"bundle": "text_symbols",
"needs_glossary": "1",
"support_pos": "1",
"support_neg": "9",
"tp_explicit": "1",
"fp_explicit": "0",
"fn_explicit": "0",
"precision_explicit": "1.000000",
"recall_explicit": "1.000000",
"f1_explicit": "1.000000",
"tp_strong": "1",
"fp_strong": "0",
"fn_strong": "0",
"precision_strong": "1.000000",
"recall_strong": "1.000000",
"f1_strong": "1.000000"
},
{
"tag": "clothing",
"bundle": "clothing_state",
"needs_glossary": "0",
"support_pos": "8",
"support_neg": "2",
"tp_explicit": "8",
"fp_explicit": "1",
"fn_explicit": "0",
"precision_explicit": "0.888889",
"recall_explicit": "1.000000",
"f1_explicit": "0.941176",
"tp_strong": "8",
"fp_strong": "1",
"fn_strong": "0",
"precision_strong": "0.888889",
"recall_strong": "1.000000",
"f1_strong": "0.941176"
},
{
"tag": "anthro",
"bundle": "body_type_presence",
"needs_glossary": "1",
"support_pos": "8",
"support_neg": "2",
"tp_explicit": "8",
"fp_explicit": "2",
"fn_explicit": "0",
"precision_explicit": "0.800000",
"recall_explicit": "1.000000",
"f1_explicit": "0.888889",
"tp_strong": "8",
"fp_strong": "2",
"fn_strong": "0",
"precision_strong": "0.800000",
"recall_strong": "1.000000",
"f1_strong": "0.888889"
},
{
"tag": "bear",
"bundle": "species_taxonomy",
"needs_glossary": "0",
"support_pos": "2",
"support_neg": "8",
"tp_explicit": "2",
"fp_explicit": "1",
"fn_explicit": "0",
"precision_explicit": "0.666667",
"recall_explicit": "1.000000",
"f1_explicit": "0.800000",
"tp_strong": "2",
"fp_strong": "1",
"fn_strong": "0",
"precision_strong": "0.666667",
"recall_strong": "1.000000",
"f1_strong": "0.800000"
},
{
"tag": "duo",
"bundle": "count_cardinality",
"needs_glossary": "1",
"support_pos": "2",
"support_neg": "8",
"tp_explicit": "2",
"fp_explicit": "1",
"fn_explicit": "0",
"precision_explicit": "0.666667",
"recall_explicit": "1.000000",
"f1_explicit": "0.800000",
"tp_strong": "2",
"fp_strong": "1",
"fn_strong": "0",
"precision_strong": "0.666667",
"recall_strong": "1.000000",
"f1_strong": "0.800000"
},
{
"tag": "solo",
"bundle": "count_cardinality",
"needs_glossary": "1",
"support_pos": "7",
"support_neg": "3",
"tp_explicit": "4",
"fp_explicit": "0",
"fn_explicit": "3",
"precision_explicit": "1.000000",
"recall_explicit": "0.571429",
"f1_explicit": "0.727273",
"tp_strong": "4",
"fp_strong": "0",
"fn_strong": "3",
"precision_strong": "1.000000",
"recall_strong": "0.571429",
"f1_strong": "0.727273"
},
{
"tag": "clothed",
"bundle": "clothing_state",
"needs_glossary": "0",
"support_pos": "5",
"support_neg": "5",
"tp_explicit": "5",
"fp_explicit": "4",
"fn_explicit": "0",
"precision_explicit": "0.555556",
"recall_explicit": "1.000000",
"f1_explicit": "0.714286",
"tp_strong": "5",
"fp_strong": "4",
"fn_strong": "0",
"precision_strong": "0.555556",
"recall_strong": "1.000000",
"f1_strong": "0.714286"
},
{
"tag": "bird",
"bundle": "species_taxonomy",
"needs_glossary": "0",
"support_pos": "2",
"support_neg": "8",
"tp_explicit": "1",
"fp_explicit": "0",
"fn_explicit": "1",
"precision_explicit": "1.000000",
"recall_explicit": "0.500000",
"f1_explicit": "0.666667",
"tp_strong": "1",
"fp_strong": "0",
"fn_strong": "1",
"precision_strong": "1.000000",
"recall_strong": "0.500000",
"f1_strong": "0.666667"
},
{
"tag": "leporid",
"bundle": "species_taxonomy",
"needs_glossary": "1",
"support_pos": "2",
"support_neg": "8",
"tp_explicit": "1",
"fp_explicit": "0",
"fn_explicit": "1",
"precision_explicit": "1.000000",
"recall_explicit": "0.500000",
"f1_explicit": "0.666667",
"tp_strong": "1",
"fp_strong": "0",
"fn_strong": "1",
"precision_strong": "1.000000",
"recall_strong": "0.500000",
"f1_strong": "0.666667"
},
{
"tag": "felid",
"bundle": "species_taxonomy",
"needs_glossary": "1",
"support_pos": "2",
"support_neg": "8",
"tp_explicit": "1",
"fp_explicit": "0",
"fn_explicit": "1",
"precision_explicit": "1.000000",
"recall_explicit": "0.500000",
"f1_explicit": "0.666667",
"tp_strong": "1",
"fp_strong": "0",
"fn_strong": "1",
"precision_strong": "1.000000",
"recall_strong": "0.500000",
"f1_strong": "0.666667"
},
{
"tag": "canis",
"bundle": "species_taxonomy",
"needs_glossary": "1",
"support_pos": "1",
"support_neg": "9",
"tp_explicit": "1",
"fp_explicit": "1",
"fn_explicit": "0",
"precision_explicit": "0.500000",
"recall_explicit": "1.000000",
"f1_explicit": "0.666667",
"tp_strong": "1",
"fp_strong": "1",
"fn_strong": "0",
"precision_strong": "0.500000",
"recall_strong": "1.000000",
"f1_strong": "0.666667"
},
{
"tag": "simple_background",
"bundle": "scene_pose",
"needs_glossary": "0",
"support_pos": "3",
"support_neg": "7",
"tp_explicit": "3",
"fp_explicit": "4",
"fn_explicit": "0",
"precision_explicit": "0.428571",
"recall_explicit": "1.000000",
"f1_explicit": "0.600000",
"tp_strong": "3",
"fp_strong": "4",
"fn_strong": "0",
"precision_strong": "0.428571",
"recall_strong": "1.000000",
"f1_strong": "0.600000"
},
{
"tag": "canid",
"bundle": "species_taxonomy",
"needs_glossary": "1",
"support_pos": "3",
"support_neg": "7",
"tp_explicit": "2",
"fp_explicit": "2",
"fn_explicit": "1",
"precision_explicit": "0.500000",
"recall_explicit": "0.666667",
"f1_explicit": "0.571429",
"tp_strong": "2",
"fp_strong": "2",
"fn_strong": "1",
"precision_strong": "0.500000",
"recall_strong": "0.666667",
"f1_strong": "0.571429"
},
{
"tag": "looking_at_viewer",
"bundle": "gaze_expression",
"needs_glossary": "0",
"support_pos": "3",
"support_neg": "7",
"tp_explicit": "1",
"fp_explicit": "0",
"fn_explicit": "2",
"precision_explicit": "1.000000",
"recall_explicit": "0.333333",
"f1_explicit": "0.500000",
"tp_strong": "1",
"fp_strong": "0",
"fn_strong": "2",
"precision_strong": "1.000000",
"recall_strong": "0.333333",
"f1_strong": "0.500000"
},
{
"tag": "standing",
"bundle": "scene_pose",
"needs_glossary": "0",
"support_pos": "2",
"support_neg": "8",
"tp_explicit": "2",
"fp_explicit": "4",
"fn_explicit": "0",
"precision_explicit": "0.333333",
"recall_explicit": "1.000000",
"f1_explicit": "0.500000",
"tp_strong": "2",
"fp_strong": "4",
"fn_strong": "0",
"precision_strong": "0.333333",
"recall_strong": "1.000000",
"f1_strong": "0.500000"
},
{
"tag": "biped",
"bundle": "body_type_presence",
"needs_glossary": "0",
"support_pos": "3",
"support_neg": "7",
"tp_explicit": "2",
"fp_explicit": "4",
"fn_explicit": "1",
"precision_explicit": "0.333333",
"recall_explicit": "0.666667",
"f1_explicit": "0.444444",
"tp_strong": "2",
"fp_strong": "4",
"fn_strong": "1",
"precision_strong": "0.333333",
"recall_strong": "0.666667",
"f1_strong": "0.444444"
}
],
"outputs": {
"csv": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_sanity10.csv",
"json": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_sanity10.json"
}
}