{ "settings": { "n": 100, "seed": 42, "caption_field": "caption_cogvlm", "probe_count": 35, "retries": 2, "temperature": 0.0, "max_tokens": 900, "model_env": "meta-llama/llama-3.1-8b-instruct" }, "overall_metrics": { "explicit": { "tp": 333, "fp": 409, "fn": 281, "precision": 0.448787, "recall": 0.542345, "f1": 0.49115 }, "strong": { "tp": 348, "fp": 444, "fn": 266, "precision": 0.439394, "recall": 0.566775, "f1": 0.495021 } }, "diagnostics": { "samples_with_attempt_failures": 0, "samples_with_call_exhaustion": 0, "avg_attempt_failure_rate": 0.0, "avg_call_exhaustion_rate": 0.0 }, "top_tags_by_f1_strong": [ { "tag": "wide_hips", "bundle": "body_shape_breasts", "needs_glossary": "0", "support_pos": "1", "support_neg": "99", "tp_explicit": "1", "fp_explicit": "0", "fn_explicit": "0", "precision_explicit": "1.000000", "recall_explicit": "1.000000", "f1_explicit": "1.000000", "tp_strong": "1", "fp_strong": "0", "fn_strong": "0", "precision_strong": "1.000000", "recall_strong": "1.000000", "f1_strong": "1.000000" }, { "tag": "anthro", "bundle": "body_type_presence", "needs_glossary": "1", "support_pos": "68", "support_neg": "32", "tp_explicit": "63", "fp_explicit": "19", "fn_explicit": "5", "precision_explicit": "0.768293", "recall_explicit": "0.926471", "f1_explicit": "0.840000", "tp_strong": "67", "fp_strong": "25", "fn_strong": "1", "precision_strong": "0.728261", "recall_strong": "0.985294", "f1_strong": "0.837500" }, { "tag": "felid", "bundle": "species_taxonomy", "needs_glossary": "1", "support_pos": "18", "support_neg": "82", "tp_explicit": "11", "fp_explicit": "0", "fn_explicit": "7", "precision_explicit": "1.000000", "recall_explicit": "0.611111", "f1_explicit": "0.758621", "tp_strong": "12", "fp_strong": "1", "fn_strong": "6", "precision_strong": "0.923077", "recall_strong": "0.666667", "f1_strong": "0.774194" }, { "tag": "group", "bundle": "count_cardinality", "needs_glossary": "0", "support_pos": "16", "support_neg": "84", "tp_explicit": "10", "fp_explicit": "1", "fn_explicit": "6", "precision_explicit": "0.909091", "recall_explicit": "0.625000", "f1_explicit": "0.740741", "tp_strong": "10", "fp_strong": "1", "fn_strong": "6", "precision_strong": "0.909091", "recall_strong": "0.625000", "f1_strong": "0.740741" }, { "tag": "blush", "bundle": "gaze_expression", "needs_glossary": "0", "support_pos": "13", "support_neg": "87", "tp_explicit": "10", "fp_explicit": "2", "fn_explicit": "3", "precision_explicit": "0.833333", "recall_explicit": "0.769231", "f1_explicit": "0.800000", "tp_strong": "10", "fp_strong": "4", "fn_strong": "3", "precision_strong": "0.714286", "recall_strong": "0.769231", "f1_strong": "0.740741" }, { "tag": "clothing", "bundle": "clothing_state", "needs_glossary": "0", "support_pos": "59", "support_neg": "41", "tp_explicit": "42", "fp_explicit": "21", "fn_explicit": "17", "precision_explicit": "0.666667", "recall_explicit": "0.711864", "f1_explicit": "0.688525", "tp_strong": "42", "fp_strong": "21", "fn_strong": "17", "precision_strong": "0.666667", "recall_strong": "0.711864", "f1_strong": "0.688525" }, { "tag": "canid", "bundle": "species_taxonomy", "needs_glossary": "1", "support_pos": "37", "support_neg": "63", "tp_explicit": "21", "fp_explicit": "7", "fn_explicit": "16", "precision_explicit": "0.750000", "recall_explicit": "0.567568", "f1_explicit": "0.646154", "tp_strong": "24", "fp_strong": "11", "fn_strong": "13", "precision_strong": "0.685714", "recall_strong": "0.648649", "f1_strong": "0.666667" }, { "tag": "<3", "bundle": "text_symbols", "needs_glossary": "1", "support_pos": "6", "support_neg": "94", "tp_explicit": "3", "fp_explicit": "0", "fn_explicit": "3", "precision_explicit": "1.000000", "recall_explicit": "0.500000", "f1_explicit": "0.666667", "tp_strong": "3", "fp_strong": "0", "fn_strong": "3", "precision_strong": "1.000000", "recall_strong": "0.500000", "f1_strong": "0.666667" }, { "tag": "thick_thighs", "bundle": "body_shape_breasts", "needs_glossary": "0", "support_pos": "1", "support_neg": "99", "tp_explicit": "1", "fp_explicit": "1", "fn_explicit": "0", "precision_explicit": "0.500000", "recall_explicit": "1.000000", "f1_explicit": "0.666667", "tp_strong": "1", "fp_strong": "1", "fn_strong": "0", "precision_strong": "0.500000", "recall_strong": "1.000000", "f1_strong": "0.666667" }, { "tag": "bird", "bundle": "species_taxonomy", "needs_glossary": "0", "support_pos": "6", "support_neg": "94", "tp_explicit": "4", "fp_explicit": "3", "fn_explicit": "2", "precision_explicit": "0.571429", "recall_explicit": "0.666667", "f1_explicit": "0.615385", "tp_strong": "4", "fp_strong": "3", "fn_strong": "2", "precision_strong": "0.571429", "recall_strong": "0.666667", "f1_strong": "0.615385" }, { "tag": "bear", "bundle": "species_taxonomy", "needs_glossary": "0", "support_pos": "5", "support_neg": "95", "tp_explicit": "3", "fp_explicit": "4", "fn_explicit": "2", "precision_explicit": "0.428571", "recall_explicit": "0.600000", "f1_explicit": "0.500000", "tp_strong": "4", "fp_strong": "4", "fn_strong": "1", "precision_strong": "0.500000", "recall_strong": "0.800000", "f1_strong": "0.615385" }, { "tag": "text", "bundle": "text_symbols", "needs_glossary": "0", "support_pos": "23", "support_neg": "77", "tp_explicit": "15", "fp_explicit": "10", "fn_explicit": "8", "precision_explicit": "0.600000", "recall_explicit": "0.652174", "f1_explicit": "0.625000", "tp_strong": "15", "fp_strong": "11", "fn_strong": "8", "precision_strong": "0.576923", "recall_strong": "0.652174", "f1_strong": "0.612245" }, { "tag": "simple_background", "bundle": "scene_pose", "needs_glossary": "0", "support_pos": "27", "support_neg": "73", "tp_explicit": "15", "fp_explicit": "8", "fn_explicit": "12", "precision_explicit": "0.652174", "recall_explicit": "0.555556", "f1_explicit": "0.600000", "tp_strong": "15", "fp_strong": "8", "fn_strong": "12", "precision_strong": "0.652174", "recall_strong": "0.555556", "f1_strong": "0.600000" }, { "tag": "eyes_closed", "bundle": "gaze_expression", "needs_glossary": "0", "support_pos": "4", "support_neg": "96", "tp_explicit": "3", "fp_explicit": "3", "fn_explicit": "1", "precision_explicit": "0.500000", "recall_explicit": "0.750000", "f1_explicit": "0.600000", "tp_strong": "3", "fp_strong": "3", "fn_strong": "1", "precision_strong": "0.500000", "recall_strong": "0.750000", "f1_strong": "0.600000" }, { "tag": "duo", "bundle": "count_cardinality", "needs_glossary": "1", "support_pos": "20", "support_neg": "80", "tp_explicit": "11", "fp_explicit": "9", "fn_explicit": "9", "precision_explicit": "0.550000", "recall_explicit": "0.550000", "f1_explicit": "0.550000", "tp_strong": "12", "fp_strong": "9", "fn_strong": "8", "precision_strong": "0.571429", "recall_strong": "0.600000", "f1_strong": "0.585366" }, { "tag": "solo", "bundle": "count_cardinality", "needs_glossary": "1", "support_pos": "57", "support_neg": "43", "tp_explicit": "24", "fp_explicit": "3", "fn_explicit": "33", "precision_explicit": "0.888889", "recall_explicit": "0.421053", "f1_explicit": "0.571429", "tp_strong": "24", "fp_strong": "3", "fn_strong": "33", "precision_strong": "0.888889", "recall_strong": "0.421053", "f1_strong": "0.571429" }, { "tag": "dialogue", "bundle": "text_symbols", "needs_glossary": "0", "support_pos": "11", "support_neg": "89", "tp_explicit": "10", "fp_explicit": "14", "fn_explicit": "1", "precision_explicit": "0.416667", "recall_explicit": "0.909091", "f1_explicit": "0.571429", "tp_strong": "10", "fp_strong": "14", "fn_strong": "1", "precision_strong": "0.416667", "recall_strong": "0.909091", "f1_strong": "0.571429" }, { "tag": "clothed", "bundle": "clothing_state", "needs_glossary": "0", "support_pos": "32", "support_neg": "68", "tp_explicit": "29", "fp_explicit": "45", "fn_explicit": "3", "precision_explicit": "0.391892", "recall_explicit": "0.906250", "f1_explicit": "0.547170", "tp_strong": "29", "fp_strong": "45", "fn_strong": "3", "precision_strong": "0.391892", "recall_strong": "0.906250", "f1_strong": "0.547170" }, { "tag": "sitting", "bundle": "scene_pose", "needs_glossary": "0", "support_pos": "9", "support_neg": "91", "tp_explicit": "8", "fp_explicit": "15", "fn_explicit": "1", "precision_explicit": "0.347826", "recall_explicit": "0.888889", "f1_explicit": "0.500000", "tp_strong": "8", "fp_strong": "15", "fn_strong": "1", "precision_strong": "0.347826", "recall_strong": "0.888889", "f1_strong": "0.500000" }, { "tag": "outside", "bundle": "scene_pose", "needs_glossary": "0", "support_pos": "10", "support_neg": "90", "tp_explicit": "6", "fp_explicit": "13", "fn_explicit": "4", "precision_explicit": "0.315789", "recall_explicit": "0.600000", "f1_explicit": "0.413793", "tp_strong": "6", "fp_strong": "13", "fn_strong": "4", "precision_strong": "0.315789", "recall_strong": "0.600000", "f1_strong": "0.413793" } ], "outputs": { "csv": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_n100.csv", "json": "E:\\image\\backup\\Prompt_Squirrel_RAG\\data\\analysis\\probe_reliability_n100.json" } }