Spaces:
Running
Running
Food Desert commited on
Commit ·
a807f94
1
Parent(s): 0ed7e94
Add eval results for debugging
Browse files
data/eval_results/eval_caption_cogvlm_n10_seed123_20260214_061321.jsonl
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"_meta": true, "timestamp": "2026-02-14T06:13:21.058021", "n_samples": 10, "caption_field": "caption_cogvlm", "skip_rewrite": false, "allow_nsfw": false, "mode": "chunked_map_union", "chunk_size": 60, "per_phrase_k": 2, "temperature": 0.0, "shuffle": true, "seed": 123, "workers": 4, "min_why": "strong_implied", "expand_implications": true, "infer_structural": false, "n_errors": 0}
|
| 2 |
+
{"id": 3668157, "n_gt": 25, "n_retrieved": 193, "n_selected": 66, "n_implied": 11, "n_structural": 0, "ret_R": 0.44, "P": 0.1667, "R": 0.44, "F1": 0.2418, "leaf_P": 0.0833, "leaf_R": 0.25, "leaf_F1": 0.125, "n_leaf_sel": 48, "n_leaf_gt": 16, "ret_P": 0.057, "sel_given_ret": 1.0, "over_sel": 2.64, "why": {"explicit": 55}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1667, "gen_R": 0.44, "gen_F1": 0.2418, "missed": ["anthro", "clothed", "clothing", "dinosaur", "macro", "male", "musclegut", "nipples", "pecs", "reptile", "scalie", "slightly_chubby", "solo", "topless"], "extra": ["anime_eyes", "apocalypse", "back_muscles", "big_biceps", "big_muscles", "big_tail", "big_teeth", "black_body", "black_skin", "black_teeth", "breath_cloud", "building", "building_destruction", "burning_building", "city_background", "city_skyline", "cityscape", "damaged_wall", "dark_body", "dark_eyes", "dark_skin", "destroyed_building", "dragon", "elemental_creature", "elemental_dragon", "eyes", "fangs", "flexing_bicep", "glowing_nose", "glowing_tail", "grey_body", "grey_skin", "long_tail", "manly", "muscular", "muscular_arms", "orange_hands", "orange_pupils", "orange_sky", "pink_sky", "pupils", "quads", "rooftop", "scales", "scaly_tail", "sharp_fangs", "sharp_teeth", "skyscraper", "sunset", "tail", "teeth", "thick_tail", "triceps", "x_navel", "yellow_sky"], "extra_evidence": {"anime_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.479}, "apocalypse": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5272}, "back_muscles": {"source": "stage3", "why": "explicit", "retrieval_score": 0.669}, "big_biceps": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7399}, "big_muscles": {"source": "stage3", "why": "explicit", "retrieval_score": 0.679}, "big_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5532}, "big_teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5068}, "black_body": {"source": "implied"}, "black_skin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4839}, "black_teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4797}, "breath_cloud": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3725}, "building": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6785}, "building_destruction": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5437}, "burning_building": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6128}, "city_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5059}, "city_skyline": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5864}, "cityscape": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6011}, "damaged_wall": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5428}, "dark_body": {"source": "stage3", "why": "explicit", "retrieval_score": 0.557}, "dark_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4885}, "dark_skin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6441}, "destroyed_building": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6186}, "dragon": {"source": "implied"}, "elemental_creature": {"source": "implied"}, "elemental_dragon": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4587}, "eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8343}, "fangs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5089}, "flexing_bicep": {"source": "stage3", "why": "explicit", "retrieval_score": 0.662}, "glowing_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5131}, "glowing_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5044}, "grey_body": {"source": "implied"}, "grey_skin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4883}, "long_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6503}, "manly": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6695}, "muscular": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6518}, "muscular_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7442}, "orange_hands": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4826}, "orange_pupils": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4837}, "orange_sky": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5666}, "pink_sky": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5214}, "pupils": {"source": "implied"}, "quads": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6808}, "rooftop": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5072}, "scales": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4567}, "scaly_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5062}, "sharp_fangs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4981}, "sharp_teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6994}, "skyscraper": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5448}, "sunset": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7047}, "tail": {"source": "implied"}, "teeth": {"source": "implied"}, "thick_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5192}, "triceps": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7052}, "x_navel": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5071}, "yellow_sky": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4803}}, "structural": [], "t1": 3.05, "t2": 7.51, "t3": 30.15, "t3s": 0.0, "err": null}
|
| 3 |
+
{"id": 1829842, "n_gt": 7, "n_retrieved": 130, "n_selected": 32, "n_implied": 10, "n_structural": 0, "ret_R": 0.7143, "P": 0.1562, "R": 0.7143, "F1": 0.2564, "leaf_P": 0.1765, "leaf_R": 0.5, "leaf_F1": 0.2609, "n_leaf_sel": 17, "n_leaf_gt": 6, "ret_P": 0.0385, "sel_given_ret": 1.0, "over_sel": 4.57, "why": {"explicit": 20, "strong_implied": 3}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1562, "gen_R": 0.7143, "gen_F1": 0.2564, "missed": ["solo", "text"], "extra": ["2_horns", "antlers", "brown_antlers", "cellphone", "cervine", "clothing", "deer", "eyebrows", "footwear", "grey_lips", "high_heels", "holding_cellphone", "holding_object", "holding_phone", "lips", "looking_at_object", "looking_at_phone", "mammal", "phone", "platform", "platform_footwear", "pupils", "smartphone", "white_eyebrows", "white_face", "white_nose", "white_pupils"], "extra_evidence": {"2_horns": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5276}, "antlers": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5373}, "brown_antlers": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4847}, "cellphone": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5636}, "cervine": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4691}, "clothing": {"source": "implied"}, "deer": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6352}, "eyebrows": {"source": "implied"}, "footwear": {"source": "implied"}, "grey_lips": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4089}, "high_heels": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3252}, "holding_cellphone": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5291}, "holding_object": {"source": "implied"}, "holding_phone": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5415}, "lips": {"source": "implied"}, "looking_at_object": {"source": "implied"}, "looking_at_phone": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.5355}, "mammal": {"source": "implied"}, "phone": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6286}, "platform": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7564}, "platform_footwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3547}, "pupils": {"source": "implied"}, "smartphone": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5473}, "white_eyebrows": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5113}, "white_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5152}, "white_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5231}, "white_pupils": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5627}}, "structural": [], "t1": 2.27, "t2": 7.29, "t3": 14.5, "t3s": 0.0, "err": null}
|
| 4 |
+
{"id": 3365122, "n_gt": 46, "n_retrieved": 196, "n_selected": 36, "n_implied": 11, "n_structural": 0, "ret_R": 0.413, "P": 0.3889, "R": 0.3043, "F1": 0.3415, "leaf_P": 0.2273, "leaf_R": 0.1515, "leaf_F1": 0.1818, "n_leaf_sel": 22, "n_leaf_gt": 33, "ret_P": 0.0969, "sel_given_ret": 0.7368, "over_sel": 0.78, "why": {"explicit": 26}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3889, "gen_R": 0.3043, "gen_F1": 0.3415, "missed": ["after_transformation", "anthro", "bedding", "blanket", "canid", "canine", "claws", "clothed", "dialogue", "drawer", "emanata", "eyebrows", "fox", "hair", "happy", "inside", "male", "mammal", "navel", "nipples", "solo", "teeth", "text", "text_box", "the_truth", "tongue", "topless", "topless_anthro", "topless_male", "tuft", "under_covers", "whiskers"], "extra": ["actual_fur", "blue_clothing", "blue_underwear", "boxers_(clothing)", "brown_body", "brown_fur", "comic", "grey_body", "grey_clothing", "grey_fur", "grey_underwear", "home", "orange_background", "pointy_speech_bubble", "silly_face", "sitting_on_bed", "sitting_up", "smiley_face", "sparkling_eyes", "speech_bubble", "stretching", "waking_up"], "extra_evidence": {"actual_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4633}, "blue_clothing": {"source": "implied"}, "blue_underwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7191}, "boxers_(clothing)": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4591}, "brown_body": {"source": "implied"}, "brown_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6465}, "comic": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3629}, "grey_body": {"source": "implied"}, "grey_clothing": {"source": "implied"}, "grey_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6454}, "grey_underwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6354}, "home": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3694}, "orange_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5352}, "pointy_speech_bubble": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5407}, "silly_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.379}, "sitting_on_bed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5096}, "sitting_up": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5936}, "smiley_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4685}, "sparkling_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3688}, "speech_bubble": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6608}, "stretching": {"source": "stage3", "why": "explicit", "retrieval_score": 0.655}, "waking_up": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4697}}, "structural": [], "t1": 2.71, "t2": 7.61, "t3": 32.22, "t3s": 0.0, "err": null}
|
| 5 |
+
{"id": 2092351, "n_gt": 16, "n_retrieved": 123, "n_selected": 26, "n_implied": 5, "n_structural": 0, "ret_R": 0.3125, "P": 0.1923, "R": 0.3125, "F1": 0.2381, "leaf_P": 0.0, "leaf_R": 0.0, "leaf_F1": 0.0, "n_leaf_sel": 18, "n_leaf_gt": 10, "ret_P": 0.0407, "sel_given_ret": 1.0, "over_sel": 1.62, "why": {"explicit": 22}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1923, "gen_R": 0.3125, "gen_F1": 0.2381, "missed": ["anthro", "belly", "fur", "humanoid_hands", "male", "overweight", "overweight_anthro", "overweight_male", "purple_fur", "solo", "text"], "extra": ["arrow_(weapon)", "bared_teeth", "battle", "bow_(weapon)", "chinese_text", "clenched_teeth", "curved_horn", "energy_sword", "exposed_teeth", "head_horn", "japanese", "melee_weapon", "punch", "purple_background", "ranged_weapon", "sharp_teeth", "standing", "sword", "teeth", "ursine", "wide_stance"], "extra_evidence": {"arrow_(weapon)": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6063}, "bared_teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4912}, "battle": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5047}, "bow_(weapon)": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5892}, "chinese_text": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6259}, "clenched_teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4756}, "curved_horn": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4193}, "energy_sword": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4495}, "exposed_teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3995}, "head_horn": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4201}, "japanese": {"source": "stage3", "why": "explicit", "retrieval_score": 0.646}, "melee_weapon": {"source": "implied"}, "punch": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4495}, "purple_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4838}, "ranged_weapon": {"source": "implied"}, "sharp_teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6358}, "standing": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4868}, "sword": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5955}, "teeth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6424}, "ursine": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5156}, "wide_stance": {"source": "stage3", "why": "explicit", "retrieval_score": 0.448}}, "structural": [], "t1": 1.71, "t2": 7.97, "t3": 24.84, "t3s": 0.0, "err": null}
|
| 6 |
+
{"id": 2533053, "n_gt": 56, "n_retrieved": 165, "n_selected": 42, "n_implied": 16, "n_structural": 0, "ret_R": 0.2321, "P": 0.2857, "R": 0.2143, "F1": 0.2449, "leaf_P": 0.087, "leaf_R": 0.0571, "leaf_F1": 0.069, "n_leaf_sel": 23, "n_leaf_gt": 35, "ret_P": 0.0788, "sel_given_ret": 0.9231, "over_sel": 0.75, "why": {"explicit": 28}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2857, "gen_R": 0.2182, "gen_F1": 0.2474, "missed": ["anthro", "baggy_clothing", "biped", "bird's-eye_view", "blep", "blush", "brown_body", "brown_fur", "clothed", "eyelashes", "female", "female_anthro", "flat_chested", "fur", "grass", "headdress", "high-angle_view", "ice_cream_cone", "inner_ear_fluff", "kemono", "lagomorph", "long_ears", "looking_at_viewer", "made_in_abyss", "mammal", "monotone_hair", "multicolored_body", "multicolored_fur", "narehate", "on_back", "plant", "solo", "tan_body", "tan_fur", "tongue", "tongue_out", "topless", "tuft", "two_tone_body", "two_tone_fur", "two_tone_tail", "whiskers", "white_hair", "yellow_eyes"], "extra": ["black_and_white", "black_and_white_and_red", "candy", "cargo_shorts", "chocolate", "crop_top", "dairy_products", "ears", "eating", "eating_food", "eyes_closed", "eyes_mostly_closed", "green_background", "grey_bottomwear", "grey_clothing", "grey_pants", "half-closed_eyes", "holding_ice_cream", "lying_on_ground", "monochrome", "narrowed_eyes", "red_and_white", "shirt", "shorts", "small_nose", "topwear", "undershirt", "whipped_cream", "yellow_and_white", "yellow_background"], "extra_evidence": {"black_and_white": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4356}, "black_and_white_and_red": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4012}, "candy": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5316}, "cargo_shorts": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5058}, "chocolate": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5124}, "crop_top": {"source": "stage3", "why": "explicit", "retrieval_score": 0.528}, "dairy_products": {"source": "implied"}, "ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9371}, "eating": {"source": "implied"}, "eating_food": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5122}, "eyes_closed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6108}, "eyes_mostly_closed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5336}, "green_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6649}, "grey_bottomwear": {"source": "implied"}, "grey_clothing": {"source": "implied"}, "grey_pants": {"source": "stage3", "why": "explicit", "retrieval_score": 0.736}, "half-closed_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4546}, "holding_ice_cream": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8408}, "lying_on_ground": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4701}, "monochrome": {"source": "implied"}, "narrowed_eyes": {"source": "implied"}, "red_and_white": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4465}, "shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5148}, "shorts": {"source": "implied"}, "small_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7186}, "topwear": {"source": "implied"}, "undershirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5201}, "whipped_cream": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4387}, "yellow_and_white": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3809}, "yellow_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6637}}, "structural": [], "t1": 2.02, "t2": 1.33, "t3": 10.66, "t3s": 0.0, "err": null}
|
| 7 |
+
{"id": 2082421, "n_gt": 37, "n_retrieved": 178, "n_selected": 53, "n_implied": 11, "n_structural": 0, "ret_R": 0.5135, "P": 0.3019, "R": 0.4324, "F1": 0.3556, "leaf_P": 0.1875, "leaf_R": 0.25, "leaf_F1": 0.2143, "n_leaf_sel": 32, "n_leaf_gt": 24, "ret_P": 0.1067, "sel_given_ret": 0.8421, "over_sel": 1.43, "why": {"explicit": 42}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.3019, "gen_R": 0.4571, "gen_F1": 0.3636, "missed": ["alcohol", "black_sclera", "blush", "can", "dress", "drunk", "featureless_feet", "food", "generation_8_pokemon", "hatterene", "humanoid", "nintendo", "nude", "pillow", "pokemon", "pokemon_(species)", "smile", "solo", "substance_intoxication", "watching_tv", "what"], "extra": ["beer_bottle", "beer_mug", "bottle", "coffee_cup", "coffee_table", "comic", "controller", "cup", "doujinshi", "featureless_eyes", "featureless_face", "food_container", "glass", "glass_container", "grey_sclera", "hand_holding", "holding_container", "holding_cup", "holding_mug", "in_container", "japanese", "japanese_text", "lying", "lying_on_bed", "magician", "manga", "mug", "on_bed", "stage_curtains", "table", "tablet_computer", "translated", "tv_remote", "window_curtains", "wood", "wood_furniture", "wood_table"], "extra_evidence": {"beer_bottle": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3832}, "beer_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4237}, "bottle": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4167}, "coffee_cup": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4262}, "coffee_table": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4487}, "comic": {"source": "implied"}, "controller": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3717}, "cup": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5166}, "doujinshi": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5714}, "featureless_eyes": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5103}, "featureless_face": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4493}, "food_container": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4256}, "glass": {"source": "implied"}, "glass_container": {"source": "stage3", "why": "explicit", "retrieval_score": 0.435}, "grey_sclera": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4209}, "hand_holding": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4085}, "holding_container": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4912}, "holding_cup": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4824}, "holding_mug": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4498}, "in_container": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4958}, "japanese": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6127}, "japanese_text": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7469}, "lying": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5491}, "lying_on_bed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4269}, "magician": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3587}, "manga": {"source": "stage3", "why": "explicit", "retrieval_score": 0.9418}, "mug": {"source": "implied"}, "on_bed": {"source": "implied"}, "stage_curtains": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4156}, "table": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5769}, "tablet_computer": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3709}, "translated": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7321}, "tv_remote": {"source": "stage3", "why": "explicit", "retrieval_score": 0.3939}, "window_curtains": {"source": "stage3", "why": "explicit", "retrieval_score": 0.45}, "wood": {"source": "implied"}, "wood_furniture": {"source": "implied"}, "wood_table": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4428}}, "structural": [], "t1": 1.1, "t2": 1.51, "t3": 16.57, "t3s": 0.0, "err": null}
|
| 8 |
+
{"id": 1495502, "n_gt": 24, "n_retrieved": 175, "n_selected": 56, "n_implied": 19, "n_structural": 0, "ret_R": 0.7083, "P": 0.1786, "R": 0.4167, "F1": 0.25, "leaf_P": 0.0571, "leaf_R": 0.1667, "leaf_F1": 0.0851, "n_leaf_sel": 35, "n_leaf_gt": 12, "ret_P": 0.0971, "sel_given_ret": 0.5882, "over_sel": 2.33, "why": {"explicit": 37, "strong_implied": 2}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1852, "gen_R": 0.4167, "gen_F1": 0.2564, "missed": ["abs", "biceps", "big_muscles", "felid", "fur", "grey_body", "grey_fur", "male", "muscular_male", "pantherine", "pecs", "solo", "stripes", "tiger"], "extra": ["abandoned_building", "artist_logo", "artist_name", "bat", "battery", "big_arms", "big_forearms", "big_legs", "black_inner_ear_fluff", "burning_building", "chest_tuft", "digitigrade", "forearms", "generation_6_pokemon", "gold_earring", "grey_clothing", "grey_shirt", "grey_tail", "grey_tank_top", "grey_topwear", "hands_on_legs", "hands_on_own_legs", "highlights_(coloring)", "hind_legs", "holding_both_legs", "inner_ear_fluff", "legs_together", "logo", "long_arms", "long_hair", "nintendo", "noibat", "outstretched_arms", "pokemon", "pokemon_(species)", "red_ears", "red_inner_ear", "ring", "road", "street_lamp", "thick_thighs", "tuft", "tufted_ears", "watermark", "white_highlights", "wingless_bat"], "extra_evidence": {"abandoned_building": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5302}, "artist_logo": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5716}, "artist_name": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5924}, "bat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4828}, "battery": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4338}, "big_arms": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6709}, "big_forearms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5513}, "big_legs": {"source": "stage3", "why": "strong_implied", "retrieval_score": 0.6137}, "black_inner_ear_fluff": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5386}, "burning_building": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5107}, "chest_tuft": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5089}, "digitigrade": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5234}, "forearms": {"source": "implied"}, "generation_6_pokemon": {"source": "implied"}, "gold_earring": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6264}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "implied"}, "grey_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5074}, "grey_tank_top": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8713}, "grey_topwear": {"source": "implied"}, "hands_on_legs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5216}, "hands_on_own_legs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5299}, "highlights_(coloring)": {"source": "implied"}, "hind_legs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5433}, "holding_both_legs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5104}, "inner_ear_fluff": {"source": "implied"}, "legs_together": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5365}, "logo": {"source": "implied"}, "long_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5184}, "long_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5126}, "nintendo": {"source": "implied"}, "noibat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4172}, "outstretched_arms": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5233}, "pokemon": {"source": "implied"}, "pokemon_(species)": {"source": "implied"}, "red_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5749}, "red_inner_ear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5539}, "ring": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5509}, "road": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5381}, "street_lamp": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5255}, "thick_thighs": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4986}, "tuft": {"source": "implied"}, "tufted_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5466}, "watermark": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7417}, "white_highlights": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5314}, "wingless_bat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4327}}, "structural": [], "t1": 2.1, "t2": 1.77, "t3": 36.02, "t3s": 0.0, "err": null}
|
| 9 |
+
{"id": 1889828, "n_gt": 51, "n_retrieved": 206, "n_selected": 93, "n_implied": 27, "n_structural": 0, "ret_R": 0.1176, "P": 0.1183, "R": 0.2157, "F1": 0.1528, "leaf_P": 0.0328, "leaf_R": 0.0667, "leaf_F1": 0.044, "n_leaf_sel": 61, "n_leaf_gt": 30, "ret_P": 0.0291, "sel_given_ret": 1.8333, "over_sel": 1.82, "why": {"explicit": 66}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1183, "gen_R": 0.2245, "gen_F1": 0.1549, "missed": ["avian", "bag", "beverage", "bird", "canid", "canine", "charmeleon", "close-up", "eeveelution", "espeon", "eyewear", "female", "food", "generation_1_pokemon", "generation_2_pokemon", "generation_7_pokemon", "glasses", "hat", "inside", "knife", "light", "lycanroc", "male", "mammal", "midnight_lycanroc", "money", "nintendo", "open_mouth", "penguin", "pokemon", "pokemon_(species)", "reptile", "scalie", "semi-anthro", "sunlight", "text", "tired", "tongue", "upset", "yellow_beak"], "extra": ["accessory", "angry_expression", "annoyed_expression", "back_to_back", "baking", "bar_counter", "bar_stool", "bartender", "bartending", "bed", "blue_beak", "cookware", "counter", "cutlery", "doorframe", "doorway", "ears", "ears_front", "elemental_creature", "english_text", "expressionless", "expressions", "fluffy_ears", "fluffy_fur", "fur", "gesture", "grey_beak", "hair_accessory", "hairclip", "hand_gesture", "inner_ear_fluff", "irregular_speech_bubble", "lava", "lava_creature", "looking_back", "mineral_fauna", "multicolored_body", "multicolored_fur", "neck_tuft", "on_bed", "open_door", "opening_door", "orange_cheeks", "orange_clothing", "orange_headwear", "orange_nose", "orange_shirt", "orange_topwear", "pale_fur", "pillow", "pink_ears", "plate", "plushie", "pointing", "pointing_at_viewer", "rear_view", "red_beak", "red_body", "red_fur", "red_inner_ear_fluff", "red_pillow", "rock", "rock_creature", "serving", "shirt", "small_butt", "sneaking", "speech_bubble", "sphere_creature", "stalking", "sweatdrop_(iconography)", "topwear", "tuft", "two_tone_body", "two_tone_fur", "waving", "waving_hand", "wavy_mouth", "white_body", "white_fur", "white_inner_ear_fluff", "white_speech_bubble"], "extra_evidence": {"accessory": {"source": "implied"}, "angry_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5991}, "annoyed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5779}, "back_to_back": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5633}, "baking": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5924}, "bar_counter": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5335}, "bar_stool": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4861}, "bartender": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5778}, "bartending": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5291}, "bed": {"source": "implied"}, "blue_beak": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6548}, "cookware": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6466}, "counter": {"source": "implied"}, "cutlery": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6555}, "doorframe": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6316}, "doorway": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7163}, "ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8694}, "ears_front": {"source": "stage3", "why": "explicit", "retrieval_score": 0.651}, "elemental_creature": {"source": "implied"}, "english_text": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5055}, "expressionless": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6833}, "expressions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6498}, "fluffy_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5895}, "fluffy_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6168}, "fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7015}, "gesture": {"source": "implied"}, "grey_beak": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6389}, "hair_accessory": {"source": "implied"}, "hairclip": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4998}, "hand_gesture": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5856}, "inner_ear_fluff": {"source": "implied"}, "irregular_speech_bubble": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5016}, "lava": {"source": "implied"}, "lava_creature": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4861}, "looking_back": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4715}, "mineral_fauna": {"source": "implied"}, "multicolored_body": {"source": "implied"}, "multicolored_fur": {"source": "implied"}, "neck_tuft": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5694}, "on_bed": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5857}, "open_door": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5685}, "opening_door": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6124}, "orange_cheeks": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5848}, "orange_clothing": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6387}, "orange_headwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6322}, "orange_nose": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6205}, "orange_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5887}, "orange_topwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.646}, "pale_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6}, "pillow": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5954}, "pink_ears": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7222}, "plate": {"source": "stage3", "why": "explicit", "retrieval_score": 0.842}, "plushie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4498}, "pointing": {"source": "implied"}, "pointing_at_viewer": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6403}, "rear_view": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5057}, "red_beak": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6359}, "red_body": {"source": "implied"}, "red_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.713}, "red_inner_ear_fluff": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5994}, "red_pillow": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5757}, "rock": {"source": "implied"}, "rock_creature": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5094}, "serving": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5309}, "shirt": {"source": "implied"}, "small_butt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4948}, "sneaking": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4789}, "speech_bubble": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5811}, "sphere_creature": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5053}, "stalking": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5025}, "sweatdrop_(iconography)": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5752}, "topwear": {"source": "implied"}, "tuft": {"source": "implied"}, "two_tone_body": {"source": "implied"}, "two_tone_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5831}, "waving": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6302}, "waving_hand": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5875}, "wavy_mouth": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5649}, "white_body": {"source": "implied"}, "white_fur": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5864}, "white_inner_ear_fluff": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7156}, "white_speech_bubble": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5266}}, "structural": [], "t1": 3.37, "t2": 2.22, "t3": 30.09, "t3s": 0.0, "err": null}
|
| 10 |
+
{"id": 3546942, "n_gt": 23, "n_retrieved": 188, "n_selected": 68, "n_implied": 32, "n_structural": 0, "ret_R": 0.3913, "P": 0.1912, "R": 0.5652, "F1": 0.2857, "leaf_P": 0.037, "leaf_R": 0.0909, "leaf_F1": 0.0526, "n_leaf_sel": 27, "n_leaf_gt": 11, "ret_P": 0.0479, "sel_given_ret": 1.4444, "over_sel": 2.96, "why": {"explicit": 37}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.1912, "gen_R": 0.5652, "gen_F1": 0.2857, "missed": ["3_toes", "4_fingers", "anthro", "countershading", "feet", "fingers", "green_body", "male", "solo", "toes"], "extra": ["beanie", "black_clothing", "black_headwear", "black_shirt", "black_topwear", "blue_bottomwear", "blue_clothing", "blue_hat", "blue_headwear", "blue_jeans", "blue_pants", "blue_shirt", "blue_topwear", "chain_jewelry", "confident", "curled_tail", "curved_tail", "dark_background", "denim", "denim_clothing", "electricity", "gold_(metal)", "gold_jewelry", "gold_necklace", "grey_clothing", "grey_shirt", "grey_topwear", "holding_clothing", "holding_hat", "holding_headgear", "holding_headwear", "holding_object", "jacket", "jeans", "jewelry", "lightning", "lightning_bolt", "logo", "long_tail", "necklace", "pendant", "posing_for_picture", "purple_clothing", "purple_hat", "purple_headwear", "red_background", "red_clothing", "red_hat", "red_headwear", "red_jacket", "red_topwear", "short_jeans", "sitting", "sitting_on_ground", "tail"], "extra_evidence": {"beanie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5451}, "black_clothing": {"source": "implied"}, "black_headwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.602}, "black_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7758}, "black_topwear": {"source": "implied"}, "blue_bottomwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6925}, "blue_clothing": {"source": "implied"}, "blue_hat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6054}, "blue_headwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6006}, "blue_jeans": {"source": "stage3", "why": "explicit", "retrieval_score": 0.8088}, "blue_pants": {"source": "implied"}, "blue_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6424}, "blue_topwear": {"source": "implied"}, "chain_jewelry": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4529}, "confident": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5649}, "curled_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7}, "curved_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.531}, "dark_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6408}, "denim": {"source": "implied"}, "denim_clothing": {"source": "implied"}, "electricity": {"source": "implied"}, "gold_(metal)": {"source": "implied"}, "gold_jewelry": {"source": "implied"}, "gold_necklace": {"source": "stage3", "why": "explicit", "retrieval_score": 0.526}, "grey_clothing": {"source": "implied"}, "grey_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6978}, "grey_topwear": {"source": "implied"}, "holding_clothing": {"source": "implied"}, "holding_hat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5479}, "holding_headgear": {"source": "implied"}, "holding_headwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5492}, "holding_object": {"source": "implied"}, "jacket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7547}, "jeans": {"source": "implied"}, "jewelry": {"source": "implied"}, "lightning": {"source": "stage3", "why": "explicit", "retrieval_score": 0.506}, "lightning_bolt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6741}, "logo": {"source": "stage3", "why": "explicit", "retrieval_score": 0.719}, "long_tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5346}, "necklace": {"source": "implied"}, "pendant": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7015}, "posing_for_picture": {"source": "stage3", "why": "explicit", "retrieval_score": 0.508}, "purple_clothing": {"source": "implied"}, "purple_hat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.794}, "purple_headwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.735}, "red_background": {"source": "stage3", "why": "explicit", "retrieval_score": 0.564}, "red_clothing": {"source": "implied"}, "red_hat": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5509}, "red_headwear": {"source": "implied"}, "red_jacket": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7814}, "red_topwear": {"source": "implied"}, "short_jeans": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5643}, "sitting": {"source": "implied"}, "sitting_on_ground": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5183}, "tail": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6498}}, "structural": [], "t1": 1.28, "t2": 1.95, "t3": 35.3, "t3s": 0.0, "err": null}
|
| 11 |
+
{"id": 64566, "n_gt": 18, "n_retrieved": 142, "n_selected": 35, "n_implied": 9, "n_structural": 0, "ret_R": 0.5, "P": 0.2857, "R": 0.5556, "F1": 0.3774, "leaf_P": 0.1538, "leaf_R": 0.3077, "leaf_F1": 0.2051, "n_leaf_sel": 26, "n_leaf_gt": 13, "ret_P": 0.0634, "sel_given_ret": 1.1111, "over_sel": 1.94, "why": {"explicit": 26}, "n_gt_char": 0, "n_sel_char": 0, "char_F1": 1.0, "gen_P": 0.2857, "gen_R": 0.5556, "gen_F1": 0.3774, "missed": ["feral", "fingerless_(marking)", "male", "mostly_nude", "on_front", "solo", "toeless_(marking)", "water"], "extra": ["black_necktie", "closed_(disambiguation)", "clothing", "commentary", "cupping_chin", "dreadlocks", "dress_shirt", "glasses", "holding_chin", "humor", "multiple_versions", "parody", "pool", "poolside", "punk_hair", "reaction_image", "red_clothing", "red_necktie", "red_topwear", "relaxed_expression", "shirt", "smug_expression", "suit", "toony_expression", "topwear"], "extra_evidence": {"black_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5222}, "closed_(disambiguation)": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6327}, "clothing": {"source": "implied"}, "commentary": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5224}, "cupping_chin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4538}, "dreadlocks": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4276}, "dress_shirt": {"source": "stage3", "why": "explicit", "retrieval_score": 0.508}, "glasses": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5057}, "holding_chin": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4397}, "humor": {"source": "stage3", "why": "explicit", "retrieval_score": 0.6965}, "multiple_versions": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5052}, "parody": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5053}, "pool": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4554}, "poolside": {"source": "stage3", "why": "explicit", "retrieval_score": 0.49}, "punk_hair": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4219}, "reaction_image": {"source": "stage3", "why": "explicit", "retrieval_score": 0.4912}, "red_clothing": {"source": "implied"}, "red_necktie": {"source": "stage3", "why": "explicit", "retrieval_score": 0.7033}, "red_topwear": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5236}, "relaxed_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5686}, "shirt": {"source": "implied"}, "smug_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5225}, "suit": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5023}, "toony_expression": {"source": "stage3", "why": "explicit", "retrieval_score": 0.5146}, "topwear": {"source": "implied"}}, "structural": [], "t1": 1.41, "t2": 1.15, "t3": 34.21, "t3s": 0.0, "err": null}
|
eval_analysis.txt
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Loaded 10 evaluation results from data\eval_results\eval_caption_cogvlm_n10_seed123_20260214_061321.jsonl
|
| 2 |
+
|
| 3 |
+
================================================================================
|
| 4 |
+
CRITICAL CATEGORIES
|
| 5 |
+
================================================================================
|
| 6 |
+
|
| 7 |
+
Body type (body_type)
|
| 8 |
+
Constraint: multi
|
| 9 |
+
Ground truth tags: 0
|
| 10 |
+
Precision: 0.000
|
| 11 |
+
Recall: 0.000
|
| 12 |
+
F1: 0.000
|
| 13 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 14 |
+
|
| 15 |
+
How many (count)
|
| 16 |
+
Constraint: exactly_one
|
| 17 |
+
Ground truth tags: 0
|
| 18 |
+
Accuracy: 1.000
|
| 19 |
+
Precision: 0.000
|
| 20 |
+
Recall: 0.000
|
| 21 |
+
F1: 0.000
|
| 22 |
+
(TP=0, FP=0, FN=0, TN=50)
|
| 23 |
+
|
| 24 |
+
Species (species)
|
| 25 |
+
Constraint: multi
|
| 26 |
+
Ground truth tags: 0
|
| 27 |
+
Precision: 0.000
|
| 28 |
+
Recall: 0.000
|
| 29 |
+
F1: 0.000
|
| 30 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 31 |
+
|
| 32 |
+
================================================================================
|
| 33 |
+
IMPORTANT CATEGORIES
|
| 34 |
+
================================================================================
|
| 35 |
+
|
| 36 |
+
Clothing (clothing)
|
| 37 |
+
Constraint: multi
|
| 38 |
+
Ground truth tags: 0
|
| 39 |
+
Precision: 0.000
|
| 40 |
+
Recall: 0.000
|
| 41 |
+
F1: 0.000
|
| 42 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 43 |
+
|
| 44 |
+
Sex/gender (gender)
|
| 45 |
+
Constraint: multi
|
| 46 |
+
Ground truth tags: 0
|
| 47 |
+
Precision: 0.000
|
| 48 |
+
Recall: 0.000
|
| 49 |
+
F1: 0.000
|
| 50 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 51 |
+
|
| 52 |
+
Location (location)
|
| 53 |
+
Constraint: multi
|
| 54 |
+
Ground truth tags: 0
|
| 55 |
+
Precision: 0.000
|
| 56 |
+
Recall: 0.000
|
| 57 |
+
F1: 0.000
|
| 58 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 59 |
+
|
| 60 |
+
Perspective (perspective)
|
| 61 |
+
Constraint: multi
|
| 62 |
+
Ground truth tags: 0
|
| 63 |
+
Precision: 0.000
|
| 64 |
+
Recall: 0.000
|
| 65 |
+
F1: 0.000
|
| 66 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 67 |
+
|
| 68 |
+
Posture (posture)
|
| 69 |
+
Constraint: multi
|
| 70 |
+
Ground truth tags: 0
|
| 71 |
+
Precision: 0.000
|
| 72 |
+
Recall: 0.000
|
| 73 |
+
F1: 0.000
|
| 74 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 75 |
+
|
| 76 |
+
================================================================================
|
| 77 |
+
NICE-TO-HAVE CATEGORIES
|
| 78 |
+
================================================================================
|
| 79 |
+
|
| 80 |
+
Body decor (body_decor)
|
| 81 |
+
Constraint: multi
|
| 82 |
+
Ground truth tags: 0
|
| 83 |
+
Precision: 0.000
|
| 84 |
+
Recall: 0.000
|
| 85 |
+
F1: 0.000
|
| 86 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 87 |
+
|
| 88 |
+
Breasts (breasts)
|
| 89 |
+
Constraint: multi
|
| 90 |
+
Ground truth tags: 0
|
| 91 |
+
Precision: 0.000
|
| 92 |
+
Recall: 0.000
|
| 93 |
+
F1: 0.000
|
| 94 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 95 |
+
|
| 96 |
+
Expression (expression)
|
| 97 |
+
Constraint: multi
|
| 98 |
+
Ground truth tags: 0
|
| 99 |
+
Precision: 0.000
|
| 100 |
+
Recall: 0.000
|
| 101 |
+
F1: 0.000
|
| 102 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 103 |
+
|
| 104 |
+
Fur style (fur_style)
|
| 105 |
+
Constraint: multi
|
| 106 |
+
Ground truth tags: 0
|
| 107 |
+
Precision: 0.000
|
| 108 |
+
Recall: 0.000
|
| 109 |
+
F1: 0.000
|
| 110 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 111 |
+
|
| 112 |
+
Gaze (gaze)
|
| 113 |
+
Constraint: multi
|
| 114 |
+
Ground truth tags: 0
|
| 115 |
+
Precision: 0.000
|
| 116 |
+
Recall: 0.000
|
| 117 |
+
F1: 0.000
|
| 118 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 119 |
+
|
| 120 |
+
General activity (if any) (general_activity_if_any)
|
| 121 |
+
Constraint: multi
|
| 122 |
+
Ground truth tags: 0
|
| 123 |
+
Precision: 0.000
|
| 124 |
+
Recall: 0.000
|
| 125 |
+
F1: 0.000
|
| 126 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 127 |
+
|
| 128 |
+
Hair (hair)
|
| 129 |
+
Constraint: multi
|
| 130 |
+
Ground truth tags: 0
|
| 131 |
+
Precision: 0.000
|
| 132 |
+
Recall: 0.000
|
| 133 |
+
F1: 0.000
|
| 134 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 135 |
+
|
| 136 |
+
Limbs (limbs)
|
| 137 |
+
Constraint: multi
|
| 138 |
+
Ground truth tags: 0
|
| 139 |
+
Precision: 0.000
|
| 140 |
+
Recall: 0.000
|
| 141 |
+
F1: 0.000
|
| 142 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 143 |
+
|
| 144 |
+
================================================================================
|
| 145 |
+
META CATEGORIES
|
| 146 |
+
================================================================================
|
| 147 |
+
|
| 148 |
+
Information (information)
|
| 149 |
+
Constraint: multi
|
| 150 |
+
Ground truth tags: 0
|
| 151 |
+
Precision: 0.000
|
| 152 |
+
Recall: 0.000
|
| 153 |
+
F1: 0.000
|
| 154 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 155 |
+
|
| 156 |
+
Picture organization (organization)
|
| 157 |
+
Constraint: multi
|
| 158 |
+
Ground truth tags: 0
|
| 159 |
+
Precision: 0.000
|
| 160 |
+
Recall: 0.000
|
| 161 |
+
F1: 0.000
|
| 162 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 163 |
+
|
| 164 |
+
Quality/medium (quality)
|
| 165 |
+
Constraint: multi
|
| 166 |
+
Ground truth tags: 0
|
| 167 |
+
Precision: 0.000
|
| 168 |
+
Recall: 0.000
|
| 169 |
+
F1: 0.000
|
| 170 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 171 |
+
|
| 172 |
+
Requests (requests)
|
| 173 |
+
Constraint: multi
|
| 174 |
+
Ground truth tags: 0
|
| 175 |
+
Precision: 0.000
|
| 176 |
+
Recall: 0.000
|
| 177 |
+
F1: 0.000
|
| 178 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 179 |
+
|
| 180 |
+
Image size (resolution)
|
| 181 |
+
Constraint: multi
|
| 182 |
+
Ground truth tags: 0
|
| 183 |
+
Precision: 0.000
|
| 184 |
+
Recall: 0.000
|
| 185 |
+
F1: 0.000
|
| 186 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 187 |
+
|
| 188 |
+
Style (style)
|
| 189 |
+
Constraint: multi
|
| 190 |
+
Ground truth tags: 0
|
| 191 |
+
Precision: 0.000
|
| 192 |
+
Recall: 0.000
|
| 193 |
+
F1: 0.000
|
| 194 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 195 |
+
|
| 196 |
+
Text and languages (text)
|
| 197 |
+
Constraint: multi
|
| 198 |
+
Ground truth tags: 0
|
| 199 |
+
Precision: 0.000
|
| 200 |
+
Recall: 0.000
|
| 201 |
+
F1: 0.000
|
| 202 |
+
(TP=0, FP=0, FN=0, TN=0)
|
| 203 |
+
|
| 204 |
+
================================================================================
|
| 205 |
+
SUMMARY
|
| 206 |
+
================================================================================
|
| 207 |
+
|
| 208 |
+
CRITICAL:
|
| 209 |
+
Total GT tags: 0
|
| 210 |
+
Micro-avg P/R/F1: 0.000 / 0.000 / 0.000
|
| 211 |
+
Macro-avg P/R/F1: 0.000 / 0.000 / 0.000
|
| 212 |
+
|
| 213 |
+
IMPORTANT:
|
| 214 |
+
Total GT tags: 0
|
| 215 |
+
Micro-avg P/R/F1: 0.000 / 0.000 / 0.000
|
| 216 |
+
Macro-avg P/R/F1: 0.000 / 0.000 / 0.000
|
| 217 |
+
|
| 218 |
+
NICE-TO-HAVE:
|
| 219 |
+
Total GT tags: 0
|
| 220 |
+
Micro-avg P/R/F1: 0.000 / 0.000 / 0.000
|
| 221 |
+
Macro-avg P/R/F1: 0.000 / 0.000 / 0.000
|
| 222 |
+
|
| 223 |
+
META:
|
| 224 |
+
Total GT tags: 0
|
| 225 |
+
Micro-avg P/R/F1: 0.000 / 0.000 / 0.000
|
| 226 |
+
Macro-avg P/R/F1: 0.000 / 0.000 / 0.000
|
| 227 |
+
|
| 228 |
+
================================================================================
|