{ "cells": [ { "cell_type": "code", "execution_count": 53, "id": "d3a1f52b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "from dotenv import load_dotenv\n", "from pathlib import Path\n", "import json\n", "\n", "load_dotenv()" ] }, { "cell_type": "code", "execution_count": 54, "id": "bca20bcc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ImageIdEncodedPixelsHeightWidthClassIdAttributesIds
000000663ed1ff0c4e0132b9b9ac53f6e6068157 7 6073371 20 6078584 34 6083797 48 608...521436766115,136,143,154,230,295,316,317
100000663ed1ff0c4e0132b9b9ac53f6e6323163 11 6328356 32 6333549 53 6338742 75 63...521436760115,136,142,146,225,295,316,317
200000663ed1ff0c4e0132b9b9ac53f6e8521389 10 8526585 30 8531789 42 8537002 46 85...5214367628163
300000663ed1ff0c4e0132b9b9ac53f6e12903854 2 12909064 7 12914275 10 12919485 15 ...5214367631160,204
400000663ed1ff0c4e0132b9b9ac53f6e10837337 5 10842542 14 10847746 24 10852951 33...5214367632219
\n", "
" ], "text/plain": [ " ImageId \\\n", "0 00000663ed1ff0c4e0132b9b9ac53f6e \n", "1 00000663ed1ff0c4e0132b9b9ac53f6e \n", "2 00000663ed1ff0c4e0132b9b9ac53f6e \n", "3 00000663ed1ff0c4e0132b9b9ac53f6e \n", "4 00000663ed1ff0c4e0132b9b9ac53f6e \n", "\n", " EncodedPixels Height Width ClassId \\\n", "0 6068157 7 6073371 20 6078584 34 6083797 48 608... 5214 3676 6 \n", "1 6323163 11 6328356 32 6333549 53 6338742 75 63... 5214 3676 0 \n", "2 8521389 10 8526585 30 8531789 42 8537002 46 85... 5214 3676 28 \n", "3 12903854 2 12909064 7 12914275 10 12919485 15 ... 5214 3676 31 \n", "4 10837337 5 10842542 14 10847746 24 10852951 33... 5214 3676 32 \n", "\n", " AttributesIds \n", "0 115,136,143,154,230,295,316,317 \n", "1 115,136,142,146,225,295,316,317 \n", "2 163 \n", "3 160,204 \n", "4 219 " ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Get project root (one level up from notebooks/ if running from notebooks directory)\n", "current_dir = Path.cwd()\n", "PROJECT_ROOT = current_dir.parent if current_dir.name == \"notebooks\" else current_dir\n", "DATA_PATH = PROJECT_ROOT / \"data\"\n", "\n", "fashion_df = pd.read_csv(DATA_PATH / \"train.csv\")\n", "fashion_df.head()\n" ] }, { "cell_type": "code", "execution_count": 55, "id": "58f4f7b4", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnamesupercategorylevel
00shirt, blouseupperbody2
11top, t-shirt, sweatshirtupperbody2
22sweaterupperbody2
33cardiganupperbody2
44jacketupperbody2
55vestupperbody2
66pantslowerbody2
77shortslowerbody2
88skirtlowerbody2
99coatwholebody2
1010dresswholebody2
1111jumpsuitwholebody2
1212capewholebody2
1313glasseshead2
1414hathead2
1515headband, head covering, hair accessoryhead2
1616tieneck2
1717glovearms and hands2
1818watcharms and hands2
1919beltwaist2
2020leg warmerlegs and feet2
2121tights, stockingslegs and feet2
2222socklegs and feet2
2323shoelegs and feet2
2424bag, walletothers2
2525scarfothers2
2626umbrellaothers2
2727hoodgarment parts2
2828collargarment parts2
2929lapelgarment parts2
3030epaulettegarment parts2
3131sleevegarment parts2
3232pocketgarment parts2
3333necklinegarment parts2
3434buckleclosures2
3535zipperclosures2
3636appliquedecorations2
3737beaddecorations2
3838bowdecorations2
3939flowerdecorations2
4040fringedecorations2
4141ribbondecorations2
4242rivetdecorations2
4343ruffledecorations2
4444sequindecorations2
4545tasseldecorations2
\n", "
" ], "text/plain": [ " id name supercategory level\n", "0 0 shirt, blouse upperbody 2\n", "1 1 top, t-shirt, sweatshirt upperbody 2\n", "2 2 sweater upperbody 2\n", "3 3 cardigan upperbody 2\n", "4 4 jacket upperbody 2\n", "5 5 vest upperbody 2\n", "6 6 pants lowerbody 2\n", "7 7 shorts lowerbody 2\n", "8 8 skirt lowerbody 2\n", "9 9 coat wholebody 2\n", "10 10 dress wholebody 2\n", "11 11 jumpsuit wholebody 2\n", "12 12 cape wholebody 2\n", "13 13 glasses head 2\n", "14 14 hat head 2\n", "15 15 headband, head covering, hair accessory head 2\n", "16 16 tie neck 2\n", "17 17 glove arms and hands 2\n", "18 18 watch arms and hands 2\n", "19 19 belt waist 2\n", "20 20 leg warmer legs and feet 2\n", "21 21 tights, stockings legs and feet 2\n", "22 22 sock legs and feet 2\n", "23 23 shoe legs and feet 2\n", "24 24 bag, wallet others 2\n", "25 25 scarf others 2\n", "26 26 umbrella others 2\n", "27 27 hood garment parts 2\n", "28 28 collar garment parts 2\n", "29 29 lapel garment parts 2\n", "30 30 epaulette garment parts 2\n", "31 31 sleeve garment parts 2\n", "32 32 pocket garment parts 2\n", "33 33 neckline garment parts 2\n", "34 34 buckle closures 2\n", "35 35 zipper closures 2\n", "36 36 applique decorations 2\n", "37 37 bead decorations 2\n", "38 38 bow decorations 2\n", "39 39 flower decorations 2\n", "40 40 fringe decorations 2\n", "41 41 ribbon decorations 2\n", "42 42 rivet decorations 2\n", "43 43 ruffle decorations 2\n", "44 44 sequin decorations 2\n", "45 45 tassel decorations 2" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "label_descriptions = json.load(open(DATA_PATH / \"label_descriptions.json\"))\n", "\n", "categories_df = pd.DataFrame(label_descriptions[\"categories\"])\n", "categories_df" ] }, { "cell_type": "code", "execution_count": 56, "id": "48d7ab2b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnamesupercategorylevel
00classic (t-shirt)nickname1
11polo (shirt)nickname1
22undershirtnickname1
33henley (shirt)nickname1
44ringer (t-shirt)nickname1
...............
289336peacockanimal2
290337zebraanimal2
291338giraffeanimal2
292339toile de jouytextile pattern1
293340planttextile pattern1
\n", "

294 rows × 4 columns

\n", "
" ], "text/plain": [ " id name supercategory level\n", "0 0 classic (t-shirt) nickname 1\n", "1 1 polo (shirt) nickname 1\n", "2 2 undershirt nickname 1\n", "3 3 henley (shirt) nickname 1\n", "4 4 ringer (t-shirt) nickname 1\n", ".. ... ... ... ...\n", "289 336 peacock animal 2\n", "290 337 zebra animal 2\n", "291 338 giraffe animal 2\n", "292 339 toile de jouy textile pattern 1\n", "293 340 plant textile pattern 1\n", "\n", "[294 rows x 4 columns]" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "attributes_df = pd.DataFrame(label_descriptions[\"attributes\"])\n", "attributes_df\n" ] }, { "cell_type": "code", "execution_count": 57, "id": "b178ce00", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Found 0 potentially SUGGESTIVE categories\n", "Found 4 potentially SUGGESTIVE attributes\n", "\n", "============================================================\n", "POTENTIALLY SUGGESTIVE CATEGORIES:\n", "============================================================\n", "Empty DataFrame\n", "Columns: [id, name, supercategory]\n", "Index: []\n", "\n", "============================================================\n", "POTENTIALLY SUGGESTIVE ATTRIBUTES:\n", "============================================================\n", " id name supercategory\n", " 51 booty (shorts) nickname\n", "106 bodycon (dress) nickname\n", "148 micro (length) length\n", "192 plunging (neckline) neckline type\n" ] } ], "source": [ "# ULTRA-RESTRICTIVE criteria for UNCERTAIN SUGGESTIVE content\n", "# Only items that are clearly and unambiguously SUGGESTIVE/revealing\n", "# Removed: crop tops, halter tops, tube tops, mini length, tight fit, etc. 
# ULTRA-RESTRICTIVE criteria for UNCERTAIN SUGGESTIVE content.
# Only items that are clearly and unambiguously SUGGESTIVE/revealing are kept.
# Removed: crop tops, halter tops, tube tops, mini length, tight fit, etc.
# - too many false positives.

# Keyword lists checked against the item name regardless of supercategory.
revealing_keywords = {
    'categories': [
        # No inherently revealing categories remain - all removed as
        # false-positive prone; 'booty'/'bodycon' are handled via attributes.
    ],
    'attributes': [
        # ONLY the most clearly SUGGESTIVE items.
        'booty (shorts)',   # very specific and clearly SUGGESTIVE
        'bodycon (dress)',  # form-fitting, often revealing
        # Removed: crop (top), halter (top), tube (top), camisole,
        # slip (dress) - too many false positives.
    ],
}

# Patterns that only count when the item belongs to the given supercategory.
# Single source of truth: the matcher below reads this dict generically
# (the original duplicated these values as inline hard-coded checks).
revealing_patterns = {
    'length': [
        # Removed: mini (length) - too broad, many modest mini skirts.
        'micro (length)',  # only very short - but still might have false positives
    ],
    'neckline type': [
        'plunging (neckline)',  # only the most revealing neckline
        # Removed: off-the-shoulder, one shoulder - can be modest.
    ],
    'silhouette': [
        # Removed: tight (fit) - way too broad, many normal clothes are tight.
    ],
    'nickname': [
        # Only the most clearly SUGGESTIVE.
        'booty (shorts)',
        'bodycon (dress)',
    ],
}


def is_potentially_SUGGESTIVE(name: str, supercategory: str = None) -> bool:
    """Check if a category or attribute name suggests potentially SUGGESTIVE content.

    ULTRA-RESTRICTIVE: only matches clearly SUGGESTIVE items to avoid false
    positives. Supercategory-scoped patterns come from ``revealing_patterns``;
    the global keyword lists are checked afterwards.
    """
    name_lower = name.lower()

    # Special case: 'booty' is specific enough to match in any context.
    if 'booty' in name_lower:
        return True

    # Supercategory-scoped patterns (length / neckline type / nickname / ...).
    # Generic lookup replaces the previous per-supercategory if/elif chain,
    # which duplicated the dict contents inline.
    if supercategory:
        for pattern in revealing_patterns.get(supercategory, []):
            if pattern.lower() in name_lower:
                return True

    # Supercategory-independent keyword lists.
    for keyword in revealing_keywords['categories'] + revealing_keywords['attributes']:
        if keyword.lower() in name_lower:
            return True

    return False


# Filter categories. Supercategory is now passed too, for consistency with the
# attribute check (category supercategories never appear in revealing_patterns,
# so the result is unchanged).
SUGGESTIVE_categories = categories_df[
    categories_df.apply(
        lambda row: is_potentially_SUGGESTIVE(row['name'], row['supercategory']),
        axis=1,
    )
].copy()

# Filter attributes.
SUGGESTIVE_attributes = attributes_df[
    attributes_df.apply(
        lambda row: is_potentially_SUGGESTIVE(row['name'], row['supercategory']),
        axis=1,
    )
].copy()

print(f"Found {len(SUGGESTIVE_categories)} potentially SUGGESTIVE categories")
print(f"Found {len(SUGGESTIVE_attributes)} potentially SUGGESTIVE attributes")
print("\n" + "="*60)
print("POTENTIALLY SUGGESTIVE CATEGORIES:")
print("="*60)
print(SUGGESTIVE_categories[['id', 'name', 'supercategory']].to_string(index=False))
print("\n" + "="*60)
print("POTENTIALLY SUGGESTIVE ATTRIBUTES:")
print("="*60)
print(SUGGESTIVE_attributes[['id', 'name', 'supercategory']].to_string(index=False))
"Attributes by supercategory:\n", "supercategory\n", "nickname 2\n", "length 1\n", "neckline type 1\n", "dtype: int64\n", "\n", "Detailed attribute breakdown:\n", "\n", "nickname:\n", " - booty (shorts) (id: 51)\n", " - bodycon (dress) (id: 106)\n", "\n", "length:\n", " - micro (length) (id: 148)\n", "\n", "neckline type:\n", " - plunging (neckline) (id: 192)\n", "\n", "============================================================\n", "SUMMARY DATAFRAME (for export):\n", "============================================================\n", " type id name supercategory\n", "0 attribute 51 booty (shorts) nickname\n", "1 attribute 106 bodycon (dress) nickname\n", "2 attribute 148 micro (length) length\n", "3 attribute 192 plunging (neckline) neckline type\n" ] } ], "source": [ "# Create a detailed breakdown by supercategory\n", "print(\"=\"*60)\n", "print(\"BREAKDOWN BY SUPERCATEGORY:\")\n", "print(\"=\"*60)\n", "\n", "if len(SUGGESTIVE_attributes) > 0:\n", " print(\"\\nAttributes by supercategory:\")\n", " print(SUGGESTIVE_attributes.groupby('supercategory').size().sort_values(ascending=False))\n", " \n", " print(\"\\nDetailed attribute breakdown:\")\n", " for supercat in SUGGESTIVE_attributes['supercategory'].unique():\n", " print(f\"\\n{supercat}:\")\n", " subset = SUGGESTIVE_attributes[SUGGESTIVE_attributes['supercategory'] == supercat]\n", " for _, row in subset.iterrows():\n", " print(f\" - {row['name']} (id: {row['id']})\")\n", "\n", "# Create summary DataFrames for export\n", "SUGGESTIVE_summary = {\n", " 'type': ['category'] * len(SUGGESTIVE_categories) + ['attribute'] * len(SUGGESTIVE_attributes),\n", " 'id': list(SUGGESTIVE_categories['id']) + list(SUGGESTIVE_attributes['id']),\n", " 'name': list(SUGGESTIVE_categories['name']) + list(SUGGESTIVE_attributes['name']),\n", " 'supercategory': list(SUGGESTIVE_categories['supercategory']) + list(SUGGESTIVE_attributes['supercategory'])\n", "}\n", "\n", "SUGGESTIVE_summary_df = pd.DataFrame(SUGGESTIVE_summary)\n", 
"print(\"\\n\" + \"=\"*60)\n", "print(\"SUMMARY DATAFRAME (for export):\")\n", "print(\"=\"*60)\n", "print(SUGGESTIVE_summary_df)\n", "\n", "# Optionally save to CSV\n", "# SUGGESTIVE_summary_df.to_csv(DATA_PATH / \"SUGGESTIVE_labels.csv\", index=False)\n" ] }, { "cell_type": "code", "execution_count": 59, "id": "a46d616c", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SUGGESTIVE category IDs: set()\n", "SUGGESTIVE attribute IDs: {192, 106, 51, 148}\n", "\n", "Total unique SUGGESTIVE category IDs: 0\n", "Total unique SUGGESTIVE attribute IDs: 4\n", "\n", "============================================================\n", "FILTERING RESULTS:\n", "============================================================\n", "Total images in fashion_df: 333401\n", "Images with SUGGESTIVE content: 5218\n", "Percentage: 1.57%\n", "\n", "Breakdown:\n", " - Matched by category only: 0\n", " - Matched by attribute only: 5218\n", " - Matched by both: 0\n", "\n", "============================================================\n", "SAMPLE OF SUGGESTIVE IMAGES (first 10 rows):\n", "============================================================\n", " ImageId ClassId \\\n", "49 000b3a87508b0fa185fbd53ecbe2e4c6 33 \n", "147 001a66b16b12f12dc45e2bba40e04683 10 \n", "180 00211c06b1fe730097dde122cd4d3f8c 7 \n", "304 003ae3da258f7ba7267af5f159dd3502 10 \n", "369 0048f6c47de85cc4dc263912bd0ff6f5 33 \n", "372 0048f6c47de85cc4dc263912bd0ff6f5 7 \n", "445 005380bd939eb68085af3f804d387824 10 \n", "456 0054564ae183ad9a1b152eef0bc11e1d 10 \n", "465 0055347a114b215f8f469fec9e38c272 10 \n", "526 005e9b75edcee7d655c390ea5416641d 33 \n", "\n", " AttributesIds \n", "49 192 \n", "147 106,115,127,142,149,229,295,316 \n", "180 50,115,136,142,148,230,295,300,317 \n", "304 106,127,141,150,295,316,317 \n", "369 192 \n", "372 50,115,136,142,148,317 \n", "445 106,114,127,142,150,229,295,311,317 \n", "456 106,115,127,142,149,229,295,316,317 \n", "465 
# Get IDs of SUGGESTIVE categories and attributes
SUGGESTIVE_category_ids = set(SUGGESTIVE_categories['id'].tolist())
SUGGESTIVE_attribute_ids = set(SUGGESTIVE_attributes['id'].tolist())

print(f"SUGGESTIVE category IDs: {SUGGESTIVE_category_ids}")
print(f"SUGGESTIVE attribute IDs: {SUGGESTIVE_attribute_ids}")
print(f"\nTotal unique SUGGESTIVE category IDs: {len(SUGGESTIVE_category_ids)}")
print(f"Total unique SUGGESTIVE attribute IDs: {len(SUGGESTIVE_attribute_ids)}")


def has_SUGGESTIVE_attribute(attributes_str: str) -> bool:
    """Check if the comma-separated attributes string contains any SUGGESTIVE attribute ID.

    Returns False for NaN/empty values and for strings that do not parse as
    comma-separated integers.
    """
    if pd.isna(attributes_str) or attributes_str == '':
        return False
    # Parse comma-separated string and convert to integers.
    try:
        attr_ids = {int(x.strip()) for x in str(attributes_str).split(',')}
    except (ValueError, AttributeError):
        return False
    # isdisjoint avoids materializing the intersection just to test truthiness.
    return not SUGGESTIVE_attribute_ids.isdisjoint(attr_ids)


# An annotation is SUGGESTIVE if:
# 1. Its ClassId matches a SUGGESTIVE category, OR
# 2. Its AttributesIds contains any SUGGESTIVE attribute ID
#
# Compute each mask exactly ONCE and reuse it below — the original re-ran the
# expensive .apply over every row three times (mask + breakdown counts).
category_mask = fashion_df['ClassId'].isin(SUGGESTIVE_category_ids)
attribute_mask = fashion_df['AttributesIds'].apply(has_SUGGESTIVE_attribute)

SUGGESTIVE_mask = category_mask | attribute_mask
SUGGESTIVE_fashion_df = fashion_df[SUGGESTIVE_mask].copy()

print("\n" + "="*60)
print("FILTERING RESULTS:")
print("="*60)
print(f"Total images in fashion_df: {len(fashion_df)}")
print(f"Images with SUGGESTIVE content: {len(SUGGESTIVE_fashion_df)}")
print(f"Percentage: {len(SUGGESTIVE_fashion_df) / len(fashion_df) * 100:.2f}%")

# Show breakdown by type of match (reusing the cached masks).
category_matches = category_mask.sum()
attribute_matches = attribute_mask.sum()
both_matches = (category_mask & attribute_mask).sum()

print(f"\nBreakdown:")
print(f"  - Matched by category only: {category_matches - both_matches}")
print(f"  - Matched by attribute only: {attribute_matches - both_matches}")
print(f"  - Matched by both: {both_matches}")

# Show sample of SUGGESTIVE images
print("\n" + "="*60)
print("SAMPLE OF SUGGESTIVE IMAGES (first 10 rows):")
print("="*60)
print(SUGGESTIVE_fashion_df[['ImageId', 'ClassId', 'AttributesIds']].head(10))
# DIAGNOSTIC: Show what's actually being matched
# This helps identify which attributes/categories are causing matches

print("="*60)
print("DIAGNOSTIC: BREAKDOWN OF MATCHES")
print("="*60)

# Map ids -> human-readable names for both label tables.
attr_id_to_name = dict(zip(attributes_df['id'], attributes_df['name']))
cat_id_to_name = dict(zip(categories_df['id'], categories_df['name']))

# 1) Category-level matches inside the filtered frame.
print("\n1. Matches by Category (ClassId):")
category_matches = SUGGESTIVE_fashion_df[SUGGESTIVE_fashion_df['ClassId'].isin(SUGGESTIVE_category_ids)]
if len(category_matches) > 0:
    cat_counts = category_matches['ClassId'].value_counts()
    for cat_id, count in cat_counts.items():
        cat_name = cat_id_to_name.get(cat_id, f"Unknown (id: {cat_id})")
        print(f"  - {cat_name} (id: {cat_id}): {count} matches")
else:
    print("   No category matches")

# 2) Attribute-level matches: count how often each SUGGESTIVE attribute id
#    appears among the filtered rows.
print("\n2. Matches by Attribute (AttributesIds):")
matching_attributes = {}
for idx, row in SUGGESTIVE_fashion_df.iterrows():
    if pd.notna(row['AttributesIds']) and row['AttributesIds'] != '':
        # Narrowed exception handling: only malformed integer lists are
        # skipped — the original bare `except: pass` would also have hidden
        # real bugs (KeyError, NameError, ...).
        try:
            attr_ids = [int(x.strip()) for x in str(row['AttributesIds']).split(',')]
        except (ValueError, AttributeError):
            continue
        matching_attr_ids = SUGGESTIVE_attribute_ids.intersection(set(attr_ids))
        for attr_id in matching_attr_ids:
            matching_attributes[attr_id] = matching_attributes.get(attr_id, 0) + 1

if matching_attributes:
    for attr_id, count in sorted(matching_attributes.items(), key=lambda x: x[1], reverse=True):
        attr_name = attr_id_to_name.get(attr_id, f"Unknown (id: {attr_id})")
        print(f"  - {attr_name} (id: {attr_id}): {count} matches")
else:
    print("   No attribute matches")

# 3) Small sample with the attributes that triggered each match.
print("\n3. Sample rows with their matched attributes/categories:")
print("   (First 5 rows showing ImageId, ClassId, and matched attributes)")
for idx, row in SUGGESTIVE_fashion_df.head(5).iterrows():
    print(f"\n  ImageId: {row['ImageId']}")
    print(f"  ClassId: {row['ClassId']} -> {cat_id_to_name.get(row['ClassId'], 'Unknown')}")
    if pd.notna(row['AttributesIds']) and row['AttributesIds'] != '':
        try:
            attr_ids = [int(x.strip()) for x in str(row['AttributesIds']).split(',')]
        except (ValueError, AttributeError):
            continue
        matching_attr_ids = SUGGESTIVE_attribute_ids.intersection(set(attr_ids))
        if matching_attr_ids:
            print(f"  Matched Attributes: {[attr_id_to_name.get(aid, f'id:{aid}') for aid in matching_attr_ids]}")
annotations\n", " - shirt, blouse (id: 0): 317 annotations\n", " - coat (id: 9): 215 annotations\n", " - skirt (id: 8): 123 annotations\n", " - cardigan (id: 3): 83 annotations\n", " - sweater (id: 2): 79 annotations\n" ] } ], "source": [ "# Get unique image IDs (since same image can have multiple annotations)\n", "unique_SUGGESTIVE_image_ids = SUGGESTIVE_fashion_df['ImageId'].unique()\n", "unique_total_image_ids = fashion_df['ImageId'].unique()\n", "\n", "print(\"=\"*60)\n", "print(\"UNIQUE IMAGE ANALYSIS:\")\n", "print(\"=\"*60)\n", "print(f\"Total unique images in dataset: {len(unique_total_image_ids)}\")\n", "print(f\"Unique images with SUGGESTIVE content: {len(unique_SUGGESTIVE_image_ids)}\")\n", "print(f\"Percentage of unique images: {len(unique_SUGGESTIVE_image_ids) / len(unique_total_image_ids) * 100:.2f}%\")\n", "\n", "# Count how many annotations per SUGGESTIVE image\n", "annotations_per_image = SUGGESTIVE_fashion_df.groupby('ImageId').size().sort_values(ascending=False)\n", "print(f\"\\nAverage annotations per SUGGESTIVE image: {annotations_per_image.mean():.2f}\")\n", "print(f\"Max annotations for a single image: {annotations_per_image.max()}\")\n", "print(f\"Min annotations for a single image: {annotations_per_image.min()}\")\n", "\n", "# Show distribution of SUGGESTIVE categories in the filtered data\n", "print(\"\\n\" + \"=\"*60)\n", "print(\"DISTRIBUTION OF SUGGESTIVE CATEGORIES IN FILTERED DATA:\")\n", "print(\"=\"*60)\n", "category_counts = SUGGESTIVE_fashion_df['ClassId'].value_counts()\n", "print(category_counts)\n", "\n", "# Map category IDs to names for better readability\n", "category_id_to_name = dict(zip(categories_df['id'], categories_df['name']))\n", "print(\"\\nTop SUGGESTIVE categories by count:\")\n", "for cat_id, count in category_counts.head(10).items():\n", " cat_name = category_id_to_name.get(cat_id, f\"Unknown (id: {cat_id})\")\n", " print(f\" - {cat_name} (id: {cat_id}): {count} annotations\")\n", "\n", "# Save the filtered 
DataFrame\n", "# SUGGESTIVE_fashion_df.to_csv(DATA_PATH / \"SUGGESTIVE_train.csv\", index=False)\n", "# pd.Series(unique_SUGGESTIVE_image_ids).to_csv(DATA_PATH / \"SUGGESTIVE_image_ids.csv\", index=False, header=['ImageId'])\n" ] }, { "cell_type": "code", "execution_count": 62, "id": "a4736cf0", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "============================================================\n", "CREATING NEW DATASET\n", "============================================================\n", "Total unique SUGGESTIVE image IDs: 5079\n", "\n", "Created folder: /Users/youniss/Documents/GitHub/haram-police/data/new_dataset\n", "\n", "Copying images from /Users/youniss/Documents/GitHub/haram-police/data/train and /Users/youniss/Documents/GitHub/haram-police/data/test...\n", "\n", "✓ Successfully copied: 5079 images\n", "\n", "✓ Saved DataFrame to: /Users/youniss/Documents/GitHub/haram-police/data/SUGGESTIVE_fashion.csv\n", " Total rows: 5218\n", "\n", "============================================================\n", "SUMMARY:\n", "============================================================\n", " - Images folder: /Users/youniss/Documents/GitHub/haram-police/data/new_dataset\n", " - Images copied: 5079\n", " - CSV file: /Users/youniss/Documents/GitHub/haram-police/data/SUGGESTIVE_fashion.csv\n", " - CSV rows: 5218\n", " - Unique images: 5079\n" ] } ], "source": [ "# Create new_dataset folder and copy all SUGGESTIVE images from train and test\n", "import shutil\n", "\n", "# Get unique image IDs from SUGGESTIVE_fashion_df\n", "unique_SUGGESTIVE_image_ids = set(SUGGESTIVE_fashion_df['ImageId'].unique())\n", "\n", "print(\"=\"*60)\n", "print(\"CREATING NEW DATASET\")\n", "print(\"=\"*60)\n", "print(f\"Total unique SUGGESTIVE image IDs: {len(unique_SUGGESTIVE_image_ids)}\")\n", "\n", "# Create new_dataset folder\n", "NEW_DATASET_PATH = DATA_PATH / \"new_dataset\"\n", "NEW_DATASET_PATH.mkdir(exist_ok=True)\n", "print(f\"\\nCreated 
folder: {NEW_DATASET_PATH}\")\n", "\n", "# Paths to source folders\n", "TRAIN_IMAGE_PATH = DATA_PATH / \"train\"\n", "TEST_IMAGE_PATH = DATA_PATH / \"test\"\n", "\n", "# Copy images from train and test folders\n", "copied_count = 0\n", "not_found_count = 0\n", "not_found_ids = []\n", "\n", "print(f\"\\nCopying images from {TRAIN_IMAGE_PATH} and {TEST_IMAGE_PATH}...\")\n", "\n", "for image_id in unique_SUGGESTIVE_image_ids:\n", " image_filename = f\"{image_id}.jpg\"\n", " source_path = None\n", " \n", " # Try train folder first\n", " train_path = TRAIN_IMAGE_PATH / image_filename\n", " if train_path.exists():\n", " source_path = train_path\n", " else:\n", " # Try test folder\n", " test_path = TEST_IMAGE_PATH / image_filename\n", " if test_path.exists():\n", " source_path = test_path\n", " \n", " if source_path:\n", " dest_path = NEW_DATASET_PATH / image_filename\n", " shutil.copy2(source_path, dest_path)\n", " copied_count += 1\n", " else:\n", " not_found_count += 1\n", " not_found_ids.append(image_id)\n", "\n", "print(f\"\\n✓ Successfully copied: {copied_count} images\")\n", "if not_found_count > 0:\n", " print(f\"⚠ Not found: {not_found_count} images\")\n", " print(f\" First 10 missing IDs: {not_found_ids[:10]}\")\n", "\n", "# Save the SUGGESTIVE_fashion_df to CSV\n", "csv_path = DATA_PATH / \"SUGGESTIVE_fashion.csv\"\n", "SUGGESTIVE_fashion_df.to_csv(csv_path, index=False)\n", "print(f\"\\n✓ Saved DataFrame to: {csv_path}\")\n", "print(f\" Total rows: {len(SUGGESTIVE_fashion_df)}\")\n", "\n", "print(\"\\n\" + \"=\"*60)\n", "print(\"SUMMARY:\")\n", "print(\"=\"*60)\n", "print(f\" - Images folder: {NEW_DATASET_PATH}\")\n", "print(f\" - Images copied: {copied_count}\")\n", "print(f\" - CSV file: {csv_path}\")\n", "print(f\" - CSV rows: {len(SUGGESTIVE_fashion_df)}\")\n", "print(f\" - Unique images: {len(unique_SUGGESTIVE_image_ids)}\")\n" ] }, { "cell_type": "code", "execution_count": 63, "id": "a4bdc53c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ImageIdEncodedPixelsHeightWidthClassIdAttributesIds
49000b3a87508b0fa185fbd53ecbe2e4c6457283 2 458562 6 459841 9 461120 13 462400 15...128085233192
147001a66b16b12f12dc45e2bba40e0468364049 3 64548 10 65048 17 65548 23 65754 36 66...50037510106,115,127,142,149,229,295,316
18000211c06b1fe730097dde122cd4d3f8c296470 1 297469 3 298468 5 299467 8 300466 10 ...1000665750,115,136,142,148,230,295,300,317
304003ae3da258f7ba7267af5f159dd3502129565 3 130583 9 131602 14 132621 19 133641 2...102468310106,127,141,150,295,316,317
3690048f6c47de85cc4dc263912bd0ff6f54777361 1 4781320 3 4785279 5 4789239 7 479319...3960264033192
3720048f6c47de85cc4dc263912bd0ff6f53982550 2 3986509 8 3990469 13 3994429 18 3998...39602640750,115,136,142,148,317
445005380bd939eb68085af3f804d3878242317673 15 2320644 45 2323624 67 2326613 79 23...3000200110106,114,127,142,150,229,295,311,317
4560054564ae183ad9a1b152eef0bc11e1d195071 2 196093 5 197115 8 198134 13 199151 20...102468310106,115,127,142,149,229,295,316,317
4650055347a114b215f8f469fec9e38c272236337 20 237832 26 239327 33 240823 38 242320...1500100010106,115,127,142,149,229,295,316,317
526005e9b75edcee7d655c390ea5416641d480863 2 481943 3 483023 4 484102 6 485182 7 4...1080108033192
593006bb85ca0935680110f4ce67d88b4ee2461619 6 2463717 17 2465814 24 2467912 23 247...2096300010106,115,127,142,149,295,316,317
802009447b79fce7da1ee19a54401517cde23802163 9 23807451 27 23812746 37 23818049 40...53047952750,115,136,142,148,230,295,298,317
88400af8f65bb93f4131499dc9807129a241313044 41 1315994 123 1318943 207 1321893 289...30002000417,115,135,145,148,225,281,311,317
120100f7d06a8db722b86961d911fb9f1d9654821 5 55316 16 55811 27 56307 37 56802 48 57...50037510106,115,127,142,151,229,283,311
121700f843a44365248e179ad2a4897349131690956 5 1693572 11 1696188 15 1698804 18 170...2617150033192
126001098396b79639e29db8de146c2d0064959078 8 962065 22 965051 38 968037 54 971024 ...30002000750,148,234,295,316,317
1274010db49ecc226102e63815fcf56273191205717 31 1208132 69 1210571 82 1213011 94 12...2448244810112,115,119,145,148,229,295,306,323,325
13110116a12304c7f94686978f86100076f332646 31 33110 78 33587 109 34065 136 34542 15...49235410106,115,127,142,150,229,295,316,317
1356011afec5e443599a79261ece1a662043629287 14 631022 16 632757 18 634492 21 636227...17371157417,148,225,281,311,317
1371011c59f7c25d18027f4f9b2b1cffd44a168345 3 169142 7 169938 12 170735 17 171531 2...80080010101,115,129,145,148,289,301,317
\n", "
" ], "text/plain": [ " ImageId \\\n", "49 000b3a87508b0fa185fbd53ecbe2e4c6 \n", "147 001a66b16b12f12dc45e2bba40e04683 \n", "180 00211c06b1fe730097dde122cd4d3f8c \n", "304 003ae3da258f7ba7267af5f159dd3502 \n", "369 0048f6c47de85cc4dc263912bd0ff6f5 \n", "372 0048f6c47de85cc4dc263912bd0ff6f5 \n", "445 005380bd939eb68085af3f804d387824 \n", "456 0054564ae183ad9a1b152eef0bc11e1d \n", "465 0055347a114b215f8f469fec9e38c272 \n", "526 005e9b75edcee7d655c390ea5416641d \n", "593 006bb85ca0935680110f4ce67d88b4ee \n", "802 009447b79fce7da1ee19a54401517cde \n", "884 00af8f65bb93f4131499dc9807129a24 \n", "1201 00f7d06a8db722b86961d911fb9f1d96 \n", "1217 00f843a44365248e179ad2a489734913 \n", "1260 01098396b79639e29db8de146c2d0064 \n", "1274 010db49ecc226102e63815fcf5627319 \n", "1311 0116a12304c7f94686978f86100076f3 \n", "1356 011afec5e443599a79261ece1a662043 \n", "1371 011c59f7c25d18027f4f9b2b1cffd44a \n", "\n", " EncodedPixels Height Width \\\n", "49 457283 2 458562 6 459841 9 461120 13 462400 15... 1280 852 \n", "147 64049 3 64548 10 65048 17 65548 23 65754 36 66... 500 375 \n", "180 296470 1 297469 3 298468 5 299467 8 300466 10 ... 1000 665 \n", "304 129565 3 130583 9 131602 14 132621 19 133641 2... 1024 683 \n", "369 4777361 1 4781320 3 4785279 5 4789239 7 479319... 3960 2640 \n", "372 3982550 2 3986509 8 3990469 13 3994429 18 3998... 3960 2640 \n", "445 2317673 15 2320644 45 2323624 67 2326613 79 23... 3000 2001 \n", "456 195071 2 196093 5 197115 8 198134 13 199151 20... 1024 683 \n", "465 236337 20 237832 26 239327 33 240823 38 242320... 1500 1000 \n", "526 480863 2 481943 3 483023 4 484102 6 485182 7 4... 1080 1080 \n", "593 2461619 6 2463717 17 2465814 24 2467912 23 247... 2096 3000 \n", "802 23802163 9 23807451 27 23812746 37 23818049 40... 5304 7952 \n", "884 1313044 41 1315994 123 1318943 207 1321893 289... 3000 2000 \n", "1201 54821 5 55316 16 55811 27 56307 37 56802 48 57... 500 375 \n", "1217 1690956 5 1693572 11 1696188 15 1698804 18 170... 
2617 1500 \n", "1260 959078 8 962065 22 965051 38 968037 54 971024 ... 3000 2000 \n", "1274 1205717 31 1208132 69 1210571 82 1213011 94 12... 2448 2448 \n", "1311 32646 31 33110 78 33587 109 34065 136 34542 15... 492 354 \n", "1356 629287 14 631022 16 632757 18 634492 21 636227... 1737 1157 \n", "1371 168345 3 169142 7 169938 12 170735 17 171531 2... 800 800 \n", "\n", " ClassId AttributesIds \n", "49 33 192 \n", "147 10 106,115,127,142,149,229,295,316 \n", "180 7 50,115,136,142,148,230,295,300,317 \n", "304 10 106,127,141,150,295,316,317 \n", "369 33 192 \n", "372 7 50,115,136,142,148,317 \n", "445 10 106,114,127,142,150,229,295,311,317 \n", "456 10 106,115,127,142,149,229,295,316,317 \n", "465 10 106,115,127,142,149,229,295,316,317 \n", "526 33 192 \n", "593 10 106,115,127,142,149,295,316,317 \n", "802 7 50,115,136,142,148,230,295,298,317 \n", "884 4 17,115,135,145,148,225,281,311,317 \n", "1201 10 106,115,127,142,151,229,283,311 \n", "1217 33 192 \n", "1260 7 50,148,234,295,316,317 \n", "1274 10 112,115,119,145,148,229,295,306,323,325 \n", "1311 10 106,115,127,142,150,229,295,316,317 \n", "1356 4 17,148,225,281,311,317 \n", "1371 10 101,115,129,145,148,289,301,317 " ] }, "execution_count": 63, "metadata": {}, "output_type": "execute_result" } ], "source": [ "SUGGESTIVE_fashion_df.head(20)" ] }, { "cell_type": "code", "execution_count": 64, "id": "a9e2133d", "metadata": {}, "outputs": [], "source": [ "# Display the image belonging to the first row of SUGGESTIVE_fashion_df\n", "from PIL import Image\n", "\n", "# Get the first image ID from the SUGGESTIVE_fashion_df\n", "first_image_id = SUGGESTIVE_fashion_df.iloc[0]['ImageId']\n", "\n", "# Load from new_dataset/: the cell above copied every SUGGESTIVE image there,\n", "# whether it originated in data/train/ or data/test/ (data/train/ alone may miss it)\n", "image = Image.open(NEW_DATASET_PATH / f\"{first_image_id}.jpg\")\n", "\n", "# Bare last expression renders the image inline in the notebook\n", "# (image.show() would pop an external OS viewer and leave no output in the file)\n", "image" ] }, { "cell_type": "code", "execution_count": null, "id": "2d6351ab", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, 
"language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.5" } }, "nbformat": 4, "nbformat_minor": 5 }