In [53]:
import json
from pathlib import Path
from typing import Optional

import pandas as pd
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) into the process environment.
# NOTE(review): no cell visible here reads an environment variable — confirm this call is still needed.
load_dotenv()

True

In [54]:
# Resolve the project root: when the kernel was started inside notebooks/,
# the root is that directory's parent; otherwise the working directory is the root.
current_dir = Path.cwd()
if current_dir.name == "notebooks":
    PROJECT_ROOT = current_dir.parent
else:
    PROJECT_ROOT = current_dir
DATA_PATH = PROJECT_ROOT.joinpath("data")

# Annotation table; the same ImageId can appear on several rows (see preview).
fashion_df = pd.read_csv(DATA_PATH.joinpath("train.csv"))
fashion_df.head()


Unnamed: 0,ImageId,EncodedPixels,Height,Width,ClassId,AttributesIds
0,00000663ed1ff0c4e0132b9b9ac53f6e,6068157 7 6073371 20 6078584 34 6083797 48 608...,5214,3676,6,115136143154230295316317
1,00000663ed1ff0c4e0132b9b9ac53f6e,6323163 11 6328356 32 6333549 53 6338742 75 63...,5214,3676,0,115136142146225295316317
2,00000663ed1ff0c4e0132b9b9ac53f6e,8521389 10 8526585 30 8531789 42 8537002 46 85...,5214,3676,28,163
3,00000663ed1ff0c4e0132b9b9ac53f6e,12903854 2 12909064 7 12914275 10 12919485 15 ...,5214,3676,31,160204
4,00000663ed1ff0c4e0132b9b9ac53f6e,10837337 5 10842542 14 10847746 24 10852951 33...,5214,3676,32,219


In [55]:
# Read the label taxonomy. The context manager closes the file handle
# deterministically, and the encoding is pinned because JSON is UTF-8 by
# specification (the platform default may differ).
with open(DATA_PATH / "label_descriptions.json", encoding="utf-8") as labels_file:
    label_descriptions = json.load(labels_file)

# Category taxonomy as a table (columns shown below: id, name, supercategory, level).
categories_df = pd.DataFrame(label_descriptions["categories"])
categories_df

Unnamed: 0,id,name,supercategory,level
0,0,"shirt, blouse",upperbody,2
1,1,"top, t-shirt, sweatshirt",upperbody,2
2,2,sweater,upperbody,2
3,3,cardigan,upperbody,2
4,4,jacket,upperbody,2
5,5,vest,upperbody,2
6,6,pants,lowerbody,2
7,7,shorts,lowerbody,2
8,8,skirt,lowerbody,2
9,9,coat,wholebody,2


In [56]:
# Attribute taxonomy from the same label file (columns shown below: id, name, supercategory, level).
attributes_df = pd.DataFrame(label_descriptions["attributes"])
attributes_df


Unnamed: 0,id,name,supercategory,level
0,0,classic (t-shirt),nickname,1
1,1,polo (shirt),nickname,1
2,2,undershirt,nickname,1
3,3,henley (shirt),nickname,1
4,4,ringer (t-shirt),nickname,1
...,...,...,...,...
289,336,peacock,animal,2
290,337,zebra,animal,2
291,338,giraffe,animal,2
292,339,toile de jouy,textile pattern,1


In [57]:
# Deliberately narrow selection criteria for SUGGESTIVE content.
# Only labels that are clearly and unambiguously revealing are kept. Broader
# candidates (crop tops, halter tops, tube tops, mini length, tight fit, ...)
# were dropped because they produced too many false positives.

# Keyword lists, matched as case-insensitive substrings of a label name.
revealing_keywords = dict(
    # No category keywords remain: 'booty' and 'bodycon' are matched through
    # their attribute labels instead.
    categories=[],
    attributes=[
        'booty (shorts)',   # very specific label
        'bodycon (dress)',  # form-fitting, often revealing
        # dropped: crop (top), halter (top), tube (top), camisole, slip (dress)
    ],
)

# Attribute-name patterns grouped by attribute supercategory.
revealing_patterns = dict([
    # 'mini (length)' was dropped as too broad; 'micro' may still give false positives.
    ('length', ['micro (length)']),
    # off-the-shoulder / one shoulder were dropped: they can be modest.
    ('neckline type', ['plunging (neckline)']),
    # 'tight (fit)' was dropped: far too broad.
    ('silhouette', []),
    ('nickname', ['booty (shorts)', 'bodycon (dress)']),
])

def is_potentially_SUGGESTIVE(
    name: str,
    supercategory: Optional[str] = None,
    patterns: Optional[dict] = None,
    keywords: Optional[dict] = None,
) -> bool:
    """Return True when a category/attribute name matches the SUGGESTIVE criteria.

    Matching is a case-insensitive substring search, tried in this order:

    1. names containing ``'booty'``;
    2. when ``supercategory`` is given, the patterns registered for that
       supercategory;
    3. every keyword in the ``'categories'`` and ``'attributes'`` lists.

    Args:
        name: Label name to test.
        supercategory: Supercategory of the label, or None to skip step 2.
        patterns: Mapping of supercategory -> list of patterns. Defaults to
            the module-level ``revealing_patterns``.
        keywords: Mapping holding ``'categories'`` and ``'attributes'``
            keyword lists. Defaults to the module-level ``revealing_keywords``.

    Returns:
        True if any rule matches, otherwise False.
    """
    if patterns is None:
        patterns = revealing_patterns
    if keywords is None:
        keywords = revealing_keywords

    name_lower = name.lower()

    # Special case: "booty" is specific enough to match in any context.
    if 'booty' in name_lower:
        return True

    # Look the supercategory up in the configuration instead of repeating the
    # pattern strings here, so that editing the pattern mapping actually
    # changes what is matched. Unknown supercategories match nothing.
    if supercategory:
        for pattern in patterns.get(supercategory, ()):
            if pattern.lower() in name_lower:
                return True

    # Keyword lists apply regardless of supercategory.
    for group in ('categories', 'attributes'):
        for keyword in keywords[group]:
            if keyword.lower() in name_lower:
                return True

    return False

# Categories: only the name is available, so no supercategory is passed.
category_hits = categories_df['name'].map(is_potentially_SUGGESTIVE)
SUGGESTIVE_categories = categories_df[category_hits].copy()


# Attributes: test each row's name together with its supercategory.
def _attribute_row_matches(row) -> bool:
    """Apply the SUGGESTIVE check to one attributes_df row."""
    return is_potentially_SUGGESTIVE(row['name'], row['supercategory'])


attribute_hits = attributes_df.apply(_attribute_row_matches, axis=1)
SUGGESTIVE_attributes = attributes_df[attribute_hits].copy()

print(f"Found {len(SUGGESTIVE_categories)} potentially SUGGESTIVE categories")
print(f"Found {len(SUGGESTIVE_attributes)} potentially SUGGESTIVE attributes")

# Same three-column report for both tables.
report_rule = "=" * 60
report_columns = ['id', 'name', 'supercategory']
for report_title, report_frame in (
    ("POTENTIALLY SUGGESTIVE CATEGORIES:", SUGGESTIVE_categories),
    ("POTENTIALLY SUGGESTIVE ATTRIBUTES:", SUGGESTIVE_attributes),
):
    print("\n" + report_rule)
    print(report_title)
    print(report_rule)
    print(report_frame[report_columns].to_string(index=False))


Found 0 potentially SUGGESTIVE categories
Found 4 potentially SUGGESTIVE attributes

POTENTIALLY SUGGESTIVE CATEGORIES:
Empty DataFrame
Columns: [id, name, supercategory]
Index: []

POTENTIALLY SUGGESTIVE ATTRIBUTES:
 id                name supercategory
 51      booty (shorts)      nickname
106     bodycon (dress)      nickname
148      micro (length)        length
192 plunging (neckline) neckline type


In [58]:
# Report the flagged attributes grouped by supercategory.
banner = "=" * 60
print(banner)
print("BREAKDOWN BY SUPERCATEGORY:")
print(banner)

if len(SUGGESTIVE_attributes) > 0:
    sizes_by_supercategory = SUGGESTIVE_attributes.groupby('supercategory').size()
    print("\nAttributes by supercategory:")
    print(sizes_by_supercategory.sort_values(ascending=False))

    print("\nDetailed attribute breakdown:")
    # unique() keeps first-appearance order, which fixes the section order.
    for supercat in SUGGESTIVE_attributes['supercategory'].unique():
        print(f"\n{supercat}:")
        members = SUGGESTIVE_attributes[SUGGESTIVE_attributes['supercategory'] == supercat]
        for label_name, label_id in zip(members['name'], members['id']):
            print(f"  - {label_name} (id: {label_id})")

# Stack categories first, then attributes, into one column-oriented mapping
# that can be turned into a DataFrame for export.
summary_parts = (('category', SUGGESTIVE_categories), ('attribute', SUGGESTIVE_attributes))
SUGGESTIVE_summary = {
    'type': [kind for kind, part in summary_parts for _ in range(len(part))],
    'id': [value for _, part in summary_parts for value in part['id']],
    'name': [value for _, part in summary_parts for value in part['name']],
    'supercategory': [value for _, part in summary_parts for value in part['supercategory']],
}

SUGGESTIVE_summary_df = pd.DataFrame(SUGGESTIVE_summary)
print("\n" + banner)
print("SUMMARY DATAFRAME (for export):")
print(banner)
print(SUGGESTIVE_summary_df)

# Optionally save to CSV
# SUGGESTIVE_summary_df.to_csv(DATA_PATH / "SUGGESTIVE_labels.csv", index=False)


BREAKDOWN BY SUPERCATEGORY:

Attributes by supercategory:
supercategory
nickname         2
length           1
neckline type    1
dtype: int64

Detailed attribute breakdown:

nickname:
  - booty (shorts) (id: 51)
  - bodycon (dress) (id: 106)

length:
  - micro (length) (id: 148)

neckline type:
  - plunging (neckline) (id: 192)

SUMMARY DATAFRAME (for export):
        type   id                 name  supercategory
0  attribute   51       booty (shorts)       nickname
1  attribute  106      bodycon (dress)       nickname
2  attribute  148       micro (length)         length
3  attribute  192  plunging (neckline)  neckline type


In [59]:
# Collect the ids of the flagged labels into sets for membership tests.
SUGGESTIVE_category_ids = {label_id for label_id in SUGGESTIVE_categories['id'].tolist()}
SUGGESTIVE_attribute_ids = {label_id for label_id in SUGGESTIVE_attributes['id'].tolist()}

n_category_ids = len(SUGGESTIVE_category_ids)
n_attribute_ids = len(SUGGESTIVE_attribute_ids)

print(f"SUGGESTIVE category IDs: {SUGGESTIVE_category_ids}")
print(f"SUGGESTIVE attribute IDs: {SUGGESTIVE_attribute_ids}")
print(f"\nTotal unique SUGGESTIVE category IDs: {n_category_ids}")
print(f"Total unique SUGGESTIVE attribute IDs: {n_attribute_ids}")

# Row-level test used to build the attribute mask.
def has_SUGGESTIVE_attribute(attributes_str: str, suggestive_ids: Optional[set] = None) -> bool:
    """Check whether a comma-separated id string contains any SUGGESTIVE attribute id.

    Args:
        attributes_str: Value of one ``AttributesIds`` cell, e.g. ``"50,115,148"``.
            Missing values (NaN/None) and empty strings are treated as "no ids".
        suggestive_ids: Ids to look for. Defaults to the module-level
            ``SUGGESTIVE_attribute_ids``.

    Returns:
        True if at least one id in the string is in ``suggestive_ids``.
        Blank tokens (for example from a trailing comma) are ignored; a token
        that is not an integer makes the whole value count as no match.
    """
    if suggestive_ids is None:
        suggestive_ids = SUGGESTIVE_attribute_ids
    if pd.isna(attributes_str):
        return False

    tokens = (token.strip() for token in str(attributes_str).split(','))
    try:
        # Skip blank tokens so "51," is still recognised as containing 51.
        attr_ids = {int(token) for token in tokens if token}
    except ValueError:
        return False
    return not attr_ids.isdisjoint(suggestive_ids)

# Flag annotation rows as SUGGESTIVE when either
#   1. the ClassId is one of the SUGGESTIVE category ids, or
#   2. AttributesIds contains at least one SUGGESTIVE attribute id.
# Each mask is computed exactly once and reused for the statistics below;
# the per-row attribute parse is the expensive step on the full table.
category_mask = fashion_df['ClassId'].isin(SUGGESTIVE_category_ids)
attribute_mask = fashion_df['AttributesIds'].apply(has_SUGGESTIVE_attribute).astype(bool)

SUGGESTIVE_mask = category_mask | attribute_mask

SUGGESTIVE_fashion_df = fashion_df[SUGGESTIVE_mask].copy()

total_rows = len(fashion_df)
matched_rows = len(SUGGESTIVE_fashion_df)
# Guard the ratio so an empty input table reports 0% instead of raising.
matched_share = matched_rows / total_rows * 100 if total_rows else 0.0

print("\n" + "="*60)
print("FILTERING RESULTS:")
print("="*60)
print(f"Total images in fashion_df: {total_rows}")
print(f"Images with SUGGESTIVE content: {matched_rows}")
print(f"Percentage: {matched_share:.2f}%")

# Breakdown by type of match (row counts).
category_matches = category_mask.sum()
attribute_matches = attribute_mask.sum()
both_matches = (category_mask & attribute_mask).sum()

print("\nBreakdown:")
print(f"  - Matched by category only: {category_matches - both_matches}")
print(f"  - Matched by attribute only: {attribute_matches - both_matches}")
print(f"  - Matched by both: {both_matches}")

# Show sample of SUGGESTIVE images
print("\n" + "="*60)
print("SAMPLE OF SUGGESTIVE IMAGES (first 10 rows):")
print("="*60)
print(SUGGESTIVE_fashion_df[['ImageId', 'ClassId', 'AttributesIds']].head(10))


SUGGESTIVE category IDs: set()
SUGGESTIVE attribute IDs: {192, 106, 51, 148}

Total unique SUGGESTIVE category IDs: 0
Total unique SUGGESTIVE attribute IDs: 4

FILTERING RESULTS:
Total images in fashion_df: 333401
Images with SUGGESTIVE content: 5218
Percentage: 1.57%

Breakdown:
  - Matched by category only: 0
  - Matched by attribute only: 5218
  - Matched by both: 0

SAMPLE OF SUGGESTIVE IMAGES (first 10 rows):
                              ImageId  ClassId  \
49   000b3a87508b0fa185fbd53ecbe2e4c6       33   
147  001a66b16b12f12dc45e2bba40e04683       10   
180  00211c06b1fe730097dde122cd4d3f8c        7   
304  003ae3da258f7ba7267af5f159dd3502       10   
369  0048f6c47de85cc4dc263912bd0ff6f5       33   
372  0048f6c47de85cc4dc263912bd0ff6f5        7   
445  005380bd939eb68085af3f804d387824       10   
456  0054564ae183ad9a1b152eef0bc11e1d       10   
465  0055347a114b215f8f469fec9e38c272       10   
526  005e9b75edcee7d655c390ea5416641d       33   

                           Attr

In [60]:
# DIAGNOSTIC: show which categories / attributes are responsible for the matches.


def _matched_attribute_ids(raw_value) -> set:
    """Return the SUGGESTIVE attribute ids present in one AttributesIds value.

    Missing or empty values, and values containing a non-integer token, yield
    an empty set. Blank tokens (e.g. from a trailing comma) are ignored.
    """
    if pd.isna(raw_value) or raw_value == '':
        return set()
    tokens = (token.strip() for token in str(raw_value).split(','))
    try:
        attr_ids = {int(token) for token in tokens if token}
    except ValueError:
        # int() on a malformed token is the only expected failure here;
        # anything else should surface instead of being swallowed.
        return set()
    return SUGGESTIVE_attribute_ids.intersection(attr_ids)


print("="*60)
print("DIAGNOSTIC: BREAKDOWN OF MATCHES")
print("="*60)

# id -> name lookups for readable output
attr_id_to_name = dict(zip(attributes_df['id'], attributes_df['name']))
cat_id_to_name = dict(zip(categories_df['id'], categories_df['name']))

print("\n1. Matches by Category (ClassId):")
category_matches = SUGGESTIVE_fashion_df[SUGGESTIVE_fashion_df['ClassId'].isin(SUGGESTIVE_category_ids)]
if len(category_matches) > 0:
    cat_counts = category_matches['ClassId'].value_counts()
    for cat_id, count in cat_counts.items():
        cat_name = cat_id_to_name.get(cat_id, f"Unknown (id: {cat_id})")
        print(f"   - {cat_name} (id: {cat_id}): {count} matches")
else:
    print("   No category matches")

print("\n2. Matches by Attribute (AttributesIds):")
# Count, per SUGGESTIVE attribute id, how many annotation rows carry it.
# Only the AttributesIds column is needed, so iterate it directly.
matching_attributes = {}
for raw_value in SUGGESTIVE_fashion_df['AttributesIds']:
    for attr_id in _matched_attribute_ids(raw_value):
        matching_attributes[attr_id] = matching_attributes.get(attr_id, 0) + 1

if matching_attributes:
    for attr_id, count in sorted(matching_attributes.items(), key=lambda item: item[1], reverse=True):
        attr_name = attr_id_to_name.get(attr_id, f"Unknown (id: {attr_id})")
        print(f"   - {attr_name} (id: {attr_id}): {count} matches")
else:
    print("   No attribute matches")

print("\n3. Sample rows with their matched attributes/categories:")
print("   (First 5 rows showing ImageId, ClassId, and matched attributes)")
sample_rows = SUGGESTIVE_fashion_df.head(5)
for image_id, class_id, raw_value in zip(
    sample_rows['ImageId'], sample_rows['ClassId'], sample_rows['AttributesIds']
):
    print(f"\n   ImageId: {image_id}")
    print(f"   ClassId: {class_id} -> {cat_id_to_name.get(class_id, 'Unknown')}")
    matching_attr_ids = _matched_attribute_ids(raw_value)
    if matching_attr_ids:
        print(f"   Matched Attributes: {[attr_id_to_name.get(aid, f'id:{aid}') for aid in matching_attr_ids]}")


DIAGNOSTIC: BREAKDOWN OF MATCHES

1. Matches by Category (ClassId):
   No category matches

2. Matches by Attribute (AttributesIds):
   - micro (length) (id: 148): 3190 matches
   - bodycon (dress) (id: 106): 1144 matches
   - plunging (neckline) (id: 192): 966 matches
   - booty (shorts) (id: 51): 20 matches

3. Sample rows with their matched attributes/categories:
   (First 5 rows showing ImageId, ClassId, and matched attributes)

   ImageId: 000b3a87508b0fa185fbd53ecbe2e4c6
   ClassId: 33 -> neckline
   Matched Attributes: ['plunging (neckline)']

   ImageId: 001a66b16b12f12dc45e2bba40e04683
   ClassId: 10 -> dress
   Matched Attributes: ['bodycon (dress)']

   ImageId: 00211c06b1fe730097dde122cd4d3f8c
   ClassId: 7 -> shorts
   Matched Attributes: ['micro (length)']

   ImageId: 003ae3da258f7ba7267af5f159dd3502
   ClassId: 10 -> dress
   Matched Attributes: ['bodycon (dress)']

   ImageId: 0048f6c47de85cc4dc263912bd0ff6f5
   ClassId: 33 -> neckline
   Matched Attributes: ['plunging

In [61]:
# The same image can carry several annotation rows, so reduce both tables to
# their distinct image ids before comparing counts.
unique_SUGGESTIVE_image_ids = SUGGESTIVE_fashion_df['ImageId'].unique()
unique_total_image_ids = fashion_df['ImageId'].unique()
n_flagged_images = len(unique_SUGGESTIVE_image_ids)
n_all_images = len(unique_total_image_ids)

rule = "=" * 60
print(rule)
print("UNIQUE IMAGE ANALYSIS:")
print(rule)
print(f"Total unique images in dataset: {n_all_images}")
print(f"Unique images with SUGGESTIVE content: {n_flagged_images}")
print(f"Percentage of unique images: {n_flagged_images / n_all_images * 100:.2f}%")

# Number of flagged annotation rows per image, largest first.
rows_per_image = SUGGESTIVE_fashion_df.groupby('ImageId').size()
annotations_per_image = rows_per_image.sort_values(ascending=False)
print(f"\nAverage annotations per SUGGESTIVE image: {annotations_per_image.mean():.2f}")
print(f"Max annotations for a single image: {annotations_per_image.max()}")
print(f"Min annotations for a single image: {annotations_per_image.min()}")

# ClassId frequencies among the flagged annotation rows.
print("\n" + rule)
print("DISTRIBUTION OF SUGGESTIVE CATEGORIES IN FILTERED DATA:")
print(rule)
category_counts = SUGGESTIVE_fashion_df['ClassId'].value_counts()
print(category_counts)

# Translate the ten most frequent ClassIds into category names.
category_id_to_name = {
    cat_id: cat_label
    for cat_id, cat_label in zip(categories_df['id'], categories_df['name'])
}
print("\nTop SUGGESTIVE categories by count:")
top_categories = category_counts.head(10)
for cat_id, count in top_categories.items():
    if cat_id in category_id_to_name:
        cat_name = category_id_to_name[cat_id]
    else:
        cat_name = f"Unknown (id: {cat_id})"
    print(f"  - {cat_name} (id: {cat_id}): {count} annotations")

# Save the filtered DataFrame
# SUGGESTIVE_fashion_df.to_csv(DATA_PATH / "SUGGESTIVE_train.csv", index=False)
# pd.Series(unique_SUGGESTIVE_image_ids).to_csv(DATA_PATH / "SUGGESTIVE_image_ids.csv", index=False, header=['ImageId'])


UNIQUE IMAGE ANALYSIS:
Total unique images in dataset: 45623
Unique images with SUGGESTIVE content: 5079
Percentage of unique images: 11.13%

Average annotations per SUGGESTIVE image: 1.03
Max annotations for a single image: 4
Min annotations for a single image: 1

DISTRIBUTION OF SUGGESTIVE CATEGORIES IN FILTERED DATA:
ClassId
10    1502
33     965
7      857
4      684
1      322
0      317
9      215
8      123
3       83
2       79
11      51
5       11
12       8
37       1
Name: count, dtype: int64

Top SUGGESTIVE categories by count:
  - dress (id: 10): 1502 annotations
  - neckline (id: 33): 965 annotations
  - shorts (id: 7): 857 annotations
  - jacket (id: 4): 684 annotations
  - top, t-shirt, sweatshirt (id: 1): 322 annotations
  - shirt, blouse (id: 0): 317 annotations
  - coat (id: 9): 215 annotations
  - skirt (id: 8): 123 annotations
  - cardigan (id: 3): 83 annotations
  - sweater (id: 2): 79 annotations


In [62]:
# Create new_dataset folder and copy all SUGGESTIVE images from train and test
import shutil

# Get unique image IDs from SUGGESTIVE_fashion_df
unique_SUGGESTIVE_image_ids = set(SUGGESTIVE_fashion_df['ImageId'].unique())

print("="*60)
print("CREATING NEW DATASET")
print("="*60)
print(f"Total unique SUGGESTIVE image IDs: {len(unique_SUGGESTIVE_image_ids)}")

# Create new_dataset folder (parents=True so a missing data/ directory does
# not abort the cell; exist_ok=True keeps re-runs idempotent).
NEW_DATASET_PATH = DATA_PATH / "new_dataset"
NEW_DATASET_PATH.mkdir(parents=True, exist_ok=True)
print(f"\nCreated folder: {NEW_DATASET_PATH}")

# Source folders, searched in this order for each image.
TRAIN_IMAGE_PATH = DATA_PATH / "train"
TEST_IMAGE_PATH = DATA_PATH / "test"
source_folders = (TRAIN_IMAGE_PATH, TEST_IMAGE_PATH)

copied_count = 0
not_found_ids = []

print(f"\nCopying images from {TRAIN_IMAGE_PATH} and {TEST_IMAGE_PATH}...")

# Iterate in sorted order: a set of strings has no stable iteration order
# between runs, which would make the copy order and the "first 10 missing"
# report below non-reproducible.
for image_id in sorted(unique_SUGGESTIVE_image_ids):
    image_filename = f"{image_id}.jpg"
    source_path = next(
        (folder / image_filename for folder in source_folders if (folder / image_filename).exists()),
        None,
    )

    if source_path is None:
        not_found_ids.append(image_id)
        continue

    # copy2 also carries over file metadata (timestamps).
    shutil.copy2(source_path, NEW_DATASET_PATH / image_filename)
    copied_count += 1

# Derived from the list so the two can never disagree.
not_found_count = len(not_found_ids)

print(f"\n✓ Successfully copied: {copied_count} images")
if not_found_count > 0:
    print(f"⚠ Not found: {not_found_count} images")
    print(f"   First 10 missing IDs: {not_found_ids[:10]}")

# Save the SUGGESTIVE_fashion_df to CSV
csv_path = DATA_PATH / "SUGGESTIVE_fashion.csv"
SUGGESTIVE_fashion_df.to_csv(csv_path, index=False)
print(f"\n✓ Saved DataFrame to: {csv_path}")
print(f"   Total rows: {len(SUGGESTIVE_fashion_df)}")

print("\n" + "="*60)
print("SUMMARY:")
print("="*60)
print(f"  - Images folder: {NEW_DATASET_PATH}")
print(f"  - Images copied: {copied_count}")
print(f"  - CSV file: {csv_path}")
print(f"  - CSV rows: {len(SUGGESTIVE_fashion_df)}")
print(f"  - Unique images: {len(unique_SUGGESTIVE_image_ids)}")


CREATING NEW DATASET
Total unique SUGGESTIVE image IDs: 5079

Created folder: /Users/youniss/Documents/GitHub/haram-police/data/new_dataset

Copying images from /Users/youniss/Documents/GitHub/haram-police/data/train and /Users/youniss/Documents/GitHub/haram-police/data/test...

✓ Successfully copied: 5079 images

✓ Saved DataFrame to: /Users/youniss/Documents/GitHub/haram-police/data/SUGGESTIVE_fashion.csv
   Total rows: 5218

SUMMARY:
  - Images folder: /Users/youniss/Documents/GitHub/haram-police/data/new_dataset
  - Images copied: 5079
  - CSV file: /Users/youniss/Documents/GitHub/haram-police/data/SUGGESTIVE_fashion.csv
  - CSV rows: 5218
  - Unique images: 5079


In [63]:
# Preview the first 20 flagged annotation rows.
SUGGESTIVE_fashion_df.head(20)

Unnamed: 0,ImageId,EncodedPixels,Height,Width,ClassId,AttributesIds
49,000b3a87508b0fa185fbd53ecbe2e4c6,457283 2 458562 6 459841 9 461120 13 462400 15...,1280,852,33,192
147,001a66b16b12f12dc45e2bba40e04683,64049 3 64548 10 65048 17 65548 23 65754 36 66...,500,375,10,106115127142149229295316
180,00211c06b1fe730097dde122cd4d3f8c,296470 1 297469 3 298468 5 299467 8 300466 10 ...,1000,665,7,50115136142148230295300317
304,003ae3da258f7ba7267af5f159dd3502,129565 3 130583 9 131602 14 132621 19 133641 2...,1024,683,10,106127141150295316317
369,0048f6c47de85cc4dc263912bd0ff6f5,4777361 1 4781320 3 4785279 5 4789239 7 479319...,3960,2640,33,192
372,0048f6c47de85cc4dc263912bd0ff6f5,3982550 2 3986509 8 3990469 13 3994429 18 3998...,3960,2640,7,50115136142148317
445,005380bd939eb68085af3f804d387824,2317673 15 2320644 45 2323624 67 2326613 79 23...,3000,2001,10,106114127142150229295311317
456,0054564ae183ad9a1b152eef0bc11e1d,195071 2 196093 5 197115 8 198134 13 199151 20...,1024,683,10,106115127142149229295316317
465,0055347a114b215f8f469fec9e38c272,236337 20 237832 26 239327 33 240823 38 242320...,1500,1000,10,106115127142149229295316317
526,005e9b75edcee7d655c390ea5416641d,480863 2 481943 3 483023 4 484102 6 485182 7 4...,1080,1080,33,192


In [64]:
# Display the source photo of the first row in SUGGESTIVE_fashion_df.
# NOTE: this shows the image itself; the row's "EncodedPixels" mask is not
# decoded or drawn here.
from PIL import Image

# Get the first image ID from the SUGGESTIVE_fashion_df
first_image_id = SUGGESTIVE_fashion_df.iloc[0]['ImageId']

# The copy step in this notebook looks for images in train/ first and then
# test/, so use the same search order instead of assuming train/.
candidate_paths = [DATA_PATH / split / f"{first_image_id}.jpg" for split in ("train", "test")]
image_path = next((path for path in candidate_paths if path.exists()), None)
if image_path is None:
    raise FileNotFoundError(
        f"No image file found for {first_image_id}; looked in: {[str(p) for p in candidate_paths]}"
    )

# Read the pixels and release the file handle right away.
with Image.open(image_path) as opened_image:
    image = opened_image.copy()

# Leaving the image as the cell's last expression renders it inline in the
# notebook; Image.show() would launch an external viewer instead.
image