ramagururadhakrishnan committed on
Commit
af59080
·
verified ·
1 Parent(s): 1eaff06

Added Source Folder

Browse files
Files changed (4) hide show
  1. src/dataset.py +78 -0
  2. src/inference.py +121 -0
  3. src/model.py +56 -0
  4. src/train.py +91 -0
src/dataset.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import torch
4
+ from torch.utils.data import Dataset, DataLoader
5
+ from PIL import Image
6
+ import torchvision.transforms as transforms
7
+
8
+ class NudeMultiLabelDataset(Dataset):
9
+ def __init__(self, data_dir, label_file, transform=None):
10
+ self.data_dir = data_dir
11
+ self.transform = transform
12
+ self.label_file = label_file
13
+
14
+ # Load labels
15
+ with open(label_file, "r") as f:
16
+ self.labels = json.load(f)
17
+
18
+ self.image_paths = list(self.labels.keys())
19
+ self.classes = sorted(set(tag for tags in self.labels.values() for tag in tags))
20
+ self.class_to_idx = {tag: idx for idx, tag in enumerate(self.classes)}
21
+
22
+ # Print dataset info
23
+ print(f"📂 Dataset loaded from: {data_dir}")
24
+ print(f"📄 Labels loaded from: {label_file}")
25
+ print(f"🖼️ Total images: {len(self.image_paths)}")
26
+ print(f"🏷️ Unique labels: {len(self.classes)}")
27
+ print(f"🔹 Label-to-Index Mapping: {self.class_to_idx}")
28
+
29
+ # Print example data
30
+ if self.image_paths:
31
+ example_img, example_label = self.__getitem__(0)
32
+ print(f"✅ Example Image Shape: {example_img.shape}")
33
+ print(f"✅ Example Label: {example_label}")
34
+
35
+ def __len__(self):
36
+ return len(self.image_paths)
37
+
38
+ def __getitem__(self, idx):
39
+ img_name = self.image_paths[idx]
40
+ img_path = os.path.join(self.data_dir, img_name)
41
+ image = Image.open(img_path).convert("RGB")
42
+
43
+ # Convert labels to multi-hot encoding
44
+ labels = self.labels[img_name]
45
+ label_tensor = torch.zeros(len(self.classes))
46
+ for tag in labels:
47
+ if tag in self.class_to_idx:
48
+ label_tensor[self.class_to_idx[tag]] = 1 # Multi-label
49
+
50
+ if self.transform:
51
+ image = self.transform(image)
52
+
53
+ return image, label_tensor
54
+
# 🔹 Main function to test the dataset independently
if __name__ == "__main__":
    # Paths to the sample data.
    DATA_DIR = "../data/images"  # Change to actual path
    LABEL_FILE = "../data/labels.json"  # Change to actual path

    # Standard ImageNet preprocessing: resize, tensorize, normalize.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    # Build the dataset and wrap it in a small test loader.
    dataset = NudeMultiLabelDataset(DATA_DIR, LABEL_FILE, transform=transform)
    dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

    # Pull exactly one batch and report its shapes.
    images, labels = next(iter(dataloader))
    print(f"🖼️ Batch Image Shape: {images.shape}")  # Should be [batch_size, 3, 224, 224]
    print(f"🏷️ Batch Labels: {labels}")
src/inference.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Explainable AI (XAI) Inference for Nude Multi-Label Classification
==================================================================

This script performs inference using a trained Swin Transformer model for
multi-label classification of nude images. It also integrates Class Activation
Mapping (CAM) to provide visual explanations for the model's predictions.

Author: Ramaguru Radhakrishnan
Date: March 2025
"""

import torch
import torchvision.transforms as transforms
from PIL import Image
import json
from model import SwinTransformerMultiLabel
from torchcam.methods import SmoothGradCAMpp  # Explainability module
import matplotlib.pyplot as plt
import numpy as np


# Define the number of output classes (should match the trained model)
NUM_CLASSES = 18

# Load the trained model with a correct classifier head
model = SwinTransformerMultiLabel(num_classes=NUM_CLASSES)

# Load model weights while ignoring mismatched layers
checkpoint_path = "../models/multi_nude_detector.pth"
checkpoint = torch.load(checkpoint_path, map_location="cpu")
model_dict = model.state_dict()

# Keep only checkpoint entries whose name AND shape match the current model.
filtered_checkpoint = {
    k: v for k, v in checkpoint.items() if k in model_dict and v.shape == model_dict[k].shape
}
model_dict.update(filtered_checkpoint)
model.load_state_dict(model_dict, strict=False)

# Set the model to evaluation mode
model.eval()

# Define image preprocessing transformations (must match training)
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to model's input size
    transforms.ToTensor(),  # Convert to tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize
])

# Load class labels from JSON file — same sorted-union ordering the dataset uses,
# so index i here matches output column i of the trained model.
with open("../data/labels.json", "r") as f:
    classes = sorted(set(tag for tags in json.load(f).values() for tag in tags))

# Validate that the number of classes matches
if len(classes) != NUM_CLASSES:
    raise ValueError(f"❌ Mismatch: Model expects {NUM_CLASSES} classes, but labels.json has {len(classes)} labels!")

# Load the test image
img_path = "C:\\Users\\RamaguruRadhakrishna\\Videos\\STAR-main\\STAR-main\\data\\images\\442_.jpeg"
image = Image.open(img_path).convert("RGB")  # Ensure RGB format
input_tensor = transform(image).unsqueeze(0)  # Add batch dimension

# Perform inference
with torch.no_grad():
    output = model(input_tensor)  # Forward pass through model
    print(f"🔹 Model Output Shape: {output.shape}")  # Debugging
    # BUG FIX: the model is trained with BCEWithLogitsLoss (see train.py), so
    # it emits raw logits. Map them to probabilities before thresholding —
    # comparing raw logits against 0.5 gives meaningless predictions.
    probs = torch.sigmoid(output)

# Get predicted labels (probability threshold = 0.5)
predicted_indices = [
    i for i in range(min(len(classes), probs.shape[1])) if probs[0][i] > 0.5
]
predicted_labels = [classes[i] for i in predicted_indices]

# Display predicted labels
print("✅ Predicted Tags:", predicted_labels)

# ===============================
# Explainable AI: CAM Visualization
# ===============================

# Print model structure to find the correct target layer
print(model)

# Print model architecture to identify available layers
print("🔍 Model Architecture:\n")
for name, module in model.named_modules():
    print(name)

# Choose a valid layer from the printed names.
# NOTE(review): the wrapper stores the backbone as self.model, so every
# submodule name is prefixed with "model." — the original "features.7.3"
# can never exist and always hit the ValueError below. Confirm the exact
# stage index against the printed architecture.
valid_target_layer = "model.features.7.1"  # Modify based on your model structure

# Verify if the layer exists in the model
if valid_target_layer not in dict(model.named_modules()):
    raise ValueError(f"❌ Layer '{valid_target_layer}' not found in model. Choose from:\n{list(dict(model.named_modules()).keys())}")

# Initialize SmoothGradCAMpp with a valid layer
cam_extractor = SmoothGradCAMpp(model, target_layer=valid_target_layer)

print("✅ SmoothGradCAMpp initialized successfully!")

# Re-run the forward pass WITH gradients enabled: SmoothGradCAMpp is a
# gradient-based method, so the earlier no_grad output cannot be reused.
output = model(input_tensor)

# Generate CAM heatmaps for each predicted label
for class_idx in predicted_indices:
    # BUG FIX: torchcam extractors return a list with one CAM per target
    # layer; the original called .squeeze() on the list itself and crashed.
    cam = cam_extractor(class_idx, output)[0]
    cam = cam.squeeze().cpu().numpy()
    # Normalize to [0, 1]; the epsilon guards against a flat (constant) map.
    cam = (cam - np.min(cam)) / (np.max(cam) - np.min(cam) + 1e-8)

    # Resize CAM to match input image dimensions.
    # BUG FIX: cast to uint8 first — Image.fromarray on a float array yields
    # mode 'F', which resizes and renders unreliably.
    cam_resized = np.array(Image.fromarray((cam * 255).astype(np.uint8)).resize(image.size))

    # Overlay CAM on the original image
    plt.figure(figsize=(6, 6))
    plt.imshow(image)
    plt.imshow(cam_resized, cmap='jet', alpha=0.5)  # Heatmap overlay
    plt.axis("off")
    plt.title(f"Explainability Heatmap for '{classes[class_idx]}'")
    plt.show()
src/model.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from torchvision.models import swin_t
4
+
5
+ class SwinTransformerMultiLabel(nn.Module):
6
+ def __init__(self, num_classes):
7
+ super(SwinTransformerMultiLabel, self).__init__()
8
+ self.model = swin_t(weights="IMAGENET1K_V1")
9
+
10
+ # Adjust final classification layer
11
+ in_features = self.model.head.in_features # Should be 768
12
+ self.model.head = nn.Linear(in_features, num_classes)
13
+
14
+ def forward(self, x):
15
+ x = self.model.features(x) # Extract features
16
+
17
+ print(f"🔹 Feature map shape before flattening: {x.shape}") # Debugging output
18
+
19
+ # ✅ Correctly apply GAP over height & width
20
+ x = x.mean(dim=[1, 2]) # Now shape is (batch_size, 768)
21
+ print(f"🔹 Feature shape after GAP: {x.shape}")
22
+
23
+ x = self.model.head(x) # Classification layer
24
+ return x
25
+
26
+
def main():
    """Smoke-test the model: push dummy batches through and print shapes."""
    # Define number of classes
    num_classes = 2

    # Build the model and switch to inference mode.
    model = SwinTransformerMultiLabel(num_classes)
    model.eval()

    # One forward pass with a dummy batch of 5 RGB 224x224 images.
    dummy_input = torch.randn(5, 3, 224, 224)
    output = model(dummy_input)
    print(f"✅ Model output shape: {output.shape}")  # Expected: (5, 2)

    # Show the classification head that replaced the pretrained one.
    print(f"✅ Model classification head: {model.model.head}")

    # Confirm the model handles several batch sizes.
    for batch_size in (1, 8, 16):
        output = model(torch.randn(batch_size, 3, 224, 224))
        print(f"✅ Batch Size {batch_size} -> Output Shape: {output.shape}")

if __name__ == "__main__":
    main()
src/train.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Transformer-based Nude Classification Model Training Script

Author: Ramaguru Radhakrishnan
Description:
    This script trains a multi-label classification model based on the Swin Transformer architecture
    to classify images into various adult content categories. The dataset and label information
    are provided as inputs, and the trained model is saved for later inference.

Usage:
    python train.py --data <path_to_dataset> --labels <path_to_labels.json> --save <path_to_save_model>

"""

import torch
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from dataset import NudeMultiLabelDataset
from model import SwinTransformerMultiLabel
import argparse
import os
import time

# Argument parser for command-line input
parser = argparse.ArgumentParser(description="Train a Transformer-based nude classification model")
parser.add_argument("--data", type=str, required=True, help="Path to dataset directory")
parser.add_argument("--labels", type=str, required=True, help="Path to labels.json file")
parser.add_argument("--save", type=str, required=True, help="Directory to save trained model")
args = parser.parse_args()

# Define image preprocessing transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize images to match model input size
    transforms.ToTensor(),  # Convert images to PyTorch tensors
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # Normalize image pixel values
])

# BUG FIX: the dataset must be constructed BEFORE the DataLoader that wraps
# it — the original created the DataLoader first and crashed at startup with
# NameError: name 'dataset' is not defined.
dataset = NudeMultiLabelDataset(args.data, args.labels, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)  # Create a data loader for batching

# Initialize the model and move it to the appropriate device (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SwinTransformerMultiLabel(num_classes=len(dataset.classes)).to(device)

# Define loss function and optimizer.
# BCEWithLogitsLoss expects raw logits from the model and multi-hot targets.
criterion = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

# Start measuring total training time
start_time = time.time()

# Training loop for multiple epochs
epochs = 50
for epoch in range(epochs):
    epoch_loss = 0.0
    epoch_start = time.time()  # Track time taken for each epoch

    for imgs, labels in dataloader:
        imgs, labels = imgs.to(device), labels.to(device)  # Move data to the model's device

        optimizer.zero_grad()  # Reset gradients before backpropagation
        outputs = model(imgs)  # Forward pass: Get model predictions

        # BUG FIX: removed the per-batch shape print()s — they flooded stdout
        # for every batch of every epoch.

        # Flatten stray spatial dimensions so the loss sees [batch, num_classes].
        if outputs.dim() > 2:
            outputs = outputs.view(outputs.size(0), -1)

        # Compute loss and update model parameters
        loss = criterion(outputs, labels)
        loss.backward()  # Compute gradients
        optimizer.step()  # Update model weights

        epoch_loss += loss.item()  # Accumulate loss for this epoch

    epoch_end = time.time()  # Record epoch end time
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss / len(dataloader)}, Time: {epoch_end - epoch_start:.2f} sec")

# End measuring total training time
end_time = time.time()
total_time = end_time - start_time

# Save trained model to the specified directory
os.makedirs(args.save, exist_ok=True)
torch.save(model.state_dict(), os.path.join(args.save, "star.pth"))
print(f"✅ Model saved at {args.save}/star.pth")
print(f"⏳ Total Training Time: {total_time:.2f} seconds ({total_time/60:.2f} minutes)")