Isa0
/

cat-detection

+import os
+import glob
+import cv2
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+import torchvision.models as models
+# 1. Dataset Definition
class CatLandmarkDataset(Dataset):
    """Cat images paired with ``<image>.jpg.cat`` landmark annotation files.

    Every ``*.jpg`` directly inside one of ``root_dirs`` is kept only when a
    sibling file named ``<image path>.cat`` exists. Folders that do not exist
    are skipped silently.

    Args:
        root_dirs: Iterable of folder paths to scan (non-recursive).
        img_size: Side length, in pixels, of the square the image is resized to.

    Each item is ``(img_tensor, landmarks_tensor)``:
        * ``img_tensor``: float32, shape ``(3, img_size, img_size)``, RGB,
          pixel values divided by 255.
        * ``landmarks_tensor``: float32, 1-D ``[x0, y0, x1, y1, ...]`` where
          x is divided by the original image width and y by its height.
    """

    def __init__(self, root_dirs, img_size=224):
        self.img_size = img_size
        self.image_paths = []
        self.label_paths = []
        for folder in root_dirs:
            if not os.path.exists(folder):
                continue
            jpg_pattern = os.path.join(folder, "*.jpg")
            # glob order depends on the filesystem; sort so that dataset
            # indices (and therefore any split) are stable across runs.
            for img_path in sorted(glob.glob(jpg_pattern)):
                cat_path = img_path + ".cat"
                if os.path.exists(cat_path):
                    self.image_paths.append(img_path)
                    self.label_paths.append(cat_path)
        print(f"[DATA] Total matching cat images: {len(self.image_paths)}")

    def __len__(self):
        return len(self.image_paths)

    @staticmethod
    def _read_landmarks(label_path):
        """Parse a ``.cat`` file into an ``(N, 2)`` float32 array of (x, y).

        The first whitespace-separated token is skipped (presumably the point
        count); the remaining tokens are read as alternating x/y values.

        Raises:
            ValueError: If no coordinates remain or their count is odd.
        """
        with open(label_path, 'r') as f:
            tokens = f.read().split()
        values = np.array([float(tok) for tok in tokens[1:]], dtype=np.float32)
        if values.size == 0 or values.size % 2 != 0:
            raise ValueError(
                f"Malformed landmark file (expected x/y pairs): {label_path}"
            )
        return values.reshape(-1, 2)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(img_path)
        if img is None:
            raise RuntimeError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        orig_h, orig_w = img.shape[:2]

        landmarks = self._read_landmarks(self.label_paths[idx])

        img_resized = cv2.resize(img, (self.img_size, self.img_size))

        # Rescaling to the resized square and then dividing by its side
        # cancels out: the normalized coordinate is simply x / width, y / height.
        landmarks[:, 0] /= orig_w
        landmarks[:, 1] /= orig_h

        # HWC uint8 -> CHW float32 scaled by 1/255.
        img_tensor = torch.tensor(img_resized, dtype=torch.float32).permute(2, 0, 1) / 255.0
        landmarks_tensor = torch.tensor(landmarks.flatten(), dtype=torch.float32)
        return img_tensor, landmarks_tensor
+# 2. Model Architecture (MobileNetV3 Small)
def get_model(num_landmarks=9, pretrained=True):
    """Build a MobileNetV3-Small whose final layer regresses landmark coordinates.

    Args:
        num_landmarks: Number of (x, y) points to predict; the replaced final
            layer outputs ``num_landmarks * 2`` values (18 by default).
        pretrained: When True, load ``MobileNet_V3_Small_Weights.DEFAULT``;
            when False, start from random weights (no weight download needed).

    Returns:
        The torchvision model with ``classifier[3]`` replaced by a fresh
        ``nn.Linear`` regression layer.
    """
    weights = models.MobileNet_V3_Small_Weights.DEFAULT if pretrained else None
    model = models.mobilenet_v3_small(weights=weights)
    # Swap the classification layer for a regression layer of the same input
    # width; its weights are newly initialized even when pretrained=True.
    in_features = model.classifier[3].in_features
    model.classifier[3] = nn.Linear(in_features, num_landmarks * 2)
    return model
+# 3. Training Function
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return the per-sample mean loss.

    When ``optimizer`` is given, a gradient step is taken for every batch;
    otherwise the pass is evaluation-only. Returns NaN if no samples were seen.
    """
    total_loss = 0.0
    num_samples = 0
    for images, landmarks in loader:
        images = images.to(device)
        landmarks = landmarks.to(device)
        if optimizer is not None:
            optimizer.zero_grad()
        loss = criterion(model(images), landmarks)
        if optimizer is not None:
            loss.backward()
            optimizer.step()
        # criterion returns a batch mean; weight it by the batch size so a
        # smaller final batch does not skew the epoch average.
        batch_size = images.size(0)
        total_loss += loss.item() * batch_size
        num_samples += batch_size
    # Divide by the samples actually processed rather than len(loader.dataset):
    # this stays correct with drop_last/samplers and with an empty loader.
    return total_loss / num_samples if num_samples else float("nan")


def train_model(model, train_loader, val_loader, epochs=10, lr=0.001, device="cpu"):
    """Train ``model`` with MSE loss and Adam, printing losses after each epoch.

    Args:
        model: Module mapping an image batch to a tensor shaped like the targets.
        train_loader: Iterable yielding ``(images, landmarks)`` tensor batches.
        val_loader: Iterable of the same form, used for evaluation only.
        epochs: Number of passes over ``train_loader``.
        lr: Adam learning rate.
        device: Device the model and every batch are moved to.

    Returns:
        The trained model (on ``device``).
    """
    model = model.to(device)
    criterion = nn.MSELoss()  # squared error on the regressed coordinates
    optimizer = optim.Adam(model.parameters(), lr=lr)
    print(f"\n[TRAINING] Starting... Device: {device}")
    for epoch in range(epochs):
        model.train()
        train_loss = _run_epoch(model, train_loader, criterion, device, optimizer)

        # Validation phase: eval mode, no gradient tracking.
        model.eval()
        with torch.no_grad():
            val_loss = _run_epoch(model, val_loader, criterion, device)

        print(f"Epoch [{epoch+1}/{epochs}] -> Train Loss: {train_loss:.6f} | Val Loss: {val_loss:.6f}")
    return model
+# 4. Export to ONNX Format
def export_to_onnx(model, save_path="cat_landmark_model.onnx", img_size=224, opset_version=11):
    """Export ``model`` to an ONNX file by tracing it with a dummy image batch.

    Args:
        model: Module to export; it is switched to eval mode as a side effect.
        save_path: Destination path of the ``.onnx`` file.
        img_size: Side length of the square dummy input; it should match the
            size the model was trained on.
        opset_version: ONNX opset passed to ``torch.onnx.export``.

    The exported graph has one input named ``input`` of shape
    ``(1, 3, img_size, img_size)`` and one output named ``output``.
    """
    model.eval()
    # Create the dummy input on the model's device; a module without
    # parameters has no device to copy, so fall back to CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    dummy_input = torch.randn(1, 3, img_size, img_size, device=device)
    print("\n[ONNX] Converting model to ONNX format...")
    torch.onnx.export(
        model,
        dummy_input,
        save_path,
        export_params=True,
        opset_version=opset_version,
        do_constant_folding=True,
        input_names=['input'],
        output_names=['output'],
    )
    print(f"[ONNX] Successfully saved: {save_path}")
+# Main Execution
def main():
    """Load the cat dataset, train the landmark model, and save .pth and .onnx files."""
    # Folder paths (update these to match your file structure):
    # /content/CAT_00 ... /content/CAT_06
    data_dirs = [f"/content/CAT_{i:02d}" for i in range(7)]
    # Use the GPU when CUDA is available, otherwise the CPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # 1. Load data
    full_dataset = CatLandmarkDataset(root_dirs=data_dirs, img_size=224)
    if len(full_dataset) == 0:
        print("[ERROR] No data found in the specified folders! Please check file paths.")
        return

    # Split data into 90% training / 10% validation.
    train_size = int(0.9 * len(full_dataset))
    val_size = len(full_dataset) - train_size
    if train_size == 0:
        # Happens with a single image; a shuffling DataLoader cannot be
        # built over an empty training subset.
        print("[ERROR] Not enough data to create a training split (need at least 2 images).")
        return
    # A fixed seed keeps the validation subset identical between runs, so
    # validation losses from different runs are comparable.
    split_generator = torch.Generator().manual_seed(42)
    train_dataset, val_dataset = torch.utils.data.random_split(
        full_dataset, [train_size, val_size], generator=split_generator
    )
    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # 2. Get model
    cat_model = get_model()

    # 3. Train (5 epochs for quick Colab execution; increase if desired)
    trained_model = train_model(cat_model, train_loader, val_loader, epochs=5, lr=0.001, device=device)

    # 4. Save PyTorch weights as a backup
    torch.save(trained_model.state_dict(), "cat_landmark_model.pth")
    print("\n[SAVE] PyTorch weights saved (cat_landmark_model.pth)")

    # 5. Convert to ONNX for running on low-end devices
    export_to_onnx(trained_model, save_path="cat_landmark_model.onnx")


if __name__ == "__main__":
    main()