Spaces:
Sleeping
Sleeping
DariusGiannoli committed on
Commit Β·
f2e38f1
1
Parent(s): 3bec0b6
MobileNet, resnet, mid yolo, missing pipeline but recognition halfway
Browse files- README.md +6 -0
- models/mobilenet_v3_head.pkl +0 -0
- models/resnet18_head.pkl +0 -0
- notebooks/README.md +3 -0
- notebooks/dev_01_test_yolo.ipynb +0 -10
- notebooks/dev_02_test_mobilenet.ipynb +0 -0
- notebooks/dev_03_test_resnet.ipynb +0 -0
- notebooks/recognition/dev_01_test_yolo.ipynb +0 -0
- notebooks/recognition/dev_02_test_mobilenet.ipynb +0 -0
- notebooks/recognition/dev_03_test_resnet.ipynb +0 -0
- notebooks/{dev_04_test_rce.ipynb β recognition/dev_04_test_rce.ipynb} +0 -0
- src/config.py +16 -20
- src/detectors/mobilenet.py +103 -0
- src/detectors/resnet.py +122 -0
- src/utils/data_loader.py +0 -0
- src/utils/visualization.py +0 -0
- training/train_mobilenet.py +52 -0
- training/train_resenet.py +0 -0
- training/train_resnet.py +82 -0
- training/train_yolo.py +29 -0
README.md
CHANGED
|
@@ -1 +1,7 @@
|
|
| 1 |
Benchmarking comparison
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
Benchmarking comparison
|
| 2 |
+
|
| 3 |
+
- The Classical Baseline: ORB (Oriented FAST and Rotated BRIEF)
|
| 4 |
+
- The Efficient Rival: MobileNetV3-Small
|
| 5 |
+
- The Heavyweight Standard: ResNet-18
|
| 6 |
+
- The Modern Challenger: MobileViT (XXS)
|
| 7 |
+
- End-to-End Detector: YOLOv8n
|
models/mobilenet_v3_head.pkl
ADDED
|
Binary file (5.54 kB). View file
|
|
|
models/resnet18_head.pkl
ADDED
|
Binary file (5.02 kB). View file
|
|
|
notebooks/README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- Recognition only focuses on finding the bounding box (BB) of the object in the image, loading a trained / fine-tuned model for that object
|
| 2 |
+
|
| 3 |
+
- Pipeline fine-tunes/trains the model and then also runs the recognition
|
notebooks/dev_01_test_yolo.ipynb
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [],
|
| 3 |
-
"metadata": {
|
| 4 |
-
"language_info": {
|
| 5 |
-
"name": "python"
|
| 6 |
-
}
|
| 7 |
-
},
|
| 8 |
-
"nbformat": 4,
|
| 9 |
-
"nbformat_minor": 5
|
| 10 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
notebooks/dev_02_test_mobilenet.ipynb
DELETED
|
File without changes
|
notebooks/dev_03_test_resnet.ipynb
DELETED
|
File without changes
|
notebooks/recognition/dev_01_test_yolo.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/recognition/dev_02_test_mobilenet.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/recognition/dev_03_test_resnet.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/{dev_04_test_rce.ipynb β recognition/dev_04_test_rce.ipynb}
RENAMED
|
File without changes
|
src/config.py
CHANGED
|
@@ -1,28 +1,24 @@
|
|
| 1 |
-
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
FILE_PATH = Path(__file__).resolve()
|
| 7 |
-
PROJECT_ROOT = FILE_PATH.parent.parent # Go up two levels (src -> root)
|
| 8 |
|
| 9 |
-
#
|
| 10 |
DATA_DIR = PROJECT_ROOT / "data"
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
-
#
|
|
|
|
| 16 |
MODEL_PATHS = {
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
}
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
os.makedirs(MODELS_DIR, exist_ok=True)
|
| 27 |
-
os.makedirs(RESULTS_DIR / "logs", exist_ok=True)
|
| 28 |
-
os.makedirs(RESULTS_DIR / "plots", exist_ok=True)
|
|
|
|
| 1 |
+
# src/config.py
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
+
# Get project root
|
| 5 |
+
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# Data Paths
|
| 8 |
DATA_DIR = PROJECT_ROOT / "data"
|
| 9 |
+
ARTROOM_DIR = DATA_DIR / "artroom"
|
| 10 |
+
BIRD_YOLO_DIR = ARTROOM_DIR / "bird" / "yolo"
|
| 11 |
+
BIRD_YAML = BIRD_YOLO_DIR / "bird_data.yaml"
|
| 12 |
|
| 13 |
+
# Model Paths
|
| 14 |
+
MODEL_DIR = PROJECT_ROOT / "models"
|
| 15 |
MODEL_PATHS = {
|
| 16 |
+
# 'yolo': MODEL_DIR / "yolov8n.pt",
|
| 17 |
+
'yolo' : PROJECT_ROOT / "volov8n.pt",
|
| 18 |
+
'resnet': MODEL_DIR / "resnet18.pth",
|
| 19 |
+
'resnet_head': MODEL_DIR / "resnet18_head.pkl",
|
| 20 |
+
'mobilenet': MODEL_DIR / "mobilenet_v3.pth"
|
| 21 |
}
|
| 22 |
|
| 23 |
+
# Training Results
|
| 24 |
+
TRAINING_DIR = PROJECT_ROOT / "training"
|
|
|
|
|
|
|
|
|
src/detectors/mobilenet.py
CHANGED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import cv2
import numpy as np
import joblib
import time
from pathlib import Path
from src.config import MODEL_PATHS


class MobileNetDetector:
    """
    Wrapper for MobileNetV3-Small.

    Architecture: frozen MobileNetV3-Small backbone (feature extractor)
    + scikit-learn Logistic Regression head trained on top.
    Target: ultra-low latency (<3ms) feature extraction for robotics.
    """

    def __init__(self, device=None):
        # Prefer the Apple-Silicon GPU ("mps") when available, else CPU.
        self.device = device or ("mps" if torch.backends.mps.is_available() else "cpu")
        print(f"Initializing MobileNetV3 on {self.device}...")

        # 1. Initialize architecture (the 'small' variant, chosen for speed).
        self.backbone = models.mobilenet_v3_small(weights=None)

        # 2. Load local backbone weights if they exist on disk.
        model_path = MODEL_PATHS.get('mobilenet')
        if model_path and Path(model_path).exists():
            print(f"Loading backbone from {model_path}")
            state_dict = torch.load(model_path, map_location=self.device)
            try:
                self.backbone.load_state_dict(state_dict)
            except RuntimeError:
                # BUG FIX: this was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit. Only a key/shape mismatch
                # (RuntimeError) should trigger the lenient retry.
                # 'strict=False' is standard when loading backbones for
                # transfer learning.
                self.backbone.load_state_dict(state_dict, strict=False)
        else:
            print(f"Warning: Local weights not found at {model_path}")

        # 3. Cut off the classifier: Identity makes the forward pass return
        # the raw pooled feature vector instead of class logits.
        self.backbone.classifier = torch.nn.Identity()

        self.backbone.eval()  # inference mode; backbone stays frozen
        self.backbone.to(self.device)

        # 4. Preprocessing (standard ImageNet statistics).
        self.preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # 5. Load the head (the part we actually train). Its path is derived
        # from the backbone path: foo.pth -> foo_head.pkl.
        self.head_path = str(model_path).replace('.pth', '_head.pkl')
        self.head = None
        self.load_head()

    def load_head(self):
        """Load the trained Logistic Regression head from disk, if present."""
        if Path(self.head_path).exists():
            self.head = joblib.load(self.head_path)
            print(f"Loaded trained head from {self.head_path}")
        else:
            print("Head not found. Model is in FEATURE ONLY mode.")

    def _get_features(self, img):
        """Return a flat feature vector for one BGR (OpenCV) image."""
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_tensor = self.preprocess(img_rgb).unsqueeze(0).to(self.device)

        with torch.no_grad():
            features = self.backbone(input_tensor)

        # Flatten to 1-D so scikit-learn can consume it.
        return features.cpu().numpy().flatten()

    def train_head(self, images, labels):
        """Fit the Logistic Regression head on (images, labels) and save it.

        Raises:
            ValueError: if `images` is empty.
        """
        from sklearn.linear_model import LogisticRegression

        if not images:
            raise ValueError("No images provided.")

        print(f"Extracting features for {len(images)} images...")
        X_data = [self._get_features(img) for img in images]

        print("Fitting Logistic Regression...")
        self.head = LogisticRegression(max_iter=1000)
        self.head.fit(X_data, labels)

        joblib.dump(self.head, self.head_path)
        print(f"Model saved to {self.head_path}")

    def predict(self, image):
        """Classify one image.

        Returns:
            (label, confidence, inference_time_ms); ("Untrained", 0.0, 0.0)
            when no head has been trained/loaded yet.
        """
        if self.head is None:
            return "Untrained", 0.0, 0.0

        t0 = time.perf_counter()

        features = self._get_features(image)
        probs = self.head.predict_proba([features])[0]
        winner_idx = np.argmax(probs)

        label = self.head.classes_[winner_idx]
        conf = probs[winner_idx]

        t1 = time.perf_counter()
        return label, conf, (t1 - t0) * 1000
src/detectors/resnet.py
CHANGED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import cv2
import numpy as np
import joblib
import time
from pathlib import Path
from src.config import MODEL_PATHS


class ResNetDetector:
    """
    Wrapper for ResNet-18 Feature Extractor.

    Architecture: Local Frozen ResNet-18 Backbone + Logistic Regression Head.
    """

    def __init__(self, device=None):
        # Prefer the Apple-Silicon GPU ("mps") when available, else CPU.
        self.device = device or ("mps" if torch.backends.mps.is_available() else "cpu")

        # 1. Initialize the architecture (no pretrained weights).
        self.backbone = models.resnet18(weights=None)

        # 2. Load local weights from the path configured in config.py.
        resnet_path = MODEL_PATHS['resnet']
        if Path(resnet_path).exists():
            print(f"Loading local weights from {resnet_path}")
            state_dict = torch.load(resnet_path, map_location=self.device)

            try:
                self.backbone.load_state_dict(state_dict)
            except RuntimeError as e:
                # Key/shape mismatch: retry leniently (standard when reusing
                # a backbone for transfer learning).
                print("loading failed:", e)
                self.backbone.load_state_dict(state_dict, strict=False)
        else:
            # BUG FIX: was an uninformative `print("Error Loading")` that did
            # not say what failed or where to look.
            print(f"Warning: Local weights not found at {resnet_path}; "
                  "backbone is randomly initialized.")

        # 3. Prepare for feature extraction.
        self.backbone.eval()  # freeze layers (inference mode)
        self.backbone.to(self.device)

        # Remove the final classification layer; what remains outputs the
        # pooled 512-d feature map.
        self.feature_extractor = torch.nn.Sequential(*list(self.backbone.children())[:-1])

        # 4. Define preprocessing (standard ImageNet statistics).
        self.preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # 5. Load the trained head (the decision layer).
        self.head_path = MODEL_PATHS.get('resnet_head')
        self.head = None
        self.load_head()

    def load_head(self):
        """Loads the trained Logistic Regression head if it exists."""
        if self.head_path and Path(self.head_path).exists():
            self.head = joblib.load(self.head_path)
            print(f"Loaded trained head from {self.head_path}")
        else:
            print(f"No trained head found at {self.head_path}")

    def _get_features(self, img):
        """Internal method to turn an image into a math vector."""
        # Convert BGR (OpenCV) to RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Preprocess to tensor
        input_tensor = self.preprocess(img_rgb).unsqueeze(0).to(self.device)

        # Extract features
        with torch.no_grad():
            features = self.feature_extractor(input_tensor)

        # Flatten [1, 512, 1, 1] -> [512] for Scikit-Learn
        return features.cpu().numpy().flatten()

    def train_head(self, images, labels):
        """Trains the lightweight decision layer on top of the local ResNet.

        Raises:
            ValueError: if `images` is empty.
        """
        from sklearn.linear_model import LogisticRegression

        if not images:
            raise ValueError("No images provided for training.")

        print(f"Extracting features for {len(images)} images...")
        X_data = [self._get_features(img) for img in images]

        print("Fitting Logistic Regression...")
        self.head = LogisticRegression(max_iter=1000, C=1.0)
        self.head.fit(X_data, labels)

        # Save immediately so a crash later doesn't lose the trained head.
        if self.head_path:
            joblib.dump(self.head, self.head_path)
            print(f"Model saved to {self.head_path}")

    def predict(self, image):
        """Standard Interface: Returns (Label, Confidence, Time_ms).

        Returns ("Untrained", 0.0, 0.0) when no head is loaded.
        """
        if self.head is None:
            return "Untrained", 0.0, 0.0

        t0 = time.perf_counter()

        # 1. Get feature vector
        features = self._get_features(image)

        # 2. Get probabilities and pick the winning class
        probs = self.head.predict_proba([features])[0]
        winner_idx = np.argmax(probs)

        label = self.head.classes_[winner_idx]
        conf = probs[winner_idx]

        t1 = time.perf_counter()
        inference_ms = (t1 - t0) * 1000

        return label, conf, inference_ms
|
src/utils/data_loader.py
DELETED
|
File without changes
|
src/utils/visualization.py
DELETED
|
File without changes
|
training/train_mobilenet.py
CHANGED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys
import cv2
from pathlib import Path

# Path setup: make the project root importable (this script lives in training/).
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.append(str(PROJECT_ROOT))

from src.detectors.mobilenet import MobileNetDetector
from src.config import PROJECT_ROOT  # canonical root; shadows the local one above


def main():
    """Train the MobileNet Logistic-Regression head on the artroom bird set."""
    print("Starting MobileNetV3 Training Pipeline...")

    # 1. Load data: label each PNG by filename convention
    # ("bird" -> positive, room/wall/floor/empty -> background).
    images, labels = [], []
    train_dir = PROJECT_ROOT / "data/artroom/bird/yolo/train/images"

    print(f"Scanning {train_dir}...")
    for img_file in train_dir.glob("*.png"):
        img = cv2.imread(str(img_file))
        if img is None:
            continue  # unreadable/corrupt file

        fname = img_file.name.lower()
        if "bird" in fname:
            images.append(img)
            labels.append("bird")
        elif any(x in fname for x in ["room", "wall", "floor", "empty"]):
            images.append(img)
            labels.append("background")

    print("Data Summary:")
    print(f" - Birds: {labels.count('bird')}")
    print(f" - Backgrounds: {labels.count('background')}")

    if not images:
        print("No images found!")
        return

    # 2. Initialize & train
    detector = MobileNetDetector()
    detector.train_head(images, labels)

    # 3. Sanity check: predict on the first training image.
    print("\nSanity Check (Image 0):")
    lbl, conf, ms = detector.predict(images[0])
    print(f" Result: {lbl} | Conf: {conf:.2%} | Time: {ms:.2f}ms")
    print(f"Training Complete. Head saved to {detector.head_path}")


if __name__ == "__main__":
    main()
|
training/train_resenet.py
DELETED
|
File without changes
|
training/train_resnet.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys
import cv2
from pathlib import Path

# Add project root to path so we can import 'src'.
# We use .parent.parent because this script is inside 'training/'.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.append(str(PROJECT_ROOT))

from src.detectors.resnet import ResNetDetector
from src.config import PROJECT_ROOT  # canonical root; shadows the local one above


def load_data():
    """
    Scans the data folders and prepares clean lists for training.

    Returns:
        (images, labels): parallel lists of BGR images and string labels
        ("bird" for positives, "background" for negatives).
    """
    images = []
    labels = []

    # Images live in the YOLO training split.
    train_dir = PROJECT_ROOT / "data/artroom/bird/yolo/train/images"

    print(f"Scanning {train_dir}...")

    # Label by filename convention:
    # contains "bird" -> bird; contains "room"/"wall"/"floor"/"empty" -> background.
    for img_file in train_dir.glob("*.png"):
        img = cv2.imread(str(img_file))
        if img is None:
            continue  # unreadable/corrupt file

        filename = img_file.name.lower()

        if "bird" in filename:
            images.append(img)
            labels.append("bird")
        elif any(x in filename for x in ["room", "wall", "floor", "empty"]):
            images.append(img)
            labels.append("background")

    return images, labels


def main():
    # 1. Prepare Data
    print("Starting ResNet Training Pipeline...")
    images, labels = load_data()

    # Statistics
    n_bird = labels.count('bird')
    n_bg = labels.count('background')

    print("\nData Summary:")
    print(f" - Total Images: {len(images)}")
    print(f" - Birds (Positive): {n_bird}")
    print(f" - Backgrounds (Negative): {n_bg}")

    if len(images) == 0:
        print("Error: No images found. Check your 'data/artroom/bird/yolo/train/images' folder.")
        return

    # 2. Initialize Model
    detector = ResNetDetector()

    # 3. Train & Save
    detector.train_head(images, labels)

    # 4. Verification Test (Sanity Check)
    print("\nRunning Sanity Check on Image 0...")
    lbl, conf, ms = detector.predict(images[0])
    print(f" Result: {lbl} | Confidence: {conf:.2%} | Time: {ms:.2f}ms")
    print("\nTraining Complete. You can now use dev_03_test_resnet.ipynb")


if __name__ == "__main__":
    main()
|
training/train_yolo.py
CHANGED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys
import os

# Make the project root importable so `src` resolves when run as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from ultralytics import YOLO
# BUG FIX: `os` was imported a second time here; the duplicate is removed.
from src.config import MODEL_PATHS, TRAINING_DIR, BIRD_YAML


def run_fine_tuning():
    """Fine-tune YOLOv8-nano on the bird dataset described by BIRD_YAML."""
    # Load model (ultralytics auto-downloads yolov8n.pt if missing).
    model = YOLO('yolov8n.pt')
    # model = YOLO(MODEL_PATHS['yolo'])

    # Train the model
    results = model.train(
        data=BIRD_YAML,
        epochs=50,
        imgsz=640,
        batch=4,  # small batch to fit modest GPUs/Apple Silicon
        name='bird_artroom_finetune',
        project=str(TRAINING_DIR / "runs"),
        exist_ok=False,  # never overwrite a previous run's results
    )

    print("Training completed successfully!")


if __name__ == "__main__":
    run_fine_tuning()
|