DariusGiannoli committed on
Commit
f2e38f1
Β·
1 Parent(s): 3bec0b6

MobileNet and ResNet done, YOLO in progress; pipeline missing but recognition halfway done

Browse files
README.md CHANGED
@@ -1 +1,7 @@
1
  Benchmarking comparison
 
 
 
 
 
 
 
1
  Benchmarking comparison
2
+
3
+ - The Classical Baseline: ORB (Oriented FAST and Rotated BRIEF)
4
+ - The Efficient Rival: MobileNetV3-Small
5
+ - The Heavyweight Standard: ResNet-18
6
+ - The Modern Challenger: MobileViT (XXS)
7
+ - End-to-End Detector: YOLOv8n
models/mobilenet_v3_head.pkl ADDED
Binary file (5.54 kB). View file
 
models/resnet18_head.pkl ADDED
Binary file (5.02 kB). View file
 
notebooks/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ - Recognition focuses only on finding the bounding box (BB) of the object in the image, loading a model trained / fine-tuned on that object
2
+
3
+ - Pipeline fine-tunes/trains the model and then also runs the recognition
notebooks/dev_01_test_yolo.ipynb DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "cells": [],
3
- "metadata": {
4
- "language_info": {
5
- "name": "python"
6
- }
7
- },
8
- "nbformat": 4,
9
- "nbformat_minor": 5
10
- }
 
 
 
 
 
 
 
 
 
 
 
notebooks/dev_02_test_mobilenet.ipynb DELETED
File without changes
notebooks/dev_03_test_resnet.ipynb DELETED
File without changes
notebooks/recognition/dev_01_test_yolo.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/recognition/dev_02_test_mobilenet.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/recognition/dev_03_test_resnet.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/{dev_04_test_rce.ipynb β†’ recognition/dev_04_test_rce.ipynb} RENAMED
File without changes
src/config.py CHANGED
@@ -1,28 +1,24 @@
1
- import os
2
  from pathlib import Path
3
 
4
- # 1. Get the absolute path of the project root
5
- # This works no matter where you run the script from
6
- FILE_PATH = Path(__file__).resolve()
7
- PROJECT_ROOT = FILE_PATH.parent.parent # Go up two levels (src -> root)
8
 
9
- # 2. Define Key Directories
10
  DATA_DIR = PROJECT_ROOT / "data"
11
- MODELS_DIR = PROJECT_ROOT / "models"
12
- RESULTS_DIR = PROJECT_ROOT / "results"
13
- SRC_DIR = PROJECT_ROOT / "src"
14
 
15
- # 3. Define Model Paths (Single Source of Truth)
 
16
  MODEL_PATHS = {
17
- "yolo": MODELS_DIR / "yolov8n.pt",
18
- "mobilenet": MODELS_DIR / "mobilenet_v3.pth",
19
- "resnet": MODELS_DIR / "resnet18.pth",
20
- # The compiled C++ executable path
21
- "rce_cpp_exe": SRC_DIR / "cpp_engine" / "build" / "rce_engine"
22
  }
23
 
24
- # 4. Create directories if they don't exist
25
- os.makedirs(DATA_DIR, exist_ok=True)
26
- os.makedirs(MODELS_DIR, exist_ok=True)
27
- os.makedirs(RESULTS_DIR / "logs", exist_ok=True)
28
- os.makedirs(RESULTS_DIR / "plots", exist_ok=True)
 
1
+ # src/config.py
2
  from pathlib import Path
3
 
4
+ # Get project root
5
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
 
 
6
 
7
+ # Data Paths
8
  DATA_DIR = PROJECT_ROOT / "data"
9
+ ARTROOM_DIR = DATA_DIR / "artroom"
10
+ BIRD_YOLO_DIR = ARTROOM_DIR / "bird" / "yolo"
11
+ BIRD_YAML = BIRD_YOLO_DIR / "bird_data.yaml"
12
 
13
+ # Model Paths
14
+ MODEL_DIR = PROJECT_ROOT / "models"
15
  MODEL_PATHS = {
16
+ # 'yolo': MODEL_DIR / "yolov8n.pt",
17
+ 'yolo' : PROJECT_ROOT / "volov8n.pt",
18
+ 'resnet': MODEL_DIR / "resnet18.pth",
19
+ 'resnet_head': MODEL_DIR / "resnet18_head.pkl",
20
+ 'mobilenet': MODEL_DIR / "mobilenet_v3.pth"
21
  }
22
 
23
+ # Training Results
24
+ TRAINING_DIR = PROJECT_ROOT / "training"
 
 
 
src/detectors/mobilenet.py CHANGED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchvision.models as models
3
+ import torchvision.transforms as transforms
4
+ import cv2
5
+ import numpy as np
6
+ import joblib
7
+ import time
8
+ from pathlib import Path
9
+ from src.config import MODEL_PATHS
10
+
11
class MobileNetDetector:
    """
    Professional Wrapper for MobileNetV3-Small.

    Architecture: frozen MobileNetV3-Small backbone used as a feature extractor,
    plus a scikit-learn Logistic Regression head trained via `train_head`.
    Target: Ultra-low latency (<3ms) feature extraction for robotics.
    """

    def __init__(self, device=None):
        """
        Args:
            device: torch device string; defaults to "mps" when available, else "cpu".
        """
        self.device = device or ("mps" if torch.backends.mps.is_available() else "cpu")
        print(f"📱 Initializing MobileNetV3 on {self.device}...")

        # 1. Initialize Architecture (Small version = Speed)
        self.backbone = models.mobilenet_v3_small(weights=None)

        # 2. Load Local Weights (The Backbone)
        model_path = MODEL_PATHS.get('mobilenet')
        if model_path and Path(model_path).exists():
            print(f"📂 Loading backbone from {model_path}")
            state_dict = torch.load(model_path, map_location=self.device)
            try:
                self.backbone.load_state_dict(state_dict)
            except RuntimeError:
                # FIX: was a bare `except:`, which also swallowed KeyboardInterrupt
                # and SystemExit. load_state_dict raises RuntimeError on mismatch.
                # 'strict=False' is standard when loading backbones for transfer learning.
                self.backbone.load_state_dict(state_dict, strict=False)
        else:
            # Model continues with randomly initialized weights.
            print(f"⚠️ Warning: Local weights not found at {model_path}")

        # 3. Cut off the Classifier
        # We replace the final classifier block with Identity to get raw features.
        self.backbone.classifier = torch.nn.Identity()

        self.backbone.eval()
        self.backbone.to(self.device)

        # 4. Preprocessing (Standard ImageNet stats)
        self.preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # 5. Load the Head (The Brain we train)
        # Derive the head path from the backbone path. FIX: when the 'mobilenet'
        # entry is missing, str(None).replace(...) produced the bogus path "None";
        # now head_path stays None and save/load are skipped.
        self.head_path = str(model_path).replace('.pth', '_head.pkl') if model_path else None
        self.head = None
        self.load_head()

    def load_head(self):
        """Load the trained Logistic Regression head from disk, if present."""
        if self.head_path and Path(self.head_path).exists():
            self.head = joblib.load(self.head_path)
            print(f"✅ Loaded trained head from {self.head_path}")
        else:
            print(f"⚠️ Head not found. Model is in FEATURE ONLY mode.")

    def _get_features(self, img):
        """Return a flat numpy feature vector for one BGR (OpenCV) image."""
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_tensor = self.preprocess(img_rgb).unsqueeze(0).to(self.device)

        with torch.no_grad():
            features = self.backbone(input_tensor)

        return features.cpu().numpy().flatten()

    def train_head(self, images, labels):
        """
        Fit the Logistic Regression head on (images, labels) and persist it.

        Args:
            images: list of BGR images (numpy arrays).
            labels: list of class labels, same length as `images`.

        Raises:
            ValueError: if `images` is empty.
        """
        from sklearn.linear_model import LogisticRegression

        if not images:
            raise ValueError("No images provided.")

        print(f"⏳ Extracting features for {len(images)} images...")
        X_data = [self._get_features(img) for img in images]

        print("🎓 Fitting Logistic Regression...")
        self.head = LogisticRegression(max_iter=1000)
        self.head.fit(X_data, labels)

        # FIX: only persist when a head path could be derived (see __init__).
        if self.head_path:
            joblib.dump(self.head, self.head_path)
            print(f"💾 Model saved to {self.head_path}")

    def predict(self, image):
        """
        Classify one image.

        Returns:
            (label, confidence, inference_ms); ("Untrained", 0.0, 0.0) when
            no head has been trained or loaded.
        """
        if self.head is None:
            return "Untrained", 0.0, 0.0

        t0 = time.perf_counter()

        features = self._get_features(image)
        probs = self.head.predict_proba([features])[0]
        winner_idx = np.argmax(probs)

        label = self.head.classes_[winner_idx]
        conf = probs[winner_idx]

        t1 = time.perf_counter()
        return label, conf, (t1 - t0) * 1000
src/detectors/resnet.py CHANGED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torchvision.models as models
3
+ import torchvision.transforms as transforms
4
+ import cv2
5
+ import numpy as np
6
+ import joblib
7
+ import time
8
+ from pathlib import Path
9
+ from src.config import MODEL_PATHS
10
+
11
class ResNetDetector:
    """
    Wrapper for ResNet-18 Feature Extractor.

    Architecture: Local Frozen ResNet-18 Backbone + Logistic Regression Head.
    `predict` returns (label, confidence, inference_ms).
    """

    def __init__(self, device=None):
        """
        Args:
            device: torch device string; defaults to "mps" when available, else "cpu".
        """
        self.device = device or ("mps" if torch.backends.mps.is_available() else "cpu")

        # 1. Initialize the Architecture (Empty)
        self.backbone = models.resnet18(weights=None)

        # 2. Load Local Weights
        # FIX: use .get() so a missing config entry degrades gracefully instead of
        # raising KeyError — consistent with MobileNetDetector.
        resnet_path = MODEL_PATHS.get('resnet')  # Defined in config.py
        if resnet_path and Path(resnet_path).exists():
            print(f"Loading local weights from {resnet_path}")
            state_dict = torch.load(resnet_path, map_location=self.device)

            try:
                self.backbone.load_state_dict(state_dict)
            except RuntimeError as e:
                print("loading failed:", e)
                # strict=False tolerates key mismatches when transfer-learning.
                self.backbone.load_state_dict(state_dict, strict=False)
        else:
            # FIX: was the bare message "Error Loading" with no context even though
            # the model silently continued with random weights.
            print(f"⚠️ Warning: Local weights not found at {resnet_path}")

        # 3. Prepare for Feature Extraction
        self.backbone.eval()  # Freeze layers (inference mode)
        self.backbone.to(self.device)

        # Remove the final classification layer; keep everything up to avgpool.
        self.feature_extractor = torch.nn.Sequential(*list(self.backbone.children())[:-1])

        # 4. Define Preprocessing (Standard ImageNet stats)
        self.preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # 5. Load the Head (trained brain)
        self.head_path = MODEL_PATHS.get('resnet_head')
        self.head = None
        self.load_head()

    def load_head(self):
        """Loads the trained Logistic Regression head if it exists."""
        if self.head_path and Path(self.head_path).exists():
            self.head = joblib.load(self.head_path)
            print(f"Loaded trained head from {self.head_path}")
        else:
            print(f"No trained head found at {self.head_path}")

    def _get_features(self, img):
        """Internal method to turn a BGR image into a flat feature vector."""
        # Convert BGR (OpenCV) to RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Preprocess to tensor
        input_tensor = self.preprocess(img_rgb).unsqueeze(0).to(self.device)

        # Extract features
        with torch.no_grad():
            features = self.feature_extractor(input_tensor)

        # Flatten [1, 512, 1, 1] -> [512] for Scikit-Learn
        return features.cpu().numpy().flatten()

    def train_head(self, images, labels):
        """
        Trains the lightweight decision layer on top of the local ResNet.

        Args:
            images: list of BGR images (numpy arrays).
            labels: list of class labels, same length as `images`.

        Raises:
            ValueError: if `images` is empty.
        """
        from sklearn.linear_model import LogisticRegression

        if not images:
            raise ValueError("No images provided for training.")

        print(f"⏳ Extracting features for {len(images)} images...")
        X_data = [self._get_features(img) for img in images]

        print("🎓 Fitting Logistic Regression...")
        self.head = LogisticRegression(max_iter=1000, C=1.0)
        self.head.fit(X_data, labels)

        # Save immediately so a crash later doesn't lose the fit.
        if self.head_path:
            joblib.dump(self.head, self.head_path)
            print(f"💾 Model saved to {self.head_path}")

    def predict(self, image):
        """
        Standard Interface: Returns (Label, Confidence, Time_ms).

        Returns ("Untrained", 0.0, 0.0) when no head has been trained/loaded.
        """
        if self.head is None:
            return "Untrained", 0.0, 0.0

        t0 = time.perf_counter()

        # 1. Get Vector
        features = self._get_features(image)

        # 2. Get Probabilities
        probs = self.head.predict_proba([features])[0]
        winner_idx = np.argmax(probs)

        label = self.head.classes_[winner_idx]
        conf = probs[winner_idx]

        t1 = time.perf_counter()
        inference_ms = (t1 - t0) * 1000

        return label, conf, inference_ms
src/utils/data_loader.py DELETED
File without changes
src/utils/visualization.py DELETED
File without changes
training/train_mobilenet.py CHANGED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import cv2
4
+ from pathlib import Path
5
+
6
+ # Path Setup
7
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
8
+ sys.path.append(str(PROJECT_ROOT))
9
+
10
+ from src.detectors.mobilenet import MobileNetDetector
11
+ from src.config import PROJECT_ROOT
12
+
13
def main():
    """Train the MobileNetV3 logistic-regression head on the artroom bird images."""
    print("🚀 Starting MobileNetV3 Training Pipeline...")

    # 1. Load Data — label each PNG from its filename.
    images, labels = [], []
    train_dir = PROJECT_ROOT / "data/artroom/bird/yolo/train/images"

    print(f"📂 Scanning {train_dir}...")
    negatives = ("room", "wall", "floor", "empty")
    for path in train_dir.glob("*.png"):
        frame = cv2.imread(str(path))
        if frame is None:
            continue

        name = path.name.lower()
        if "bird" in name:
            tag = "bird"
        elif any(marker in name for marker in negatives):
            tag = "background"
        else:
            continue
        images.append(frame)
        labels.append(tag)

    print(f"📊 Data Summary:")
    print(f" - Birds: {labels.count('bird')}")
    print(f" - Backgrounds: {labels.count('background')}")

    if not images:
        print("❌ No images found!")
        return

    # 2. Initialize & Train
    detector = MobileNetDetector()
    detector.train_head(images, labels)

    # 3. Sanity Check on the first training image
    print("\n🔎 Sanity Check (Image 0):")
    lbl, conf, ms = detector.predict(images[0])
    print(f" Result: {lbl} | Conf: {conf:.2%} | Time: {ms:.2f}ms")
    print(f"✅ Training Complete. Head saved to {detector.head_path}")


if __name__ == "__main__":
    main()
training/train_resenet.py DELETED
File without changes
training/train_resnet.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import cv2
4
+ from pathlib import Path
5
+
6
+ # Add project root to path so we can import 'src'
7
+ # We use .parent because this script is inside 'training/'
8
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
9
+ sys.path.append(str(PROJECT_ROOT))
10
+
11
+ from src.detectors.resnet import ResNetDetector
12
+ from src.config import PROJECT_ROOT
13
+
14
def load_data():
    """
    Scan the training folder and build (images, labels) lists.

    Filename convention: names containing "bird" are positives; names
    containing "room"/"wall"/"floor"/"empty" are background negatives;
    anything else is skipped.
    """
    train_dir = PROJECT_ROOT / "data/artroom/bird/yolo/train/images"
    print(f"📂 Scanning {train_dir}...")

    images, labels = [], []
    background_markers = ("room", "wall", "floor", "empty")

    for path in train_dir.glob("*.png"):
        frame = cv2.imread(str(path))
        if frame is None:
            continue

        stem = path.name.lower()
        if "bird" in stem:
            tag = "bird"
        elif any(marker in stem for marker in background_markers):
            tag = "background"
        else:
            continue

        images.append(frame)
        labels.append(tag)

    return images, labels
50
+
51
def main():
    """Run the ResNet head-training pipeline: load data, fit, sanity-check."""
    # 1. Prepare Data
    print("🚀 Starting ResNet Training Pipeline...")
    images, labels = load_data()

    # Per-class statistics for the summary below.
    n_bird = labels.count('bird')
    n_bg = labels.count('background')

    print(f"\n📊 Data Summary:")
    print(f" - Total Images: {len(images)}")
    print(f" - Birds (Positive): {n_bird}")
    print(f" - Backgrounds (Negative): {n_bg}")

    if not images:
        print("❌ Error: No images found. Check your 'data/artroom/bird/yolo/train/images' folder.")
        return

    # 2-3. Initialize the model, then train & save the head.
    detector = ResNetDetector()
    detector.train_head(images, labels)

    # 4. Verification Test (Sanity Check) on the first training image.
    print("\n🔎 Running Sanity Check on Image 0...")
    lbl, conf, ms = detector.predict(images[0])
    print(f" Result: {lbl} | Confidence: {conf:.2%} | Time: {ms:.2f}ms")
    print("\n✅ Training Complete. You can now use dev_03_test_resnet.ipynb")


if __name__ == "__main__":
    main()
training/train_yolo.py CHANGED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys
import os

# Make the project root importable so `src` resolves when run from anywhere.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from ultralytics import YOLO
# FIX: `import os` was duplicated here (already imported above).
from src.config import MODEL_PATHS, TRAINING_DIR, BIRD_YAML


def run_fine_tuning():
    """Fine-tune YOLOv8n on the artroom bird dataset described by BIRD_YAML."""
    # Load model (Ultralytics downloads yolov8n.pt to the CWD if absent).
    model = YOLO('yolov8n.pt')
    # model = YOLO(MODEL_PATHS['yolo'])

    # Train the model; runs land under training/runs/bird_artroom_finetune.
    model.train(
        data=BIRD_YAML,
        epochs=50,
        imgsz=640,
        batch=4,
        name='bird_artroom_finetune',
        project=str(TRAINING_DIR / "runs"),
        exist_ok=False,
    )

    print("Training completed successfully!")


if __name__ == "__main__":
    run_fine_tuning()