Spaces:
Sleeping
Sleeping
DariusGiannoli committed on
Commit Β·
f2e38f1
1
Parent(s): 3bec0b6
MobileNet, resnet, mid yolo, missing pipeline but recognition halfway
Browse files- README.md +6 -0
- models/mobilenet_v3_head.pkl +0 -0
- models/resnet18_head.pkl +0 -0
- notebooks/README.md +3 -0
- notebooks/dev_01_test_yolo.ipynb +0 -10
- notebooks/dev_02_test_mobilenet.ipynb +0 -0
- notebooks/dev_03_test_resnet.ipynb +0 -0
- notebooks/recognition/dev_01_test_yolo.ipynb +0 -0
- notebooks/recognition/dev_02_test_mobilenet.ipynb +0 -0
- notebooks/recognition/dev_03_test_resnet.ipynb +0 -0
- notebooks/{dev_04_test_rce.ipynb β recognition/dev_04_test_rce.ipynb} +0 -0
- src/config.py +16 -20
- src/detectors/mobilenet.py +103 -0
- src/detectors/resnet.py +122 -0
- src/utils/data_loader.py +0 -0
- src/utils/visualization.py +0 -0
- training/train_mobilenet.py +52 -0
- training/train_resenet.py +0 -0
- training/train_resnet.py +82 -0
- training/train_yolo.py +29 -0
README.md
CHANGED
|
@@ -1 +1,7 @@
|
|
| 1 |
Benchmarking comparison
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
Benchmarking comparison
|
| 2 |
+
|
| 3 |
+
- The Classical Baseline: ORB (Oriented FAST and Rotated BRIEF)
|
| 4 |
+
- The Efficient Rival: MobileNetV3-Small
|
| 5 |
+
- The Heavyweight Standard: ResNet-18
|
| 6 |
+
- The Modern Challenger: MobileViT (XXS)
|
| 7 |
+
- End-to-End Detector: YOLOv8n
|
models/mobilenet_v3_head.pkl
ADDED
|
Binary file (5.54 kB). View file
|
|
|
models/resnet18_head.pkl
ADDED
|
Binary file (5.02 kB). View file
|
|
|
notebooks/README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
- Recognition only focuses on finding the bounding box (BB) of the object in the image, loading a trained / fine-tuned model for that object
|
| 2 |
+
|
| 3 |
+
- Pipeline fine-tunes/trains the model and then also runs the recognition
|
notebooks/dev_01_test_yolo.ipynb
DELETED
|
@@ -1,10 +0,0 @@
|
|
| 1 |
-
{
|
| 2 |
-
"cells": [],
|
| 3 |
-
"metadata": {
|
| 4 |
-
"language_info": {
|
| 5 |
-
"name": "python"
|
| 6 |
-
}
|
| 7 |
-
},
|
| 8 |
-
"nbformat": 4,
|
| 9 |
-
"nbformat_minor": 5
|
| 10 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
notebooks/dev_02_test_mobilenet.ipynb
DELETED
|
File without changes
|
notebooks/dev_03_test_resnet.ipynb
DELETED
|
File without changes
|
notebooks/recognition/dev_01_test_yolo.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/recognition/dev_02_test_mobilenet.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/recognition/dev_03_test_resnet.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
notebooks/{dev_04_test_rce.ipynb β recognition/dev_04_test_rce.ipynb}
RENAMED
|
File without changes
|
src/config.py
CHANGED
|
@@ -1,28 +1,24 @@
|
|
| 1 |
-
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
-
#
|
| 5 |
-
|
| 6 |
-
FILE_PATH = Path(__file__).resolve()
|
| 7 |
-
PROJECT_ROOT = FILE_PATH.parent.parent # Go up two levels (src -> root)
|
| 8 |
|
| 9 |
-
#
|
| 10 |
DATA_DIR = PROJECT_ROOT / "data"
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
|
| 15 |
-
#
|
|
|
|
| 16 |
MODEL_PATHS = {
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
}
|
| 23 |
|
| 24 |
-
#
|
| 25 |
-
|
| 26 |
-
os.makedirs(MODELS_DIR, exist_ok=True)
|
| 27 |
-
os.makedirs(RESULTS_DIR / "logs", exist_ok=True)
|
| 28 |
-
os.makedirs(RESULTS_DIR / "plots", exist_ok=True)
|
|
|
|
| 1 |
+
# src/config.py
|
| 2 |
from pathlib import Path
|
| 3 |
|
| 4 |
+
# Get project root
|
| 5 |
+
PROJECT_ROOT = Path(__file__).resolve().parent.parent
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# Data Paths
|
| 8 |
DATA_DIR = PROJECT_ROOT / "data"
|
| 9 |
+
ARTROOM_DIR = DATA_DIR / "artroom"
|
| 10 |
+
BIRD_YOLO_DIR = ARTROOM_DIR / "bird" / "yolo"
|
| 11 |
+
BIRD_YAML = BIRD_YOLO_DIR / "bird_data.yaml"
|
| 12 |
|
| 13 |
+
# Model Paths
|
| 14 |
+
MODEL_DIR = PROJECT_ROOT / "models"
|
| 15 |
MODEL_PATHS = {
|
| 16 |
+
# 'yolo': MODEL_DIR / "yolov8n.pt",
|
| 17 |
+
'yolo' : PROJECT_ROOT / "volov8n.pt",
|
| 18 |
+
'resnet': MODEL_DIR / "resnet18.pth",
|
| 19 |
+
'resnet_head': MODEL_DIR / "resnet18_head.pkl",
|
| 20 |
+
'mobilenet': MODEL_DIR / "mobilenet_v3.pth"
|
| 21 |
}
|
| 22 |
|
| 23 |
+
# Training Results
|
| 24 |
+
TRAINING_DIR = PROJECT_ROOT / "training"
|
|
|
|
|
|
|
|
|
src/detectors/mobilenet.py
CHANGED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import cv2
import numpy as np
import joblib
import time
from pathlib import Path
from src.config import MODEL_PATHS


class MobileNetDetector:
    """
    Wrapper for MobileNetV3-Small.

    Architecture: frozen MobileNetV3-Small backbone (feature extractor)
    + scikit-learn Logistic Regression head trained on top.
    Target: ultra-low latency (<3ms) feature extraction for robotics.
    """

    def __init__(self, device=None):
        # Prefer the Apple-Silicon GPU ("mps") when available, else CPU.
        self.device = device or ("mps" if torch.backends.mps.is_available() else "cpu")
        print(f"Initializing MobileNetV3 on {self.device}...")

        # 1. Initialize architecture (the 'small' variant, chosen for speed).
        self.backbone = models.mobilenet_v3_small(weights=None)

        # 2. Load local backbone weights if they exist on disk.
        model_path = MODEL_PATHS.get('mobilenet')
        if model_path and Path(model_path).exists():
            print(f"Loading backbone from {model_path}")
            state_dict = torch.load(model_path, map_location=self.device)
            try:
                self.backbone.load_state_dict(state_dict)
            except RuntimeError:
                # BUG FIX: this was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit. Only a key/shape mismatch
                # (RuntimeError) should trigger the lenient retry.
                # 'strict=False' is standard when loading backbones for
                # transfer learning.
                self.backbone.load_state_dict(state_dict, strict=False)
        else:
            print(f"Warning: Local weights not found at {model_path}")

        # 3. Cut off the classifier: Identity makes the forward pass return
        # the raw pooled feature vector instead of class logits.
        self.backbone.classifier = torch.nn.Identity()

        self.backbone.eval()  # inference mode; backbone stays frozen
        self.backbone.to(self.device)

        # 4. Preprocessing (standard ImageNet statistics).
        self.preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # 5. Load the head (the part we actually train). Its path is derived
        # from the backbone path: foo.pth -> foo_head.pkl.
        self.head_path = str(model_path).replace('.pth', '_head.pkl')
        self.head = None
        self.load_head()

    def load_head(self):
        """Load the trained Logistic Regression head from disk, if present."""
        if Path(self.head_path).exists():
            self.head = joblib.load(self.head_path)
            print(f"Loaded trained head from {self.head_path}")
        else:
            print("Head not found. Model is in FEATURE ONLY mode.")

    def _get_features(self, img):
        """Return a flat feature vector for one BGR (OpenCV) image."""
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        input_tensor = self.preprocess(img_rgb).unsqueeze(0).to(self.device)

        with torch.no_grad():
            features = self.backbone(input_tensor)

        # Flatten to 1-D so scikit-learn can consume it.
        return features.cpu().numpy().flatten()

    def train_head(self, images, labels):
        """Fit the Logistic Regression head on (images, labels) and save it.

        Raises:
            ValueError: if `images` is empty.
        """
        from sklearn.linear_model import LogisticRegression

        if not images:
            raise ValueError("No images provided.")

        print(f"Extracting features for {len(images)} images...")
        X_data = [self._get_features(img) for img in images]

        print("Fitting Logistic Regression...")
        self.head = LogisticRegression(max_iter=1000)
        self.head.fit(X_data, labels)

        joblib.dump(self.head, self.head_path)
        print(f"Model saved to {self.head_path}")

    def predict(self, image):
        """Classify one image.

        Returns:
            (label, confidence, inference_time_ms); ("Untrained", 0.0, 0.0)
            when no head has been trained/loaded yet.
        """
        if self.head is None:
            return "Untrained", 0.0, 0.0

        t0 = time.perf_counter()

        features = self._get_features(image)
        probs = self.head.predict_proba([features])[0]
        winner_idx = np.argmax(probs)

        label = self.head.classes_[winner_idx]
        conf = probs[winner_idx]

        t1 = time.perf_counter()
        return label, conf, (t1 - t0) * 1000
src/detectors/resnet.py
CHANGED
|
@@ -0,0 +1,122 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import cv2
import numpy as np
import joblib
import time
from pathlib import Path
from src.config import MODEL_PATHS


class ResNetDetector:
    """
    Wrapper for ResNet-18 Feature Extractor.

    Architecture: Local Frozen ResNet-18 Backbone + Logistic Regression Head.
    """

    def __init__(self, device=None):
        # Prefer the Apple-Silicon GPU ("mps") when available, else CPU.
        self.device = device or ("mps" if torch.backends.mps.is_available() else "cpu")

        # 1. Initialize the architecture (no pretrained weights).
        self.backbone = models.resnet18(weights=None)

        # 2. Load local weights from the path configured in config.py.
        resnet_path = MODEL_PATHS['resnet']
        if Path(resnet_path).exists():
            print(f"Loading local weights from {resnet_path}")
            state_dict = torch.load(resnet_path, map_location=self.device)

            try:
                self.backbone.load_state_dict(state_dict)
            except RuntimeError as e:
                # Key/shape mismatch: retry leniently (standard when reusing
                # a backbone for transfer learning).
                print("loading failed:", e)
                self.backbone.load_state_dict(state_dict, strict=False)
        else:
            # BUG FIX: was an uninformative `print("Error Loading")` that did
            # not say what failed or where to look.
            print(f"Warning: Local weights not found at {resnet_path}; "
                  "backbone is randomly initialized.")

        # 3. Prepare for feature extraction.
        self.backbone.eval()  # freeze layers (inference mode)
        self.backbone.to(self.device)

        # Remove the final classification layer; what remains outputs the
        # pooled 512-d feature map.
        self.feature_extractor = torch.nn.Sequential(*list(self.backbone.children())[:-1])

        # 4. Define preprocessing (standard ImageNet statistics).
        self.preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # 5. Load the trained head (the decision layer).
        self.head_path = MODEL_PATHS.get('resnet_head')
        self.head = None
        self.load_head()

    def load_head(self):
        """Loads the trained Logistic Regression head if it exists."""
        if self.head_path and Path(self.head_path).exists():
            self.head = joblib.load(self.head_path)
            print(f"Loaded trained head from {self.head_path}")
        else:
            print(f"No trained head found at {self.head_path}")

    def _get_features(self, img):
        """Internal method to turn an image into a math vector."""
        # Convert BGR (OpenCV) to RGB
        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Preprocess to tensor
        input_tensor = self.preprocess(img_rgb).unsqueeze(0).to(self.device)

        # Extract features
        with torch.no_grad():
            features = self.feature_extractor(input_tensor)

        # Flatten [1, 512, 1, 1] -> [512] for Scikit-Learn
        return features.cpu().numpy().flatten()

    def train_head(self, images, labels):
        """Trains the lightweight decision layer on top of the local ResNet.

        Raises:
            ValueError: if `images` is empty.
        """
        from sklearn.linear_model import LogisticRegression

        if not images:
            raise ValueError("No images provided for training.")

        print(f"Extracting features for {len(images)} images...")
        X_data = [self._get_features(img) for img in images]

        print("Fitting Logistic Regression...")
        self.head = LogisticRegression(max_iter=1000, C=1.0)
        self.head.fit(X_data, labels)

        # Save immediately so a crash later doesn't lose the trained head.
        if self.head_path:
            joblib.dump(self.head, self.head_path)
            print(f"Model saved to {self.head_path}")

    def predict(self, image):
        """Standard Interface: Returns (Label, Confidence, Time_ms).

        Returns ("Untrained", 0.0, 0.0) when no head is loaded.
        """
        if self.head is None:
            return "Untrained", 0.0, 0.0

        t0 = time.perf_counter()

        # 1. Get feature vector
        features = self._get_features(image)

        # 2. Get probabilities and pick the winning class
        probs = self.head.predict_proba([features])[0]
        winner_idx = np.argmax(probs)

        label = self.head.classes_[winner_idx]
        conf = probs[winner_idx]

        t1 = time.perf_counter()
        inference_ms = (t1 - t0) * 1000

        return label, conf, inference_ms
|
src/utils/data_loader.py
DELETED
|
File without changes
|
src/utils/visualization.py
DELETED
|
File without changes
|
training/train_mobilenet.py
CHANGED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys
import cv2
from pathlib import Path

# Path setup: make the project root importable (this script lives in training/).
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.append(str(PROJECT_ROOT))

from src.detectors.mobilenet import MobileNetDetector
from src.config import PROJECT_ROOT  # canonical root; shadows the local one above


def main():
    """Train the MobileNet Logistic-Regression head on the artroom bird set."""
    print("Starting MobileNetV3 Training Pipeline...")

    # 1. Load data: label each PNG by filename convention
    # ("bird" -> positive, room/wall/floor/empty -> background).
    images, labels = [], []
    train_dir = PROJECT_ROOT / "data/artroom/bird/yolo/train/images"

    print(f"Scanning {train_dir}...")
    for img_file in train_dir.glob("*.png"):
        img = cv2.imread(str(img_file))
        if img is None:
            continue  # unreadable/corrupt file

        fname = img_file.name.lower()
        if "bird" in fname:
            images.append(img)
            labels.append("bird")
        elif any(x in fname for x in ["room", "wall", "floor", "empty"]):
            images.append(img)
            labels.append("background")

    print("Data Summary:")
    print(f" - Birds: {labels.count('bird')}")
    print(f" - Backgrounds: {labels.count('background')}")

    if not images:
        print("No images found!")
        return

    # 2. Initialize & train
    detector = MobileNetDetector()
    detector.train_head(images, labels)

    # 3. Sanity check: predict on the first training image.
    print("\nSanity Check (Image 0):")
    lbl, conf, ms = detector.predict(images[0])
    print(f" Result: {lbl} | Conf: {conf:.2%} | Time: {ms:.2f}ms")
    print(f"Training Complete. Head saved to {detector.head_path}")


if __name__ == "__main__":
    main()
|
training/train_resenet.py
DELETED
|
File without changes
|
training/train_resnet.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys
import cv2
from pathlib import Path

# Add project root to path so we can import 'src'.
# We use .parent.parent because this script is inside 'training/'.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
sys.path.append(str(PROJECT_ROOT))

from src.detectors.resnet import ResNetDetector
from src.config import PROJECT_ROOT  # canonical root; shadows the local one above


def load_data():
    """
    Scans the data folders and prepares clean lists for training.

    Returns:
        (images, labels): parallel lists of BGR images and string labels
        ("bird" for positives, "background" for negatives).
    """
    images = []
    labels = []

    # Images live in the YOLO training split.
    train_dir = PROJECT_ROOT / "data/artroom/bird/yolo/train/images"

    print(f"Scanning {train_dir}...")

    # Label by filename convention:
    # contains "bird" -> bird; contains "room"/"wall"/"floor"/"empty" -> background.
    for img_file in train_dir.glob("*.png"):
        img = cv2.imread(str(img_file))
        if img is None:
            continue  # unreadable/corrupt file

        filename = img_file.name.lower()

        if "bird" in filename:
            images.append(img)
            labels.append("bird")
        elif any(x in filename for x in ["room", "wall", "floor", "empty"]):
            images.append(img)
            labels.append("background")

    return images, labels


def main():
    # 1. Prepare Data
    print("Starting ResNet Training Pipeline...")
    images, labels = load_data()

    # Statistics
    n_bird = labels.count('bird')
    n_bg = labels.count('background')

    print("\nData Summary:")
    print(f" - Total Images: {len(images)}")
    print(f" - Birds (Positive): {n_bird}")
    print(f" - Backgrounds (Negative): {n_bg}")

    if len(images) == 0:
        print("Error: No images found. Check your 'data/artroom/bird/yolo/train/images' folder.")
        return

    # 2. Initialize Model
    detector = ResNetDetector()

    # 3. Train & Save
    detector.train_head(images, labels)

    # 4. Verification Test (Sanity Check)
    print("\nRunning Sanity Check on Image 0...")
    lbl, conf, ms = detector.predict(images[0])
    print(f" Result: {lbl} | Confidence: {conf:.2%} | Time: {ms:.2f}ms")
    print("\nTraining Complete. You can now use dev_03_test_resnet.ipynb")


if __name__ == "__main__":
    main()
|
training/train_yolo.py
CHANGED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys
import os

# Make the project root importable so `src` resolves when run as a script.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from ultralytics import YOLO
# BUG FIX: `os` was imported a second time here; the duplicate is removed.
from src.config import MODEL_PATHS, TRAINING_DIR, BIRD_YAML


def run_fine_tuning():
    """Fine-tune YOLOv8-nano on the bird dataset described by BIRD_YAML."""
    # Load model (ultralytics auto-downloads yolov8n.pt if missing).
    model = YOLO('yolov8n.pt')
    # model = YOLO(MODEL_PATHS['yolo'])

    # Train the model
    results = model.train(
        data=BIRD_YAML,
        epochs=50,
        imgsz=640,
        batch=4,  # small batch to fit modest GPUs/Apple Silicon
        name='bird_artroom_finetune',
        project=str(TRAINING_DIR / "runs"),
        exist_ok=False,  # never overwrite a previous run's results
    )

    print("Training completed successfully!")


if __name__ == "__main__":
    run_fine_tuning()
|