Spaces · Commit 928c611 · Parent: de6ab73
Remove all comments from code

Files changed (the commit replaces comment lines with blank lines; the removed comment text was not preserved in this capture, so deleted lines in the hunks below appear empty):
- app.py +2 -2
- download_model.py +1 -1
- main.py +1 -1
- src/autonomous_vision/detectors/depth_estimator.py +18 -18
- src/autonomous_vision/detectors/lane_detector.py +23 -23
- src/autonomous_vision/detectors/traffic_light_analyzer.py +11 -11
- src/autonomous_vision/detectors/traffic_sign_detector.py +36 -36
- src/autonomous_vision/detectors/yolo_detector.py +11 -11
- src/autonomous_vision/models/detections.py +9 -9
- src/autonomous_vision/pipeline/annotator.py +10 -10
- src/autonomous_vision/pipeline/processor.py +22 -22
app.py
CHANGED

@@ -1,14 +1,14 @@
 import os
 import sys
 
-
+
 sys.path.append(os.path.join(os.path.dirname(__file__), "src"))
 
 from autonomous_vision.app import create_demo, THEME, CUSTOM_CSS
 
 if __name__ == "__main__":
     demo = create_demo()
-
+
     demo.launch(
         allowed_paths=["."]
     )
download_model.py
CHANGED

@@ -9,7 +9,7 @@ hf_hub_download(
     local_dir="src/autonomous_vision/models",
     local_dir_use_symlinks=False
 )
-
+
 if os.path.exists("src/autonomous_vision/models/weights/best.pt"):
     os.rename("src/autonomous_vision/models/weights/best.pt", "src/autonomous_vision/models/yolov8_traffic.pt")
     os.rmdir("src/autonomous_vision/models/weights")
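For reference, a minimal sketch of the download step this hunk belongs to. The repo_id and filename below are placeholders (assumptions), since the head of the hf_hub_download(...) call is outside the captured hunk:

import os
from huggingface_hub import hf_hub_download

# Placeholder repo_id/filename -- the real values are not visible in this diff.
hf_hub_download(
    repo_id="some-user/some-traffic-model",
    filename="weights/best.pt",
    local_dir="src/autonomous_vision/models",
    local_dir_use_symlinks=False,
)

# Flatten the downloaded layout, exactly as the script does above.
if os.path.exists("src/autonomous_vision/models/weights/best.pt"):
    os.rename("src/autonomous_vision/models/weights/best.pt",
              "src/autonomous_vision/models/yolov8_traffic.pt")
    os.rmdir("src/autonomous_vision/models/weights")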
main.py
CHANGED

@@ -1,4 +1,4 @@
-
+
 """Entry point for Autonomous Vision System."""
 
 from autonomous_vision.app import main
src/autonomous_vision/detectors/depth_estimator.py
CHANGED

@@ -15,10 +15,10 @@ class DepthEstimator(BaseDetector):
     approximate distances to detected objects.
     """
 
-
-
-    FOCAL_LENGTH = 700
-    REFERENCE_DISTANCE = 10.0
+
+
+    FOCAL_LENGTH = 700
+    REFERENCE_DISTANCE = 10.0
 
     def __init__(
         self,
@@ -80,7 +80,7 @@ class DepthEstimator(BaseDetector):
        try:
            from PIL import Image
 
-
+
            h, w = frame.shape[:2]
            scale = 384 / max(h, w)
            new_h, new_w = int(h * scale), int(w * scale)
@@ -89,11 +89,11 @@ class DepthEstimator(BaseDetector):
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
            pil_img = Image.fromarray(rgb)
 
-
+
            result = self._pipeline(pil_img)
            depth_map = np.array(result['depth'])
 
-
+
            self._depth_map = cv2.resize(depth_map, (w, h))
 
            return self._depth_map
@@ -125,18 +125,18 @@ class DepthEstimator(BaseDetector):
        if not (0 <= x < w and 0 <= y < h):
            return None
 
-
+
        depth_value = depth_map[y, x]
 
        if depth_value <= 0:
            return 100.0
 
-
-
-
+
+
+
        distance = 400.0 / depth_value
 
-
+
        return min(max(distance, 1.5), 100.0)
 
    def get_distance_for_bbox(
@@ -160,7 +160,7 @@ class DepthEstimator(BaseDetector):
        x1, y1, x2, y2 = bbox
        h, w = depth_map.shape[:2]
 
-
+
        x1 = max(0, x1)
        y1 = max(0, y1)
        x2 = min(w, x2)
@@ -169,8 +169,8 @@ class DepthEstimator(BaseDetector):
        if x1 >= x2 or y1 >= y2:
            return None
 
-
-
+
+
        center_x1 = x1 + (x2 - x1) // 4
        center_x2 = x2 - (x2 - x1) // 4
        center_y1 = y1 + (y2 - y1) // 4
@@ -181,7 +181,7 @@ class DepthEstimator(BaseDetector):
        if depth_roi.size == 0:
            return None
 
-
+
        median_depth = np.median(depth_roi)
 
        return self.get_distance_at_point(0, 0, np.full((1, 1), median_depth))
@@ -198,11 +198,11 @@ class DepthEstimator(BaseDetector):
        Returns:
            Colorized depth visualization (BGR).
        """
-
+
        normalized = cv2.normalize(depth_map, None, 0, 255, cv2.NORM_MINMAX)
        normalized = normalized.astype(np.uint8)
 
-
+
        colored = cv2.applyColorMap(normalized, colormap)
 
        return colored
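The changed lines above sit inside the detector's distance heuristic: a relative depth reading is inverted with a fixed 400.0 constant, then clamped to a plausible range. A standalone sketch of that arithmetic (constants taken from the hunks; depth_value is assumed to be a positive relative-depth value, not metric depth):

def approx_distance(depth_value: float) -> float:
    # Mirrors get_distance_at_point: non-positive readings count as "far away",
    # otherwise invert the relative depth and clamp to 1.5-100.0 metres.
    if depth_value <= 0:
        return 100.0
    return min(max(400.0 / depth_value, 1.5), 100.0)

print(approx_distance(40.0))   # 10.0
print(approx_distance(300.0))  # 1.5 (clamped; raw value would be ~1.33)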
src/autonomous_vision/detectors/lane_detector.py
CHANGED

@@ -17,9 +17,9 @@ from autonomous_vision.models.detections import LaneInfo
 class LaneDetector(BaseDetector):
     """Lane detector using semantic segmentation."""
 
-
-    ROAD_CLASS = 0
-    SIDEWALK_CLASS = 1
+
+    ROAD_CLASS = 0
+    SIDEWALK_CLASS = 1
 
     def __init__(
         self,
@@ -62,7 +62,7 @@ class LaneDetector(BaseDetector):
        try:
            from PIL import Image
 
-
+
            h, w = frame.shape[:2]
            scale = 512 / max(h, w)
            new_h, new_w = int(h * scale), int(w * scale)
@@ -71,10 +71,10 @@ class LaneDetector(BaseDetector):
            rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
            pil_img = Image.fromarray(rgb)
 
-
+
            results = self._seg_pipeline(pil_img)
 
-
+
            road_mask = None
            for result in results:
                label = result.get('label', '').lower()
@@ -86,7 +86,7 @@ class LaneDetector(BaseDetector):
                    road_mask = cv2.bitwise_or(road_mask, mask)
 
            if road_mask is not None:
-
+
                road_mask = cv2.resize(road_mask, (w, h), interpolation=cv2.INTER_NEAREST)
 
            return road_mask
@@ -99,26 +99,26 @@ class LaneDetector(BaseDetector):
        """Extract left and right lane boundaries from road mask."""
        height, width = mask.shape[:2]
 
-
+
        if mask.dtype != np.uint8:
            mask = (mask > 0).astype(np.uint8) * 255
 
-
+
        contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 
        if not contours:
            return None, None
 
-
+
        main_contour = max(contours, key=cv2.contourArea)
 
-
+
        left_points = []
        right_points = []
 
-
+
        for y in range(int(height * 0.55), height, 10):
-
+
            mask_row = mask[y, :]
            road_pixels = np.where(mask_row > 0)[0]
 
@@ -129,7 +129,7 @@ class LaneDetector(BaseDetector):
            left_points.append((int(left_x), int(y)))
            right_points.append((int(right_x), int(y)))
 
-        return left_points if len(left_points) >= 3 else None, \
+        return left_points if len(left_points) >= 3 else None, \
            right_points if len(right_points) >= 3 else None
 
    def _create_lane_mask_cv(self, frame: np.ndarray) -> np.ndarray:
@@ -140,22 +140,22 @@ class LaneDetector(BaseDetector):
        lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
 
-
+
        l_channel = hls[:, :, 1]
        white_mask = cv2.inRange(l_channel, 200, 255)
 
-
+
        b_channel = lab[:, :, 2]
        yellow_mask = cv2.inRange(b_channel, 145, 200)
 
        color_mask = cv2.bitwise_or(white_mask, yellow_mask)
 
-
+
        blur = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blur, 50, 150)
        combined = cv2.bitwise_or(color_mask, edges)
 
-
+
        roi_mask = np.zeros_like(combined)
        roi_pts = np.array([
            [int(width * 0.0), height],
@@ -219,7 +219,7 @@ class LaneDetector(BaseDetector):
        if not history:
            return current
 
-
+
        avg = np.mean(list(history), axis=0)
        return [(int(x), int(y)) for x, y in avg]
 
@@ -231,13 +231,13 @@ class LaneDetector(BaseDetector):
        left_points = None
        right_points = None
 
-
+
        if self._seg_available:
            road_mask = self._segment_road(frame)
            if road_mask is not None:
                left_points, right_points = self._extract_lanes_from_mask(road_mask)
 
-
+
        if left_points is None or right_points is None:
            cv_mask = self._create_lane_mask_cv(frame)
            cv_left, cv_right = self._detect_lanes_hough(cv_mask, width, height)
@@ -246,11 +246,11 @@ class LaneDetector(BaseDetector):
            if right_points is None:
                right_points = cv_right
 
-
+
        left_smooth = self._smooth_points(left_points, self._left_history)
        right_smooth = self._smooth_points(right_points, self._right_history)
 
-
+
        center_offset = 0.0
        if left_smooth and right_smooth:
            left_x = left_smooth[-1][0]
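_create_lane_mask_cv combines an HLS lightness threshold (white paint), a LAB b-channel threshold (yellow paint), and Canny edges. A minimal standalone version of that masking step with the same thresholds; the BGR2HLS conversion is an assumption, since the hunk uses hls but its definition is outside the diff:

import cv2
import numpy as np

def lane_mask(frame: np.ndarray) -> np.ndarray:
    hls = cv2.cvtColor(frame, cv2.COLOR_BGR2HLS)  # assumed source of `hls`
    lab = cv2.cvtColor(frame, cv2.COLOR_BGR2LAB)
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

    white_mask = cv2.inRange(hls[:, :, 1], 200, 255)   # bright pixels (L channel)
    yellow_mask = cv2.inRange(lab[:, :, 2], 145, 200)  # yellowish pixels (b channel)
    color_mask = cv2.bitwise_or(white_mask, yellow_mask)

    edges = cv2.Canny(cv2.GaussianBlur(gray, (5, 5), 0), 50, 150)
    return cv2.bitwise_or(color_mask, edges)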
src/autonomous_vision/detectors/traffic_light_analyzer.py
CHANGED

@@ -18,11 +18,11 @@ class TrafficLightAnalyzer(BaseDetector):
        """
        super().__init__(confidence_threshold)
 
-
+
        self.color_ranges = {
            TrafficLightState.RED: [
-                ((0, 100, 100), (10, 255, 255)),
-                ((160, 100, 100), (180, 255, 255)),
+                ((0, 100, 100), (10, 255, 255)),
+                ((160, 100, 100), (180, 255, 255)),
            ],
            TrafficLightState.YELLOW: [
                ((15, 100, 100), (35, 255, 255)),
@@ -65,10 +65,10 @@ class TrafficLightAnalyzer(BaseDetector):
        """
        self.ensure_initialized()
 
-
+
        x1, y1, x2, y2 = detection.bbox
 
-
+
        height, width = frame.shape[:2]
        pad = 5
        x1 = max(0, x1 - pad)
@@ -95,10 +95,10 @@ class TrafficLightAnalyzer(BaseDetector):
        if roi.size == 0:
            return TrafficLightState.UNKNOWN
 
-
+
        hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV)
 
-
+
        color_scores: dict[TrafficLightState, float] = {}
 
        total_pixels = roi.shape[0] * roi.shape[1]
@@ -112,19 +112,19 @@ class TrafficLightAnalyzer(BaseDetector):
                color_mask = cv2.inRange(hsv, lower_np, upper_np)
                mask = cv2.bitwise_or(mask, color_mask)
 
-
+
            pixel_count = cv2.countNonZero(mask)
            color_scores[state] = pixel_count / total_pixels
 
-
+
        if not color_scores:
            return TrafficLightState.UNKNOWN
 
        max_state = max(color_scores, key=lambda x: color_scores[x])
        max_score = color_scores[max_state]
 
-
-        if max_score < 0.05:
+
+        if max_score < 0.05:
            return TrafficLightState.UNKNOWN
 
        return max_state
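The analyzer scores each light state by the fraction of ROI pixels that fall inside its HSV ranges and treats anything under 5% coverage as UNKNOWN. A standalone sketch of that scoring for the red ranges shown above (OpenCV hue runs 0-180, so red needs two ranges that wrap around):

import cv2
import numpy as np

RED_RANGES = [((0, 100, 100), (10, 255, 255)), ((160, 100, 100), (180, 255, 255))]

def red_fraction(roi_bgr: np.ndarray) -> float:
    hsv = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2HSV)
    mask = np.zeros(hsv.shape[:2], dtype=np.uint8)
    for lower, upper in RED_RANGES:
        mask = cv2.bitwise_or(mask, cv2.inRange(hsv, np.array(lower), np.array(upper)))
    return cv2.countNonZero(mask) / (roi_bgr.shape[0] * roi_bgr.shape[1])

# A solid red patch scores ~1.0; the analyzer requires at least 0.05.
print(red_fraction(np.full((20, 20, 3), (0, 0, 255), dtype=np.uint8)))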
src/autonomous_vision/detectors/traffic_sign_detector.py
CHANGED

@@ -15,7 +15,7 @@ from autonomous_vision.detectors.base import BaseDetector
 from autonomous_vision.models.detections import Detection
 
 
-
+
 GTSRB_CLASSES = {
     0: ("speed_limit_20", "Speed Limit 20"),
     1: ("speed_limit_30", "Speed Limit 30"),
@@ -62,7 +62,7 @@ GTSRB_CLASSES = {
     42: ("end_no_overtaking_trucks", "End No Overtaking Trucks"),
 }
 
-
+
 DK_CLASS_MAPPING = {
     0: "End All Limits", 1: "No Parking", 2: "No Stopping", 3: "Speed Limit 100", 4: "End Speed 100",
     5: "Speed Limit 110", 6: "End Speed 110", 7: "Speed Limit 120", 8: "Speed Limit 30", 9: "Speed Limit 40",
@@ -70,15 +70,15 @@ DK_CLASS_MAPPING = {
     15: "Speed Limit 70", 16: "End Speed 70", 17: "Speed Limit 90", 18: "End Speed 90"
 }
 
-
+
 SIGN_COLORS = {
-    "speed": (0, 165, 255),
+    "speed": (0, 165, 255),
     "limit": (0, 165, 255),
-    "no_": (0, 0, 255),
-    "stop": (0, 0, 200),
+    "no_": (0, 0, 255),
+    "stop": (0, 0, 200),
     "yield": (0, 0, 200),
-    "priority": (0, 100, 200),
-    "curve": (0, 200, 255),
+    "priority": (0, 100, 200),
+    "curve": (0, 200, 255),
     "bumpy": (0, 200, 255),
     "slippery": (0, 200, 255),
     "road_": (0, 200, 255),
@@ -89,12 +89,12 @@ SIGN_COLORS = {
     "ice": (0, 200, 255),
     "wild": (0, 200, 255),
     "general": (0, 200, 255),
-    "turn": (255, 100, 0),
+    "turn": (255, 100, 0),
     "ahead": (255, 100, 0),
     "straight": (255, 100, 0),
     "keep": (255, 100, 0),
     "roundabout": (255, 100, 0),
-    "end": (100, 100, 100),
+    "end": (100, 100, 100),
 }
 
 
@@ -104,7 +104,7 @@ def _get_color(class_name: str) -> tuple[int, int, int]:
    for key, color in SIGN_COLORS.items():
        if key in lower:
            return color
-    return (128, 0, 255)
+    return (128, 0, 255)
 
 
 class TrafficSignDetector(BaseDetector):
@@ -124,7 +124,7 @@ class TrafficSignDetector(BaseDetector):
 
    def initialize(self) -> None:
        """Initialize detection and classification models."""
-
+
        traffic_model_path = Path(__file__).parent.parent / "models" / "yolov8_traffic.pt"
 
        try:
@@ -138,7 +138,7 @@ class TrafficSignDetector(BaseDetector):
        except Exception as e:
            print(f"YOLO loading failed: {e}")
 
-
+
        try:
            from transformers import pipeline
            self._classifier = pipeline(
@@ -157,36 +157,36 @@ class TrafficSignDetector(BaseDetector):
        h, w = frame.shape[:2]
        hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)
 
-
+
        red_mask1 = cv2.inRange(hsv, np.array([0, 70, 50]), np.array([10, 255, 255]))
        red_mask2 = cv2.inRange(hsv, np.array([160, 70, 50]), np.array([180, 255, 255]))
        red_mask = cv2.bitwise_or(red_mask1, red_mask2)
 
-
+
        blue_mask = cv2.inRange(hsv, np.array([100, 70, 50]), np.array([130, 255, 255]))
 
-
+
        yellow_mask = cv2.inRange(hsv, np.array([15, 70, 50]), np.array([35, 255, 255]))
 
-
+
        combined_mask = cv2.bitwise_or(red_mask, blue_mask)
        combined_mask = cv2.bitwise_or(combined_mask, yellow_mask)
 
-
+
        kernel = np.ones((3, 3), np.uint8)
        combined_mask = cv2.morphologyEx(combined_mask, cv2.MORPH_OPEN, kernel)
        combined_mask = cv2.morphologyEx(combined_mask, cv2.MORPH_CLOSE, kernel, iterations=2)
 
-
+
        contours, _ = cv2.findContours(combined_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
 
        for contour in contours:
            area = cv2.contourArea(contour)
-
+
            if 300 < area < (h * w * 0.2):
                x, y, bw, bh = cv2.boundingRect(contour)
                aspect = bw / bh if bh > 0 else 0
-
+
                if 0.5 < aspect < 2.5:
                    pad = int(max(bw, bh) * 0.15)
                    x1 = max(0, x - pad)
@@ -195,7 +195,7 @@ class TrafficSignDetector(BaseDetector):
                    y2 = min(h, y + bh + pad)
                    regions.append((x1, y1, x2, y2))
 
-        return regions[:15]
+        return regions[:15]
 
    def _classify_sign(self, roi: np.ndarray) -> Optional[tuple[str, str, float]]:
        """Classify a traffic sign ROI using GTSRB classifier."""
@@ -204,7 +204,7 @@ class TrafficSignDetector(BaseDetector):
 
        try:
            rgb = cv2.cvtColor(roi, cv2.COLOR_BGR2RGB)
-
+
            if rgb.shape[0] < 16 or rgb.shape[1] < 16:
                return None
 
@@ -259,10 +259,10 @@ class TrafficSignDetector(BaseDetector):
        detections: list[Detection] = []
        covered_regions = []
 
-
+
        if self._yolo_model:
            try:
-
+
                yolo_results = self._yolo_model(frame, verbose=False, conf=0.4)
                for result in yolo_results:
                    for box in result.boxes:
@@ -273,14 +273,14 @@ class TrafficSignDetector(BaseDetector):
 
                        label_text = DK_CLASS_MAPPING.get(cls_id, "Traffic Sign")
 
-
-
+
+
                        verified_label = label_text
                        verified_color = _get_color(label_text)
 
-
+
                        h, w = frame.shape[:2]
-
+
                        pad_x = int((x2 - x1) * 0.1)
                        pad_y = int((y2 - y1) * 0.1)
                        c_x1 = max(0, x1 - pad_x)
@@ -293,14 +293,14 @@ class TrafficSignDetector(BaseDetector):
                        cls_result = self._classify_sign(roi)
                        if cls_result:
                            cls_name, cls_eng, cls_score = cls_result
-
-
-
+
+
+
                            if cls_score > 0.4:
                                verified_label = f"{cls_eng}"
                                verified_color = _get_color(cls_name)
 
-
+
                        detections.append(Detection(
                            class_name="traffic_sign",
                            confidence=conf,
@@ -312,11 +312,11 @@ class TrafficSignDetector(BaseDetector):
            except Exception as e:
                print(f"YOLO detect error: {e}")
 
-
+
        hsv_regions = self._find_sign_regions_hsv(frame)
 
        for region in hsv_regions:
-
+
            is_duplicate = False
            for covered in covered_regions:
                if self._iou(region, covered) > 0.3:
@@ -326,7 +326,7 @@ class TrafficSignDetector(BaseDetector):
            if is_duplicate:
                continue
 
-
+
            x1, y1, x2, y2 = region
            roi = frame[y1:y2, x1:x2]
 
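The dedup loop above calls self._iou, which this commit does not touch; a conventional intersection-over-union for (x1, y1, x2, y2) boxes would look like this sketch (an assumption about the helper, not the repo's actual code):

def iou(a: tuple[int, int, int, int], b: tuple[int, int, int, int]) -> float:
    # Overlap rectangle, clamped to zero when the boxes are disjoint.
    ix1, iy1 = max(a[0], b[0]), max(a[1], b[1])
    ix2, iy2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0, ix2 - ix1) * max(0, iy2 - iy1)
    union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter
    return inter / union if union > 0 else 0.0

# HSV-proposed regions overlapping an existing detection by more than 0.3 are skipped.
print(iou((0, 0, 10, 10), (5, 0, 15, 10)))  # 0.333...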
src/autonomous_vision/detectors/yolo_detector.py
CHANGED

@@ -10,7 +10,7 @@ from autonomous_vision.detectors.base import BaseDetector
 from autonomous_vision.models.detections import Detection
 
 
-
+
 COCO_CLASSES = {
     0: "person",
     1: "bicycle",
@@ -22,19 +22,19 @@ COCO_CLASSES = {
     11: "stop_sign",
 }
 
-
+
 CLASS_COLORS = {
-    "person": (0, 255, 255),
-    "bicycle": (255, 165, 0),
-    "car": (255, 100, 0),
-    "motorcycle": (255, 0, 128),
-    "bus": (0, 128, 255),
-    "truck": (0, 100, 200),
-    "traffic_light": (0, 255, 0),
-    "stop_sign": (0, 0, 255),
+    "person": (0, 255, 255),
+    "bicycle": (255, 165, 0),
+    "car": (255, 100, 0),
+    "motorcycle": (255, 0, 128),
+    "bus": (0, 128, 255),
+    "truck": (0, 100, 200),
+    "traffic_light": (0, 255, 0),
+    "stop_sign": (0, 0, 255),
 }
 
-
+
 ENGLISH_LABELS = {
     "person": "Pedestrian",
     "bicycle": "Cyclist",
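The colour tuples above are BGR (OpenCV's channel order), so (0, 255, 255) is yellow rather than cyan. An illustrative sketch of how such lookup tables are typically consumed when mapping raw class ids to labels and colours; the detector's detect() method itself is outside this diff:

COCO_CLASSES = {0: "person", 1: "bicycle", 2: "car"}            # excerpt from above
CLASS_COLORS = {"person": (0, 255, 255), "car": (255, 100, 0)}
ENGLISH_LABELS = {"person": "Pedestrian", "bicycle": "Cyclist"}

cls_id = 0  # class index as a YOLO model would report it
name = COCO_CLASSES.get(cls_id)
if name is not None:  # ids outside the road-scene subset are ignored
    color = CLASS_COLORS.get(name, (0, 255, 0))
    label = ENGLISH_LABELS.get(name, name)
    print(name, label, color)  # person Pedestrian (0, 255, 255)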
src/autonomous_vision/models/detections.py
CHANGED

@@ -17,17 +17,17 @@ class TrafficLightState(Enum):
 class DetectionClass(Enum):
     """Detection class categories."""
 
-
+
     CAR = "car"
     TRUCK = "truck"
     BUS = "bus"
     MOTORCYCLE = "motorcycle"
     BICYCLE = "bicycle"
 
-
+
     PERSON = "person"
 
-
+
     TRAFFIC_LIGHT = "traffic_light"
     TRAFFIC_SIGN = "traffic_sign"
 
@@ -38,10 +38,10 @@ class Detection:
 
     class_name: str
     confidence: float
-    bbox: tuple[int, int, int, int]
-    label: str = ""
-    color: tuple[int, int, int] = (0, 255, 0)
-    distance: Optional[float] = None
+    bbox: tuple[int, int, int, int]
+    label: str = ""
+    color: tuple[int, int, int] = (0, 255, 0)
+    distance: Optional[float] = None
 
     @property
     def center(self) -> tuple[int, int]:
@@ -60,9 +60,9 @@
 class LaneInfo:
     """Information about detected lanes."""
 
-    left_lane: Optional[list[tuple[int, int]]] = None
+    left_lane: Optional[list[tuple[int, int]]] = None
     right_lane: Optional[list[tuple[int, int]]] = None
-    center_offset: float = 0.0
+    center_offset: float = 0.0
     lane_width: Optional[int] = None
 
 
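The field syntax with defaults implies Detection is a dataclass. An illustrative standalone mirror of those fields and the center property visible above; the real class lives in src/autonomous_vision/models/detections.py, and the center body here is an assumed midpoint, not the repo's code:

from dataclasses import dataclass
from typing import Optional

@dataclass
class DetectionSketch:
    class_name: str
    confidence: float
    bbox: tuple[int, int, int, int]            # (x1, y1, x2, y2) in pixels
    label: str = ""
    color: tuple[int, int, int] = (0, 255, 0)  # BGR
    distance: Optional[float] = None           # metres, filled by the depth estimator

    @property
    def center(self) -> tuple[int, int]:
        x1, y1, x2, y2 = self.bbox
        return (x1 + x2) // 2, (y1 + y2) // 2

det = DetectionSketch("car", 0.91, (120, 80, 360, 240), label="Car", distance=12.5)
print(det.center)  # (240, 160)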
src/autonomous_vision/pipeline/annotator.py
CHANGED

@@ -29,10 +29,10 @@ class FrameAnnotator:
        x1, y1, x2, y2 = detection.bbox
        color = detection.color
 
-
+
        cv2.rectangle(frame, (x1, y1), (x2, y2), color, self.thickness)
 
-
+
        corner_len = min(20, (x2 - x1) // 4, (y2 - y1) // 4)
        cv2.line(frame, (x1, y1), (x1 + corner_len, y1), color, self.thickness + 1)
        cv2.line(frame, (x1, y1), (x1, y1 + corner_len), color, self.thickness + 1)
@@ -43,7 +43,7 @@ class FrameAnnotator:
        cv2.line(frame, (x2, y2), (x2 - corner_len, y2), color, self.thickness + 1)
        cv2.line(frame, (x2, y2), (x2, y2 - corner_len), color, self.thickness + 1)
 
-
+
        label = detection.label
        display_distance = detection.distance is not None and detection.class_name in {
            "car", "truck", "bus", "motorcycle"
@@ -220,24 +220,24 @@ class FrameAnnotator:
        panel_w = 200
        panel_h = 105
 
-
+
        overlay = frame.copy()
        cv2.rectangle(overlay, (panel_x, panel_y), (panel_x + panel_w, panel_y + panel_h), (30, 30, 30), -1)
        cv2.addWeighted(overlay, 0.85, frame, 0.15, 0, frame)
        cv2.rectangle(frame, (panel_x, panel_y), (panel_x + panel_w, panel_y + panel_h), (80, 80, 80), 1)
 
-
+
        cv2.putText(frame, "DETECTION STATS", (panel_x + 10, panel_y + 20), self.font, 0.5, (200, 200, 200), 1)
 
-
+
        stats = [
-            (f"Vehicles: {vehicle_count}", (255, 200, 100)),
-            (f"Pedestrians: {pedestrian_count}", (100, 255, 255)),
-            (f"Signs: {sign_count}", (255, 150, 255)),
+            (f"Vehicles: {vehicle_count}", (255, 200, 100)),
+            (f"Pedestrians: {pedestrian_count}", (100, 255, 255)),
+            (f"Signs: {sign_count}", (255, 150, 255)),
        ]
 
        if fps is not None:
-            stats.append((f"FPS: {fps:.1f}", (200, 200, 200)))
+            stats.append((f"FPS: {fps:.1f}", (200, 200, 200)))
 
        for i, (text, color) in enumerate(stats):
            cv2.putText(
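A standalone version of the corner-bracket accents drawn over the plain rectangle above, shown for the top-left corner only; the other three corners mirror it with the sign flips visible in the hunk:

import cv2
import numpy as np

frame = np.zeros((480, 640, 3), dtype=np.uint8)
x1, y1, x2, y2 = 100, 100, 300, 250
color, thickness = (255, 100, 0), 2

cv2.rectangle(frame, (x1, y1), (x2, y2), color, thickness)
corner_len = min(20, (x2 - x1) // 4, (y2 - y1) // 4)
cv2.line(frame, (x1, y1), (x1 + corner_len, y1), color, thickness + 1)  # top-left, horizontal
cv2.line(frame, (x1, y1), (x1, y1 + corner_len), color, thickness + 1)  # top-left, vertical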
src/autonomous_vision/pipeline/processor.py
CHANGED

@@ -86,25 +86,25 @@ class VideoProcessor:
        if not self._initialized:
            self.initialize()
 
-
+
        detections = self.yolo_detector.detect(frame)
 
-
+
        if self.traffic_sign_detector and self.traffic_sign_detector.is_available:
            sign_detections = self.traffic_sign_detector.detect(frame)
            detections.extend(sign_detections)
 
-
+
        lane_info = None
        if self.lane_detector:
            lane_info = self.lane_detector.detect(frame)
 
-
+
        depth_map = None
        if self.depth_estimator and self.depth_estimator.is_available:
            depth_map = self.depth_estimator.estimate_depth(frame)
 
-
+
        if depth_map is not None:
            for detection in detections:
                distance = self.depth_estimator.get_distance_for_bbox(
@@ -112,7 +112,7 @@ class VideoProcessor:
                )
                detection.distance = distance
 
-
+
        traffic_light_state = TrafficLightState.UNKNOWN
        if self.traffic_light_analyzer:
            for detection in detections:
@@ -120,7 +120,7 @@ class VideoProcessor:
                    traffic_light_state = self.traffic_light_analyzer.analyze_detection(
                        frame, detection
                    )
-
+
                    if traffic_light_state == TrafficLightState.RED:
                        detection.color = (0, 0, 255)
                        detection.label = "Sygnalizacja: Nie mozesz jechac"
@@ -130,7 +130,7 @@ class VideoProcessor:
                    elif traffic_light_state == TrafficLightState.GREEN:
                        detection.color = (0, 255, 0)
                        detection.label = "Sygnalizacja: Jedz"
-                    break
+                    break
 
        return FrameResult(
            frame_number=0,
@@ -185,12 +185,12 @@ class VideoProcessor:
        if not self._initialized:
            self.initialize()
 
-
+
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            raise ValueError(f"Cannot open video: {input_path}")
 
-
+
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
@@ -199,18 +199,18 @@ class VideoProcessor:
        if max_frames:
            total_frames = min(total_frames, max_frames)
 
-
+
        if output_path is None:
            output_path = tempfile.mktemp(suffix=".mp4")
 
-
+
        fourcc = cv2.VideoWriter_fourcc(*"mp4v")
        writer = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
 
-
+
        analytics = VideoAnalytics(total_frames=total_frames)
 
-
+
        frame_times: list[float] = []
 
        try:
@@ -219,31 +219,31 @@ class VideoProcessor:
                if not ret:
                    break
 
-
+
                start_time = time.time()
 
-
+
                result = self.process_frame(frame)
                result.frame_number = frame_idx
 
-
+
                analytics.update_from_frame(result)
 
-
+
                process_time = time.time() - start_time
                frame_times.append(process_time)
 
-
+
                recent_times = frame_times[-30:] if len(frame_times) >= 30 else frame_times
                avg_fps = 1.0 / (sum(recent_times) / len(recent_times)) if recent_times else 0
 
-
+
                annotated = self.annotate_frame(frame, result, fps=avg_fps)
 
-
+
                writer.write(annotated)
 
-
+
                if progress_callback:
                    progress_callback(frame_idx + 1, total_frames)
 
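The loop smooths the displayed FPS by averaging the last 30 per-frame processing times before inverting; the same calculation in isolation:

frame_times: list[float] = []

def rolling_fps(process_time: float) -> float:
    # Mirrors the loop above: average the most recent 30 frame times, invert.
    frame_times.append(process_time)
    recent = frame_times[-30:] if len(frame_times) >= 30 else frame_times
    return 1.0 / (sum(recent) / len(recent)) if recent else 0.0

print(rolling_fps(0.05))  # a single 50 ms frame -> 20.0 FPS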