Spaces:
Runtime error
Runtime error
mansi.modi@streebo.com committed on
Commit ·
3bacb10
1
Parent(s): 2c83160
init
Browse files- .DS_Store +0 -0
- app.py +157 -0
- requirements.txt +5 -0
.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
app.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import cv2
|
| 2 |
+
import numpy as np
|
| 3 |
+
import torch
|
| 4 |
+
import torchvision
|
| 5 |
+
from torchvision.transforms import functional as F
|
| 6 |
+
import time
|
| 7 |
+
import random
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
# Set up colors for visualization (vibrant colors for better visual impact).
# Tuples are in OpenCV's BGR channel order; visualize() cycles through them
# per detection (COLORS[i % len(COLORS)]).
COLORS = [(0, 255, 0), (0, 0, 255), (255, 0, 0), (0, 255, 255), (255, 255, 0),
          (255, 0, 255), (80, 70, 180), (250, 80, 190), (245, 145, 50)]
|
| 13 |
+
|
| 14 |
+
# COCO dataset classes (only keep the ones we want to detect).
# Index positions must match the label ids emitted by the torchvision
# Mask R-CNN model; 'N/A' entries are placeholder ids COCO never assigns.
# You can modify this list to focus only on specific objects.
CLASSES = [
    '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
    'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
    'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
    'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
|
| 30 |
+
|
| 31 |
+
# Define our target classes (simplified for a more focused demo).
# get_prediction() drops any detection whose class is not in this list;
# each entry must appear verbatim in CLASSES above.
TARGET_CLASSES = ['person', 'bottle', 'cell phone', 'cup', 'laptop', 'chair']
|
| 33 |
+
|
| 34 |
+
def get_prediction(img, threshold=0.7):
    """Run Mask R-CNN on one frame and keep confident, targeted detections.

    Args:
        img: HxWx3 uint8 frame as produced by OpenCV (BGR channel order).
        threshold: minimum confidence score for a detection to be kept.

    Returns:
        Tuple ``(masks, boxes, labels, scores)`` of parallel lists — soft
        mask (HxW float array), box coordinates, class-name string and
        confidence for each kept detection. All lists are empty when
        nothing clears the threshold.
    """
    # OpenCV frames are BGR, but torchvision detection models are trained
    # on RGB images — convert before tensorizing (fixes degraded accuracy).
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    tensor = F.to_tensor(rgb)

    # Keep the input on the same device as the model; without this the
    # script crashes with a CPU/GPU tensor mismatch when CUDA is available.
    device = next(model.parameters()).device
    prediction = model([tensor.to(device)])

    # Filter predictions by confidence and target classes
    masks = []
    boxes = []
    labels = []
    scores = []

    pred = prediction[0]
    pred_classes = [CLASSES[i] for i in pred['labels']]
    pred_masks = pred['masks'].detach().cpu().numpy()
    pred_boxes = pred['boxes'].detach().cpu().numpy()
    pred_scores = pred['scores'].detach().cpu().numpy()

    for i, score in enumerate(pred_scores):
        if score > threshold and pred_classes[i] in TARGET_CLASSES:
            # masks come out as (1, H, W); drop the channel axis.
            masks.append(pred_masks[i][0])
            boxes.append(pred_boxes[i])
            labels.append(pred_classes[i])
            scores.append(score)

    return masks, boxes, labels, scores
|
| 58 |
+
|
| 59 |
+
def random_color():
    """Pick one random vibrant color from the shared COLORS palette."""
    # randrange(n) draws exactly the same values as randint(0, n - 1).
    idx = random.randrange(len(COLORS))
    return COLORS[idx]
|
| 62 |
+
|
| 63 |
+
def visualize(img, masks, boxes, labels, scores):
    """Overlay masks, boxes, captions and class tallies; return the frame.

    The four detection lists are parallel (as returned by get_prediction).
    Mask blending produces a new image array; the annotated result is
    returned to the caller.
    """
    h, w = img.shape[:2]
    mask_alpha = 0.5  # transparency factor for the colored mask overlay

    for idx, (mask, box, label, score) in enumerate(zip(masks, boxes, labels, scores)):
        color = COLORS[idx % len(COLORS)]

        # Blend a solid-color version of the soft mask into the frame.
        bin_mask = (mask > 0.5).astype(np.uint8)
        overlay = np.zeros((h, w, 3), dtype=np.uint8)
        overlay[bin_mask == 1] = color
        img = cv2.addWeighted(img, 1, overlay, mask_alpha, 0)

        # Bounding box — thicker lines for higher-confidence detections.
        x1, y1, x2, y2 = box.astype(int)
        cv2.rectangle(img, (x1, y1), (x2, y2), color, max(1, int(score * 3)))

        # Filled label background with the caption drawn in white on top.
        label_text = f"{label}: {score:.2f}"
        (text_w, text_h), _ = cv2.getTextSize(label_text, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 2)
        y1 = max(y1, text_h)  # keep the caption from running off the top edge
        cv2.rectangle(img, (x1, y1 - text_h - 10), (x1 + text_w, y1), color, -1)
        cv2.putText(img, label_text, (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

        # Trace the mask outline for a crisper silhouette effect.
        outlines, _ = cv2.findContours(bin_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        cv2.drawContours(img, outlines, -1, color, 2)

    # Title banner.
    cv2.putText(img, "Mask R-CNN Detection", (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (255, 255, 255), 2)

    # Per-class tallies, one row every 25 px starting at y = 60.
    counts = {}
    for label in labels:
        counts[label] = counts.get(label, 0) + 1
    for row, (cls, count) in enumerate(counts.items()):
        cv2.putText(img, f"{cls}: {count}", (10, 60 + 25 * row),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

    return img
|
| 112 |
+
|
| 113 |
+
# Load a pre-trained Mask R-CNN model
print("Loading Mask R-CNN model...")
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
model.eval()  # inference mode: freezes dropout / batch-norm statistics
if torch.cuda.is_available():
    model.cuda()
    print("Using GPU for inference")
else:
    print("Using CPU for inference")

# Start video capture
print("Starting webcam...")
cap = cv2.VideoCapture(0)  # Use 0 for default webcam
# Fail fast with a clear message instead of looping on empty reads when no
# camera exists (e.g. a headless server — the likely cause of the Space's
# "Runtime error"; requirements.txt ships gradio, which a web UI would need).
if not cap.isOpened():
    raise RuntimeError("Could not open webcam (device 0); is a camera attached?")

# Set properties for better quality
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

print("Press 'q' to quit")

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Get predictions (no_grad avoids building autograd graphs per frame)
        with torch.no_grad():
            masks, boxes, labels, scores = get_prediction(frame)

        # Visualize results
        result = visualize(frame, masks, boxes, labels, scores)

        # Show the result
        cv2.imshow('Mask R-CNN Real-time Object Detection', result)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(30) & 0xFF == ord('q'):
            break
finally:
    # Release the camera and windows even if inference raises mid-loop
    cap.release()
    cv2.destroyAllWindows()
    print("Demo finished!")
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
opencv-python
|
| 2 |
+
numpy
|
| 3 |
+
torch
|
| 4 |
+
torchvision
|
| 5 |
+
gradio
|