# File size: 1,420 Bytes
import cv2
import torch
from transformers import (
    AutoImageProcessor,
    AutoModelForObjectDetection,
    AutoTokenizer,
)
# Hub checkpoint used for detection.
# NOTE(review): the original id "facebook/detectron2_resnet50" does not appear
# to be a published Hub checkpoint (Detectron2 models are not loaded through
# transformers); DETR ResNet-50 is substituted here -- confirm the intended
# model before shipping.
MODEL_NAME = "facebook/detr-resnet-50"

# Detections scoring below this confidence are discarded.
SCORE_THRESHOLD = 0.5

CAMERA_INDEX = 0
WINDOW_TITLE = "Object Detection"
BOX_COLOR = (0, 255, 0)


def _detect(model, image_processor, frame):
    """Run object detection on a single BGR frame.

    Args:
        model: Object-detection model loaded via ``AutoModelForObjectDetection``.
        image_processor: Matching processor loaded via ``AutoImageProcessor``.
        frame: Image array as returned by ``cv2.VideoCapture.read`` (BGR).

    Returns:
        A ``(boxes, labels, scores)`` tuple of plain Python lists. Each box is
        ``[x1, y1, x2, y2]`` in pixel coordinates of ``frame``.
    """
    # OpenCV delivers BGR; the image processor expects RGB.
    image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    inputs = image_processor(images=image, return_tensors="pt")

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # The raw model output holds normalised boxes; post-processing rescales
    # them to the frame size and drops low-confidence detections.
    height, width = frame.shape[:2]
    result = image_processor.post_process_object_detection(
        outputs,
        threshold=SCORE_THRESHOLD,
        target_sizes=torch.tensor([[height, width]]),
    )[0]
    return (
        result["boxes"].tolist(),
        result["labels"].tolist(),
        result["scores"].tolist(),
    )


def _draw_detections(frame, boxes, labels, scores, id2label):
    """Draw a rectangle and a caption on ``frame`` for every detection.

    Args:
        frame: BGR image array, modified in place.
        boxes: List of ``[x1, y1, x2, y2]`` pixel boxes.
        labels: List of integer class ids, parallel to ``boxes``.
        scores: List of confidence scores, parallel to ``boxes``.
        id2label: Mapping from class id to a readable class name.
    """
    for box, label, score in zip(boxes, labels, scores):
        x1, y1, x2, y2 = (int(round(value)) for value in box)
        cv2.rectangle(frame, (x1, y1), (x2, y2), BOX_COLOR, 2)
        caption = f"{id2label.get(label, label)}: {score:.2f}"
        # Clamp the text baseline so captions of boxes touching the top
        # edge stay inside the frame.
        cv2.putText(
            frame,
            caption,
            (x1, max(y1 - 10, 15)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.9,
            BOX_COLOR,
            2,
        )


def main():
    """Show live webcam frames annotated with detections until 'q' is pressed.

    Raises:
        SystemExit: With status 1 when the camera cannot be opened.
    """
    # Load the image processor and the model.
    image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
    model = AutoModelForObjectDetection.from_pretrained(MODEL_NAME)
    model.eval()

    # Set up the camera.
    cap = cv2.VideoCapture(CAMERA_INDEX)
    if not cap.isOpened():
        print("Failed to open camera.")
        cap.release()
        raise SystemExit(1)

    try:
        # Main loop for object detection.
        while True:
            ok, frame = cap.read()
            if not ok:
                print("Failed to capture frame.")
                break

            boxes, labels, scores = _detect(model, image_processor, frame)
            _draw_detections(
                frame, boxes, labels, scores, model.config.id2label
            )

            # Display the frame.
            cv2.imshow(WINDOW_TITLE, frame)

            # Exit on 'q' key press.
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Release the camera and close the window even if the loop raised.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()