import cv2
import torch
from transformers import AutoImageProcessor, AutoModelForObjectDetection, AutoTokenizer

# Hugging Face Hub checkpoint used for detection. The original identifier
# ("facebook/detectron2_resnet50") does not appear to be a Transformers
# object-detection checkpoint, so a DETR checkpoint is used instead.
# NOTE(review): this checkpoint's default revision may require the `timm`
# package for its backbone -- confirm in the target environment.
MODEL_NAME = "facebook/detr-resnet-50"

# Detections scoring below this confidence are not drawn.
SCORE_THRESHOLD = 0.9

WINDOW_TITLE = "Object Detection"
BOX_COLOR = (0, 255, 0)  # green, in OpenCV's BGR channel order


def main():
    """Run live object detection on the default camera until 'q' is pressed.

    Loads the detection model and its image processor, then for every
    captured frame: converts it from BGR to RGB, runs the model, decodes the
    raw outputs into pixel-space boxes, and draws each box with its class
    name on the frame before displaying it.

    Raises:
        SystemExit: with status 1 if the camera cannot be opened.
    """
    model = AutoModelForObjectDetection.from_pretrained(MODEL_NAME)
    # Images need an image processor (resize/normalize); a text tokenizer
    # cannot handle a pixel array.
    image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
    model.eval()

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Failed to open camera.")
        cap.release()
        # Exit with a non-zero status so callers can detect the failure.
        raise SystemExit(1)

    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Failed to capture frame.")
                break

            # OpenCV delivers BGR frames; the model expects RGB input.
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            inputs = image_processor(images=image, return_tensors="pt")

            # Inference only: skip building autograd graphs for every frame.
            with torch.no_grad():
                outputs = model(**inputs)

            # The raw outputs are logits plus normalized boxes; decode them
            # into (x1, y1, x2, y2) pixel coordinates for this frame size.
            height, width = frame.shape[:2]
            detections = image_processor.post_process_object_detection(
                outputs,
                threshold=SCORE_THRESHOLD,
                target_sizes=[(height, width)],
            )[0]

            for box, label in zip(detections["boxes"], detections["labels"]):
                x1, y1, x2, y2 = (int(v) for v in box.tolist())
                class_id = int(label)
                # Fall back to the numeric id if the config has no name.
                class_name = model.config.id2label.get(class_id, str(class_id))
                cv2.rectangle(frame, (x1, y1), (x2, y2), BOX_COLOR, 2)
                # Keep the caption inside the frame for boxes at the top edge.
                text_origin = (x1, max(y1 - 10, 20))
                cv2.putText(
                    frame,
                    class_name,
                    text_origin,
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.9,
                    BOX_COLOR,
                    2,
                )

            cv2.imshow(WINDOW_TITLE, frame)

            # Poll the keyboard for ~1 ms; 'q' ends the session.
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Always free the camera and close windows, even after an error.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()