from autodistill_gpt_4v import GPT4V from autodistill.detection import CaptionOntology from autodistill_grounded_sam import GroundedSAM import supervision as sv from autodistill.core.custom_detection_model import CustomDetectionModel import cv2 classes = ["mercedes", "toyota"] SAMGPT = CustomDetectionModel( detection_model=GroundedSAM( CaptionOntology({"car": "car"}) ), classification_model=GPT4V( CaptionOntology({k: k for k in classes}) ) ) IMAGE = "mercedes.jpeg" results = SAMGPT.predict(IMAGE) image = cv2.imread(IMAGE) annotator = sv.MaskAnnotator() label_annotator = sv.LabelAnnotator() labels = [ f"{classes[class_id]} {confidence:0.2f}" for _, _, confidence, class_id, _ in results ] annotated_frame = annotator.annotate( scene=image.copy(), detections=results ) annotated_frame = label_annotator.annotate( scene=annotated_frame, labels=labels, detections=results ) sv.plot_image(annotated_frame, size=(8, 8))