Spaces:

ZihanWang314
/

TSTAR

Runtime error

File size: 10,748 Bytes

d686824


import os
import cv2
import os.path as osp
from mmengine.config import Config
from mmengine.dataset import Compose
from mmdet.apis import init_detector
from mmdet.utils import get_test_pipeline_cfg
# from mmengine.runner.amp import autocast
from torch.amp import autocast
import torch
import supervision as sv
from typing import Dict, Optional, Sequence, List

import supervision as sv
class LabelAnnotator(sv.LabelAnnotator):
    """``sv.LabelAnnotator`` variant with a simplified label-background box."""

    @staticmethod
    def resolve_text_background_xyxy(center_coordinates, text_wh, position):
        """
        Compute the label background rectangle as ``(x1, y1, x2, y2)``.

        The given coordinates are used directly as the rectangle's top-left
        corner, and the text size is added to obtain the bottom-right corner.

        Args:
            center_coordinates: Pair ``(x, y)`` used as the top-left corner.
            text_wh: Pair ``(width, height)`` of the rendered text.
            position: Accepted for signature compatibility; not used here.

        Returns:
            Tuple ``(x1, y1, x2, y2)``.
        """
        left, top = center_coordinates
        width, height = text_wh
        right = left + width
        bottom = top + height
        return left, top, right, bottom


class YoloInterface:
    """Base class for the YOLO detector wrappers defined in this module."""

    def __init__(self):
        """No-op constructor: the base class loads no model and sets no state."""

    def set_BBoxAnnotator(self):
        """
        Create the annotators used for drawing detections.

        Sets ``self.BOUNDING_BOX_ANNOTATOR`` (box outlines, thickness 1) and
        ``self.LABEL_ANNOTATOR`` (text labels).
        """
        box_style = dict(thickness=1)
        label_style = dict(text_padding=4, text_scale=0.5, text_thickness=1)

        self.BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(**box_style)
        self.LABEL_ANNOTATOR = LabelAnnotator(**label_style)

class YoloWorldInterface(YoloInterface):
    """Wrapper around an mmdet-built YOLO-World detector with text prompts."""

    def __init__(self, config_path: str, checkpoint_path: str, device: str = "cuda:0"):
        """
        Initialize the YOLO-World model with the given configuration and checkpoint.

        Args:
            config_path (str): Path to the model configuration file.
            checkpoint_path (str): Path to the model checkpoint.
            device (str): Device to run the model on (e.g., 'cuda:0', 'cpu').
        """
        self.config_path = config_path
        self.checkpoint_path = checkpoint_path
        self.device = device

        # Load configuration; the work dir is named after the config file.
        cfg = Config.fromfile(config_path)
        cfg.work_dir = osp.join('./work_dirs', osp.splitext(osp.basename(config_path))[0])
        cfg.load_from = checkpoint_path

        # Initialize the model and the drawing helpers.
        self.model = init_detector(cfg, checkpoint=checkpoint_path, device=device)
        self.set_BBoxAnnotator()

        # Build the test pipeline. Its first step is swapped so that images are
        # taken from in-memory arrays (the 'img' key) rather than loaded from disk.
        pipeline_cfg = self.model.cfg.test_dataloader.dataset.pipeline
        pipeline_cfg[0].type = 'mmdet.LoadImageFromNDArray'
        self.test_pipeline = Compose(pipeline_cfg)

    def reparameterize_object_list(self, target_objects: List[str], cue_objects: List[str]):
        """
        Reparameterize the detect object list to be used by the YOLO model.

        Stores the prompts in ``self.texts``; class ids returned by the
        inference methods index into that list.

        Args:
            target_objects (List[str]): List of target object names.
            cue_objects (List[str]): List of cue object names.
        """
        combined_texts = target_objects + cue_objects

        # One single-element list per prompt, followed by a blank entry
        # (presumably the padding prompt YOLO-World expects -- TODO confirm).
        self.texts = [[obj.strip()] for obj in combined_texts] + [[' ']]

        self.model.reparameterize(self.texts)

    def _build_batch(self, img):
        """Run one image array through the test pipeline and wrap it as a batch of one."""
        data_info = dict(img_id=0, img=img, texts=self.texts)
        data_info = self.test_pipeline(data_info)
        return dict(inputs=data_info['inputs'].unsqueeze(0),
                    data_samples=[data_info['data_samples']])

    def _run_model(self, data_batch, use_amp: bool):
        """Call ``model.test_step`` without gradients, optionally under autocast."""
        # torch.amp.autocast requires the device type ('cuda', 'cpu', ...),
        # not the full device string such as 'cuda:0'.
        device_type = torch.device(self.device).type
        with autocast(device_type=device_type, enabled=use_amp), torch.no_grad():
            return self.model.test_step(data_batch)

    @staticmethod
    def _filter_instances(pred_instances, max_dets, score_threshold):
        """Keep predictions scoring above the threshold, capped at the ``max_dets`` best."""
        pred_instances = pred_instances[pred_instances.scores.float() > score_threshold]
        if len(pred_instances.scores) > max_dets:
            indices = pred_instances.scores.float().topk(max_dets)[1]
            pred_instances = pred_instances[indices]
        return pred_instances

    @staticmethod
    def _to_detections(pred_instances):
        """Convert filtered predictions to ``sv.Detections`` backed by NumPy arrays."""
        # Convert first so that every field, masks included, is a NumPy array.
        pred_instances = pred_instances.cpu().numpy()
        return sv.Detections(
            xyxy=pred_instances['bboxes'],
            class_id=pred_instances['labels'],
            confidence=pred_instances['scores'],
            mask=pred_instances.get('masks', None),
        )

    def inference(self, image: str, max_dets: int = 100, score_threshold: float = 0.3, use_amp: bool = False):
        """
        Run inference on a single image.

        Args:
            image (str): Path to the image. An already-loaded image array is
                also accepted and passed to the pipeline unchanged.
            max_dets (int): Maximum number of detections to keep.
            score_threshold (float): Score threshold for filtering detections.
            use_amp (bool): Whether to use mixed precision for inference.

        Returns:
            sv.Detections: Detection results.

        Raises:
            FileNotFoundError: If ``image`` is a path that cannot be read.
        """
        # The pipeline was rebuilt around LoadImageFromNDArray, which consumes
        # an array under 'img'; a path therefore has to be decoded here first.
        if isinstance(image, str):
            img = cv2.imread(image)
            if img is None:
                raise FileNotFoundError(f"Could not read image: {image}")
        else:
            img = image

        output = self._run_model(self._build_batch(img), use_amp)[0]
        pred_instances = self._filter_instances(
            output.pred_instances, max_dets, score_threshold)
        return self._to_detections(pred_instances)

    def inference_detector(self, images, max_dets=50, score_threshold=0.2, use_amp: bool = False):
        """
        Run detection for the searcher interface.

        Only ``images[0]`` is processed for now (batched search is still TBD),
        so the returned list has one entry per model output for that image.

        Args:
            images: Sequence of image arrays; only the first one is used.
            max_dets (int): Maximum number of detections to keep per output.
            score_threshold (float): Score threshold for filtering detections.
            use_amp (bool): Whether to use mixed precision for inference.

        Returns:
            list[sv.Detections]: Detections per model output. The same list is
            stored in ``self.detections_inbatch`` and the raw (filtered) model
            outputs in ``self.detect_outputs_raw``.
        """
        outputs = self._run_model(self._build_batch(images[0]), use_amp)

        detections_inbatch = []
        for output in outputs:
            pred_instances = self._filter_instances(
                output.pred_instances, max_dets, score_threshold)
            # Keep the raw output in sync with what is returned to the caller.
            output.pred_instances = pred_instances
            detections_inbatch.append(self._to_detections(pred_instances))

        self.detect_outputs_raw = outputs
        self.detections_inbatch = detections_inbatch
        return detections_inbatch

    def bbox_visualization(self, images, detections_inbatch):
        """
        Draw boxes and "<prompt> <score>" labels on copies of the images.

        Args:
            images: Sequence of image arrays, aligned by index with
                ``detections_inbatch``.
            detections_inbatch: List of ``sv.Detections``, one per image.

        Returns:
            list: Annotated image copies, one per entry of
            ``detections_inbatch``; the input images are not modified.
        """
        texts = self.texts
        anno_images = []
        # Pair each detection set with its own image (same index).
        for image, detections in zip(images, detections_inbatch):
            labels = [
                f"{texts[class_id][0]} {confidence:0.2f}"
                for class_id, confidence in zip(detections.class_id, detections.confidence)
            ]
            anno_image = image.copy()
            anno_image = self.BOUNDING_BOX_ANNOTATOR.annotate(anno_image, detections)
            anno_image = self.LABEL_ANNOTATOR.annotate(anno_image, detections, labels=labels)
            anno_images.append(anno_image)

        return anno_images



import torch
from typing import List
import supervision as sv  # 确保已安装 Supervision 库
import os.path as osp

class YoloV5Interface(YoloInterface):
    """Wrapper around the pretrained YOLOv5s model loaded through ``torch.hub``."""

    def __init__(self,config_path="ultralytics/yolov5", checkpoint_path: str = 'yolov5s', device: str = 'cuda:0'):
        """
        Initialize the YOLOv5 model.

        Args:
            config_path (str): Currently unused; the hub repository is fixed
                to "ultralytics/yolov5".
            checkpoint_path (str): Currently unused; the "yolov5s" variant is
                always loaded.
            device (str): Device to run the model on (e.g., 'cuda:0', 'cpu').
        """
        self.device = device
        # NOTE(review): config_path / checkpoint_path are ignored here. Their
        # defaults match the hard-coded values, but honoring them could break
        # callers that pass unrelated paths -- confirm against callers first.
        self.model = torch.hub.load("ultralytics/yolov5", "yolov5s", pretrained=True)

        self.model.to(self.device)
        self.model.eval()
        self.target_classes = None  # class names to keep; None disables filtering

        self.texts = None
        self.test_pipeline = None

    def reparameterize_object_list(self, target_objects: List[str], cue_objects: List[str]):
        """
        Set the object names that inference results are restricted to.

        Args:
            target_objects (List[str]): List of target object names.
            cue_objects (List[str]): List of cue object names.
        """
        self.target_classes = target_objects + cue_objects

    def _target_class_ids(self):
        """Return the model class ids whose names were requested, or None if no filter is set."""
        if self.target_classes is None:
            return None
        names = self.model.names
        # Accept both a {id: name} mapping and a plain list of names.
        indexed_names = names.items() if isinstance(names, dict) else enumerate(names)
        wanted = set(self.target_classes)
        return [class_id for class_id, name in indexed_names if name in wanted]

    def inference(self, images: str, max_dets: int = 100, score_threshold: float = 0.3, use_amp: bool = False):
        """
        Run inference and return the filtered detections for each image.

        Args:
            images: Input forwarded unchanged to the YOLOv5 model.
            max_dets (int): Maximum number of detections to keep per image.
            score_threshold (float): Score threshold for filtering detections.
            use_amp (bool): Accepted for interface compatibility; not used.

        Returns:
            list[sv.Detections]: One entry per image in the model output.
        """
        results = self.model(images, size=640)  # input size can be adjusted if needed
        target_class_ids = self._target_class_ids()

        filtered_detections = []
        # results.pred: one tensor per image, rows [x1, y1, x2, y2, confidence, class]
        for detections in results.pred:
            # Drop detections with confidence <= score_threshold.
            detections = detections[detections[:, 4] > score_threshold]

            # Restrict to the requested classes before applying the cap, so
            # unrelated classes cannot use up the max_dets budget.
            if target_class_ids is not None:
                wanted = torch.tensor(target_class_ids,
                                      dtype=detections.dtype,
                                      device=detections.device)
                detections = detections[torch.isin(detections[:, 5], wanted)]

            # Keep only the max_dets highest-scoring detections.
            if len(detections) > max_dets:
                detections = detections[detections[:, 4].topk(max_dets)[1]]

            # Convert and collect for every image, whether or not a class
            # filter is active.
            filtered_detections.append(
                sv.Detections(
                    xyxy=detections[:, :4].cpu().numpy(),
                    confidence=detections[:, 4].cpu().numpy(),
                    class_id=detections[:, 5].cpu().numpy().astype(int),
                )
            )

        return filtered_detections