Thastp commited on
Commit
d21fe9a
·
verified ·
1 Parent(s): b824ae9

Upload processor

Browse files
image_processing_rf_detr.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Tuple, Optional, Literal
2
+
3
+ import torch
4
+ from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
5
+ from torchvision.transforms import ToTensor, Normalize
6
+ from rfdetr.util.misc import nested_tensor_from_tensor_list
7
+ from rfdetr.models.lwdetr import PostProcess
8
+
9
+
10
class RFDetrImageProcessor(BaseImageProcessor):
    """Image processor for RF-DETR object-detection models.

    Turns PIL images (or CHW tensors) into normalized, zero-padded batches
    (``pixel_values`` + ``pixel_mask``) and converts model outputs back into
    per-image detection dictionaries using rfdetr's ``PostProcess``.
    """

    model_input_names = ["pixel_values", "pixel_mask"]

    def __init__(
        self,
        model_name: Literal['RFDETRBase', 'RFDETRLarge'] = 'RFDETRBase',
        num_select: int = 300,
        image_mean: Optional[List[float]] = None,
        image_std: Optional[List[float]] = None,
        **kwargs
    ):
        """
        Parameters
        ----------
        model_name:
            RF-DETR variant this processor is paired with.
        num_select:
            number of top-scoring detections kept during post-processing.
        image_mean, image_std:
            per-channel normalization statistics; default to the ImageNet
            values when omitted (``None`` sentinel avoids the mutable
            default-argument pitfall).
        """
        super().__init__(**kwargs)
        self.model_name = model_name
        self.config = {
            'image_mean': [0.485, 0.456, 0.406] if image_mean is None else image_mean,
            'image_std': [0.229, 0.224, 0.225] if image_std is None else image_std,
        }
        self.post_process_config = {
            'num_select': num_select,
        }

    def post_process_object_detection(
        self,
        outputs,
        target_sizes: List[Tuple],
        **kwargs
    ) -> List[Dict[str, torch.Tensor]]:
        """Convert raw model outputs into per-image detection dicts.

        Parameters
        ----------
        outputs:
            outputs from model loaded with AutoModelForObjectDetection, or a
            ``[logits, pred_boxes]`` sequence as returned by an ONNX session
        target_sizes: list[tuple]
            original sizes of the images, used to rescale boxes.

        Returns
        -------
        One dict per image (scores/labels/boxes), as produced by rfdetr's
        ``PostProcess``.
        """
        if isinstance(outputs, list):  ### Handle ONNX outputs
            # as_tensor avoids an extra copy when the array's buffer is
            # already torch-compatible.
            logits = torch.as_tensor(outputs[0])
            pred_boxes = torch.as_tensor(outputs[1])
        else:
            logits = outputs.logits
            pred_boxes = outputs.pred_boxes

        outputs = {
            'pred_logits': logits,
            'pred_boxes': pred_boxes,
        }

        # using rfdetr's postprocess class
        post_process = PostProcess(self.post_process_config['num_select'])
        detections = post_process(
            outputs,
            target_sizes=target_sizes,
        )

        return detections

    def convert_and_validate_boxes(self, annotations, images):
        """Convert COCO ``[x_min, y_min, w, h]`` boxes to ``[cx, cy, w, h]``
        (stored back into each annotation) and validate that every box lies
        fully inside its image.

        Parameters
        ----------
        annotations:
            list of dicts, each with a "boxes" FloatTensor[N, 4] entry.
        images:
            list of CHW tensors aligned with ``annotations``.
        """
        for ann, img in zip(annotations, images):
            # Validate type and shape BEFORE converting — the original code
            # indexed the tensor first, so these checks could never fire.
            torch._assert(isinstance(ann["boxes"], torch.Tensor), "Expected target boxes to be of type Tensor.")
            boxes = ann["boxes"].to(torch.float32)
            torch._assert(
                len(boxes.shape) == 2 and boxes.shape[-1] == 4,
                "Expected target boxes to be a tensor of shape [N, 4].",
            )
            # Clone so the caller's tensor is never mutated in place
            # (`.to` returns the same object when the dtype already matches).
            boxes = boxes.clone()
            # convert from COCO format [x_min, y_min, width, height] to [cx, cy, w, h]
            boxes[:, [0, 1]] += boxes[:, [2, 3]] / 2
            ann["boxes"] = boxes

            for box in boxes:
                # img is CHW: shape[2] is width (W), shape[1] is height (H).
                torch._assert(
                    box[2] / 2 <= box[0] <= img.shape[2] - box[2] / 2 and box[3] / 2 <= box[1] <= img.shape[1] - box[3] / 2,
                    "Expected w/2 <= cx <= W - w/2 and h/2 <= cy <= H - h/2.",
                )

    def preprocess(
        self,
        images,
        annotations=None,
    ) -> BatchFeature:
        """
        Parameters
        ----------
        images: List[PIL.Image.Image]
            a single or a list of PIL images
        annotations: Optional[List[Dict[str, torch.Tensor | List]]]
            List of annotations associated with the image or batch of images. If annotation is for object
            detection, the annotations should be a dictionary with the following keys:
            - boxes (FloatTensor[N, 4]): the ground-truth boxes COCO format [x_min, y_min, width, height]
            - class_labels (Int64Tensor[N]): the class label for each ground-truth box
        """
        totensor = ToTensor()
        normalize = Normalize(mean=self.config['image_mean'], std=self.config['image_std'])

        if images is not None and not isinstance(images, list):
            # Wrap a single image instead of list()-ing it: list(img) would
            # iterate the object (splitting a tensor along dim 0, or failing
            # outright on a PIL image) rather than build a one-element batch.
            images = [images]
        if not isinstance(images[0], torch.Tensor):
            images = [totensor(img) for img in images]
        if annotations is not None:
            self.convert_and_validate_boxes(annotations, images)

        # get the original image sizes
        original_image_sizes: List[Tuple[int, int]] = []
        for img in images:
            val = img.shape[-2:]
            torch._assert(
                len(val) == 2,
                f"expecting the last two dimensions of the Tensor to be H and W instead got {img.shape[-2:]}",
            )
            original_image_sizes.append((val[0], val[1]))
        target_sizes = torch.tensor(original_image_sizes)

        # transform the input
        # normalize image
        images = [normalize(img) for img in images]
        # pad the list of images to make a tensor of size [B, C, H, W] and [B, H, W]
        nested_tensor = nested_tensor_from_tensor_list(images)

        data = {
            'pixel_values': nested_tensor.tensors,
            'pixel_mask': nested_tensor.mask,
            'target_sizes': target_sizes,
            'labels': annotations
        }
        return BatchFeature(data=data)
134
+
135
+
136
+ __all__ = [
137
+ "RFDetrImageProcessor"
138
+ ]
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoImageProcessor": "image_processing_rf_detr.RFDetrImageProcessor"
4
+ },
5
+ "config": {
6
+ "image_mean": [
7
+ 0.485,
8
+ 0.456,
9
+ 0.406
10
+ ],
11
+ "image_std": [
12
+ 0.229,
13
+ 0.224,
14
+ 0.225
15
+ ]
16
+ },
17
+ "image_processor_type": "RFDetrImageProcessor",
18
+ "model_name": "RFDETRBase",
19
+ "post_process_config": {
20
+ "num_select": 300
21
+ }
22
+ }