tuandunghcmut commited on
Commit
3fc605e
·
verified ·
1 Parent(s): 4bb09e0

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. groundingLMM/eval/gcg/evaluate.py +277 -0
  2. groundingLMM/eval/gcg/infer.py +184 -0
  3. groundingLMM/eval/gcg/run_evaluation.sh +29 -0
  4. groundingLMM/eval/referring_seg/infer_and_evaluate.py +372 -0
  5. groundingLMM/eval/referring_seg/run_evaluation.sh +31 -0
  6. groundingLMM/eval/region_captioning/run_evaluation_RefCOCOg.sh +28 -0
  7. groundingLMM/mmcv/.circleci/config.yml +175 -0
  8. groundingLMM/mmcv/.dev_scripts/check_installation.py +44 -0
  9. groundingLMM/mmcv/.github/ISSUE_TEMPLATE/config.yml +9 -0
  10. groundingLMM/mmcv/.github/ISSUE_TEMPLATE/feature_request.md +22 -0
  11. groundingLMM/mmcv/.github/ISSUE_TEMPLATE/general_questions.md +13 -0
  12. groundingLMM/mmcv/.github/ISSUE_TEMPLATE/unexpected_report.md +46 -0
  13. groundingLMM/mmcv/.github/pull_request_template.md +33 -0
  14. groundingLMM/mmcv/.github/workflows/build.yml +362 -0
  15. groundingLMM/mmcv/.github/workflows/build_pat.yml +26 -0
  16. groundingLMM/mmcv/.github/workflows/lint.yml +33 -0
  17. groundingLMM/mmcv/.github/workflows/publish-to-pypi.yml +46 -0
  18. groundingLMM/mmcv/examples/train.py +84 -0
  19. groundingLMM/mmcv/mmcv.egg-info/PKG-INFO +23 -0
  20. groundingLMM/mmcv/mmcv.egg-info/SOURCES.txt +877 -0
  21. groundingLMM/mmcv/mmcv.egg-info/dependency_links.txt +1 -0
  22. groundingLMM/mmcv/mmcv.egg-info/not-zip-safe +1 -0
  23. groundingLMM/mmcv/mmcv.egg-info/requires.txt +48 -0
  24. groundingLMM/mmcv/mmcv.egg-info/top_level.txt +1 -0
  25. groundingLMM/mmcv/mmcv/__init__.py +15 -0
  26. groundingLMM/mmcv/mmcv/arraymisc/__init__.py +4 -0
  27. groundingLMM/mmcv/mmcv/arraymisc/quantization.py +55 -0
  28. groundingLMM/mmcv/mmcv/fileio/__init__.py +11 -0
  29. groundingLMM/mmcv/mmcv/fileio/file_client.py +1149 -0
  30. groundingLMM/mmcv/mmcv/fileio/io.py +151 -0
  31. groundingLMM/mmcv/mmcv/fileio/parse.py +97 -0
  32. groundingLMM/mmcv/mmcv/image/__init__.py +28 -0
  33. groundingLMM/mmcv/mmcv/image/colorspace.py +306 -0
  34. groundingLMM/mmcv/mmcv/image/geometric.py +728 -0
  35. groundingLMM/mmcv/mmcv/image/io.py +314 -0
  36. groundingLMM/mmcv/mmcv/image/misc.py +53 -0
  37. groundingLMM/mmcv/mmcv/image/photometric.py +428 -0
  38. groundingLMM/mmcv/mmcv/model_zoo/deprecated.json +6 -0
  39. groundingLMM/mmcv/mmcv/model_zoo/mmcls.json +59 -0
  40. groundingLMM/mmcv/mmcv/model_zoo/open_mmlab.json +50 -0
  41. groundingLMM/mmcv/mmcv/onnx/__init__.py +5 -0
  42. groundingLMM/mmcv/mmcv/onnx/info.py +35 -0
  43. groundingLMM/mmcv/mmcv/onnx/symbolic.py +509 -0
  44. groundingLMM/mmcv/mmcv/parallel/__init__.py +13 -0
  45. groundingLMM/mmcv/mmcv/parallel/_functions.py +76 -0
  46. groundingLMM/mmcv/mmcv/parallel/collate.py +84 -0
  47. groundingLMM/mmcv/mmcv/parallel/data_container.py +89 -0
  48. groundingLMM/mmcv/mmcv/parallel/data_parallel.py +97 -0
  49. groundingLMM/mmcv/mmcv/parallel/distributed.py +112 -0
  50. groundingLMM/mmcv/mmcv/parallel/distributed_deprecated.py +70 -0
groundingLMM/eval/gcg/evaluate.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import argparse
4
+ from tqdm import tqdm
5
+ from pycocotools.coco import COCO
6
+ from pycocotools.cocoeval import COCOeval
7
+ from pycocotools import mask as maskUtils
8
+ from pycocoevalcap.eval import COCOEvalCap
9
+ from transformers import AutoTokenizer, AutoModel
10
+ from sklearn.metrics.pairwise import cosine_similarity
11
+
12
+ from eval.utils import *
13
+
14
+
15
+ def parse_args():
16
+ parser = argparse.ArgumentParser(description="Training")
17
+
18
+ parser.add_argument("--split", required=True, help="Evaluation split, options are 'val', 'test'")
19
+ parser.add_argument("--prediction_dir_path", required=True, help="The path where the inference results are stored.")
20
+ parser.add_argument("--gt_dir_path", required=False, default="./data/GranDf/annotations/val_test",
21
+ help="The path containing GranD-f evaluation annotations.")
22
+
23
+ args = parser.parse_args()
24
+
25
+ return args
26
+
27
+
28
+ # Load pre-trained model tokenizer and model for evaluation
29
+ tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
30
+ model = AutoModel.from_pretrained("bert-base-uncased")
31
+
32
+
33
+ def get_bert_embedding(text):
34
+ inputs = tokenizer(text, return_tensors="pt", max_length=512, truncation=True)
35
+ outputs = model(**inputs)
36
+ # Use the mean of the last hidden states as sentence embedding
37
+ sentence_embedding = torch.mean(outputs.last_hidden_state[0], dim=0).detach().numpy()
38
+
39
+ return sentence_embedding
40
+
41
+
42
+ def compute_miou(pred_masks, gt_masks):
43
+ # Computing mIoU between predicted masks and ground truth masks
44
+ iou_matrix = np.zeros((len(pred_masks), len(gt_masks)))
45
+ for i, pred_mask in enumerate(pred_masks):
46
+ for j, gt_mask in enumerate(gt_masks):
47
+ iou_matrix[i, j] = compute_iou(pred_mask, gt_mask)
48
+
49
+ # One-to-one pairing and mean IoU calculation
50
+ paired_iou = []
51
+ while iou_matrix.size > 0 and np.max(iou_matrix) > 0:
52
+ max_iou_idx = np.unravel_index(np.argmax(iou_matrix, axis=None), iou_matrix.shape)
53
+ paired_iou.append(iou_matrix[max_iou_idx])
54
+ iou_matrix = np.delete(iou_matrix, max_iou_idx[0], axis=0)
55
+ iou_matrix = np.delete(iou_matrix, max_iou_idx[1], axis=1)
56
+
57
+ return np.mean(paired_iou) if paired_iou else 0.0
58
+
59
+
60
+ def evaluate_mask_miou(coco_gt, image_ids, pred_save_path):
61
+ # Load predictions
62
+ coco_dt = coco_gt.loadRes(pred_save_path)
63
+
64
+ mious = []
65
+ for image_id in tqdm(image_ids):
66
+ # Getting ground truth masks
67
+ matching_anns = [ann for ann in coco_gt.anns.values() if ann['image_id'] == image_id]
68
+ ann_ids = [ann['id'] for ann in matching_anns]
69
+
70
+ gt_anns = coco_gt.loadAnns(ann_ids)
71
+ gt_masks = [maskUtils.decode(ann['segmentation']) for ann in gt_anns if 'segmentation' in ann]
72
+
73
+ # Getting predicted masks
74
+ matching_anns = [ann for ann in coco_dt.anns.values() if ann['image_id'] == image_id]
75
+ dt_ann_ids = [ann['id'] for ann in matching_anns]
76
+ pred_anns = coco_dt.loadAnns(dt_ann_ids)
77
+ pred_masks = [maskUtils.decode(ann['segmentation']) for ann in pred_anns if 'segmentation' in ann]
78
+
79
+ # Compute and save the mIoU for the current image
80
+ mious.append(compute_miou(pred_masks, gt_masks))
81
+
82
+ # Report mean IoU across all images
83
+ mean_miou = np.mean(mious) if mious else 0.0 # If list is empty, return 0.0
84
+
85
+ print(f"Mean IoU (mIoU) across all images: {mean_miou:.3f}")
86
+
87
+
88
+ def compute_iou_matrix(pred_masks, gt_masks):
89
+ iou_matrix = np.zeros((len(pred_masks), len(gt_masks)))
90
+ for i, pred_mask in enumerate(pred_masks):
91
+ for j, gt_mask in enumerate(gt_masks):
92
+ iou_matrix[i, j] = compute_iou(pred_mask, gt_mask)
93
+
94
+ return iou_matrix
95
+
96
+
97
+ def text_similarity_bert(str1, str2):
98
+ emb1 = get_bert_embedding(str1)
99
+ emb2 = get_bert_embedding(str2)
100
+
101
+ return cosine_similarity([emb1], [emb2])[0, 0]
102
+
103
+
104
+ def find_best_matches(gt_anns, gt_labels, dt_anns, dt_labels, iou_threshold, text_sim_threshold, vectorizer=None):
105
+ best_matches = []
106
+
107
+ # Compute pair - wise IoU
108
+ pred_masks = [maskUtils.decode(ann['segmentation']) for ann in dt_anns]
109
+ gt_masks = [maskUtils.decode(ann['segmentation']) for ann in gt_anns]
110
+ ious = compute_iou_matrix(gt_masks, pred_masks)
111
+
112
+ text_sims = np.zeros((len(gt_labels), len(dt_labels)))
113
+
114
+ for i, gt_label in enumerate(gt_labels):
115
+ for j, dt_label in enumerate(dt_labels):
116
+ text_sims[i, j] = text_similarity_bert(gt_label, dt_label)
117
+
118
+ # Find one-to-one matches satisfying both IoU and text similarity thresholds
119
+ while ious.size > 0:
120
+ max_iou_idx = np.unravel_index(np.argmax(ious), ious.shape)
121
+ if ious[max_iou_idx] < iou_threshold or text_sims[max_iou_idx] < text_sim_threshold:
122
+ break # No admissible pair found
123
+
124
+ best_matches.append(max_iou_idx)
125
+
126
+ # Remove selected annotations from consideration
127
+ ious[max_iou_idx[0], :] = 0
128
+ ious[:, max_iou_idx[1]] = 0
129
+ text_sims[max_iou_idx[0], :] = 0
130
+ text_sims[:, max_iou_idx[1]] = 0
131
+
132
+ return best_matches # List of index pairs [(gt_idx, dt_idx), ...]
133
+
134
+
135
+ def evaluate_recall_with_mapping(coco_gt, coco_cap_gt, image_ids, pred_save_path, cap_pred_save_path, iou_threshold=0.5,
136
+ text_sim_threshold=0.5):
137
+ coco_dt = coco_gt.loadRes(pred_save_path)
138
+ coco_cap_dt = coco_cap_gt.loadRes(cap_pred_save_path)
139
+
140
+ true_positives = 0
141
+ actual_positives = 0
142
+
143
+ for image_id in tqdm(image_ids):
144
+ try:
145
+ # gt_ann_ids = coco_gt.getAnnIds(imgIds=image_id, iscrowd=None)
146
+ matching_anns = [ann for ann in coco_gt.anns.values() if ann['image_id'] == image_id]
147
+ gt_ann_ids = [ann['id'] for ann in matching_anns]
148
+ gt_anns = coco_gt.loadAnns(gt_ann_ids)
149
+
150
+ # dt_ann_ids = coco_dt.getAnnIds(imgIds=image_id, iscrowd=None)
151
+ matching_anns = [ann for ann in coco_dt.anns.values() if ann['image_id'] == image_id]
152
+ dt_ann_ids = [ann['id'] for ann in matching_anns]
153
+ dt_anns = coco_dt.loadAnns(dt_ann_ids)
154
+
155
+ # gt_cap_ann_ids = coco_cap_gt.getAnnIds(imgIds=image_id)
156
+ matching_anns = [ann for ann in coco_cap_gt.anns.values() if ann['image_id'] == image_id]
157
+ gt_cap_ann_ids = [ann['id'] for ann in matching_anns]
158
+ gt_cap_ann = coco_cap_gt.loadAnns(gt_cap_ann_ids)[0]
159
+
160
+ # dt_cap_ann_ids = coco_cap_dt.getAnnIds(imgIds=image_id)
161
+ matching_anns = [ann for ann in coco_cap_dt.anns.values() if ann['image_id'] == image_id]
162
+ dt_cap_ann_ids = [ann['id'] for ann in matching_anns]
163
+ dt_cap_ann = coco_cap_dt.loadAnns(dt_cap_ann_ids)[0]
164
+
165
+ gt_labels = gt_cap_ann['labels']
166
+ dt_labels = dt_cap_ann['labels']
167
+
168
+ actual_positives += len(gt_labels)
169
+
170
+ # Find best matching pairs
171
+ best_matches = find_best_matches(gt_anns, gt_labels, dt_anns, dt_labels, iou_threshold, text_sim_threshold)
172
+
173
+ true_positives += len(best_matches)
174
+ except Exception as e:
175
+ print(e)
176
+
177
+ recall = true_positives / actual_positives if actual_positives > 0 else 0
178
+
179
+ print(f"Recall: {recall:.3f}")
180
+
181
+
182
+ def main():
183
+ args = parse_args()
184
+
185
+ # Set the correct split
186
+ split = args.split
187
+ assert split == "val" or split == "test" # GCG Evaluation has only val and test splits
188
+ gt_mask_path = f"{args.gt_dir_path}/{split}_gcg_coco_mask_gt.json"
189
+ gt_cap_path = f"{args.gt_dir_path}/{split}_gcg_coco_caption_gt.json"
190
+
191
+ print(f"Starting evalution on {split} split.")
192
+
193
+ # Get the image names of the split
194
+ all_images_ids = []
195
+ with open(gt_cap_path, 'r') as f:
196
+ contents = json.load(f)
197
+ for image in contents['images']:
198
+ all_images_ids.append(image['id'])
199
+
200
+ # The directory is used to store intermediate files
201
+ tmp_dir_path = f"tmp/{os.path.basename(args.prediction_dir_path)}_{split}"
202
+ os.makedirs(tmp_dir_path, exist_ok=True) # Create directory if not exists already
203
+
204
+ # Create predictions
205
+ pred_save_path = f"{tmp_dir_path}/mask_pred_tmp_save.json"
206
+ cap_pred_save_path = f"{tmp_dir_path}/cap_pred_tmp_save.json"
207
+ coco_pred_file = []
208
+ caption_pred_dict = {}
209
+ for image_id in all_images_ids:
210
+ prediction_path = f"{args.prediction_dir_path}/{image_id}.json"
211
+ with open(prediction_path, 'r') as f:
212
+ pred = json.load(f)
213
+ bu = pred
214
+ key = list(pred.keys())[0]
215
+ pred = pred[key]
216
+ try:
217
+ caption_pred_dict[image_id] = {'caption': pred['caption'], 'labels': pred['phrases']}
218
+ except Exception as e:
219
+ pred = bu
220
+ caption_pred_dict[image_id] = {'caption': pred['caption'], 'labels': pred['phrases']}
221
+ for rle_mask in pred['pred_masks']:
222
+ coco_pred_file.append({"image_id": image_id, "category_id": 1, "segmentation": rle_mask, "score": 1.0})
223
+
224
+ # Save gcg_coco_predictions
225
+ with open(pred_save_path, 'w') as f:
226
+ json.dump(coco_pred_file, f)
227
+
228
+ # Prepare the CAPTION predictions in COCO format
229
+ cap_image_ids = []
230
+ coco_cap_pred_file = []
231
+ for image_id, values in caption_pred_dict.items():
232
+ cap_image_ids.append(image_id)
233
+ coco_cap_pred_file.append({"image_id": image_id, "caption": values['caption'], "labels": values['labels']})
234
+
235
+ # Save gcg_caption_coco_predictions
236
+ with open(cap_pred_save_path, 'w') as f:
237
+ json.dump(coco_cap_pred_file, f)
238
+
239
+ # # -------------------------------#
240
+ # 1. Evaluate AP
241
+ # Calculate mask mAP
242
+ # Load the ground truth and predictions in COCO format
243
+ coco_gt = COCO(gt_mask_path)
244
+ coco_dt = coco_gt.loadRes(pred_save_path) # load predictions
245
+ # Initialize COCOEval and specify the metric you want to use
246
+ coco_eval = COCOeval(coco_gt, coco_dt, "segm") # "segm" for segmentation
247
+ # Evaluate on a specific category
248
+ coco_eval.params.catIds = [1] # your category ID
249
+ # Evaluate
250
+ coco_eval.evaluate()
251
+ coco_eval.accumulate()
252
+ coco_eval.summarize()
253
+
254
+ # # -------------------------------#
255
+ # # 2. Evaluate Caption Quality
256
+ coco_cap_gt = COCO(gt_cap_path)
257
+ coco_cap_result = coco_cap_gt.loadRes(cap_pred_save_path)
258
+ # create coco_eval object by taking coco and coco_result
259
+ coco_eval = COCOEvalCap(coco_cap_gt, coco_cap_result)
260
+ coco_eval.params['image_id'] = coco_cap_result.getImgIds()
261
+ coco_eval.evaluate()
262
+ for metric, score in coco_eval.eval.items():
263
+ print(f'{metric}: {score:.3f}')
264
+
265
+ # # -------------------------------#
266
+ # 3. Evaluate Mask Mean MIoU
267
+ coco_gt = COCO(gt_mask_path) # Load ground truth annotations
268
+ evaluate_mask_miou(coco_gt, all_images_ids, pred_save_path)
269
+
270
+ # # -------------------------------#
271
+ # 4. Evaluate Recall
272
+ evaluate_recall_with_mapping(coco_gt, coco_cap_gt, all_images_ids, pred_save_path, cap_pred_save_path,
273
+ iou_threshold=0.5, text_sim_threshold=0.5)
274
+
275
+
276
+ if __name__ == "__main__":
277
+ main()
groundingLMM/eval/gcg/infer.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import cv2
3
+ import json
4
+ import bleach
5
+ import argparse
6
+ from tqdm import tqdm
7
+ from torch.utils.data import DataLoader, DistributedSampler
8
+ from transformers import AutoTokenizer, CLIPImageProcessor
9
+
10
+ from eval.utils import *
11
+ from eval.ddp import *
12
+ from model.GLaMM import GLaMMForCausalLM
13
+ from model.llava import conversation as conversation_lib
14
+ from model.llava.mm_utils import tokenizer_image_token
15
+ from model.SAM.utils.transforms import ResizeLongestSide
16
+ from tools.utils import DEFAULT_IM_END_TOKEN, DEFAULT_IM_START_TOKEN, DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
17
+
18
+
19
+ def parse_args():
20
+ parser = argparse.ArgumentParser(description="GLaMM Inference - GCG")
21
+
22
+ parser.add_argument("--hf_model_path", required=True, help="The model path in huggingface format.")
23
+ parser.add_argument("--img_dir", required=False, default="./data/GranDf/GranDf_HA_images/val_test",
24
+ help="The directory containing images to run inference.")
25
+ parser.add_argument("--output_dir", required=True, help="The directory to store the response in json format.")
26
+
27
+ parser.add_argument("--image_size", default=1024, type=int, help="image size")
28
+ parser.add_argument("--model_max_length", default=512, type=int)
29
+ parser.add_argument("--use_mm_start_end", action="store_true", default=True)
30
+ parser.add_argument("--conv_type", default="llava_v1", type=str, choices=["llava_v1", "llava_llama_2"])
31
+
32
+ # DDP Related parameters
33
+ parser.add_argument("--batch_size_per_gpu", required=False, default=1)
34
+ parser.add_argument('--world_size', default=1, type=int, help='number of distributed processes')
35
+ parser.add_argument('--local_rank', default=-1, type=int)
36
+ parser.add_argument('--dist_url', default='env://', help='url used to set up distributed training')
37
+
38
+ return parser.parse_args()
39
+
40
+
41
+ def inference(instructions, image_path):
42
+ # Filter out special chars
43
+ instructions = bleach.clean(instructions)
44
+ instructions = instructions.replace('&lt;', '<').replace('&gt;', '>')
45
+
46
+ # Prepare prompt for model Inference
47
+ conv = conversation_lib.conv_templates[args.conv_type].copy()
48
+ conv.messages = []
49
+ begin_str = f"""The {DEFAULT_IMAGE_TOKEN} provides an overview of the picture.\n"""
50
+ prompt = begin_str + instructions
51
+ if args.use_mm_start_end:
52
+ replace_token = (DEFAULT_IM_START_TOKEN + DEFAULT_IMAGE_TOKEN + DEFAULT_IM_END_TOKEN)
53
+ prompt = prompt.replace(DEFAULT_IMAGE_TOKEN, replace_token)
54
+ conv.append_message(conv.roles[0], prompt)
55
+ conv.append_message(conv.roles[1], "")
56
+ prompt = conv.get_prompt()
57
+
58
+ # Read and preprocess the image (Global image encoder - CLIP)
59
+ image_np = cv2.imread(image_path)
60
+ image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
61
+ original_size_list = [image_np.shape[:2]]
62
+ image_clip = (clip_image_processor.preprocess(image_np, return_tensors="pt")["pixel_values"][0].unsqueeze(0).cuda())
63
+ image_clip = image_clip.bfloat16() # Precision is bf16 by default
64
+
65
+ # Preprocess the image (Grounding image encoder)
66
+ image = transform.apply_image(image_np)
67
+ resize_list = [image.shape[:2]]
68
+ image = (
69
+ grounding_image_ecoder_preprocess(torch.from_numpy(image).permute(2, 0, 1).contiguous()).unsqueeze(0).cuda())
70
+ image = image.bfloat16() # Precision is bf16 by default
71
+
72
+ # Prepare inputs for inference
73
+ input_ids = tokenizer_image_token(prompt, tokenizer, return_tensors="pt")
74
+ input_ids = input_ids.unsqueeze(0).cuda()
75
+ bboxes = None # No box/region is input in GCG task
76
+
77
+ # Generate output
78
+ output_ids, pred_masks = model.evaluate(image_clip, image, input_ids, resize_list, original_size_list,
79
+ max_tokens_new=512, bboxes=bboxes)
80
+ output_ids = output_ids[0][output_ids[0] != IMAGE_TOKEN_INDEX]
81
+
82
+ # Post-processing
83
+ text_output = tokenizer.decode(output_ids, skip_special_tokens=False)
84
+ text_output = text_output.replace("\n", "").replace(" ", " ")
85
+ text_output = text_output.split("ASSISTANT: ")[-1]
86
+
87
+ cleaned_str = re.sub(r'<.*?>', '', text_output)
88
+
89
+ pattern = re.compile(r'<p>(.*?)<\/p>')
90
+ phrases = pattern.findall(text_output)
91
+ phrases = [p.strip() for p in phrases]
92
+
93
+ # Remove the [SEG] token
94
+ cleaned_str = cleaned_str.replace('[SEG]', '')
95
+
96
+ # Strip unnecessary spaces
97
+ cleaned_str = ' '.join(cleaned_str.split()).strip("'")
98
+ cleaned_str = cleaned_str.strip()
99
+
100
+ return cleaned_str, pred_masks, phrases
101
+
102
+
103
+ def custom_collate_fn(batch):
104
+ image_id = [item[0] for item in batch]
105
+ image_path = [item[1] for item in batch]
106
+
107
+ return image_id, image_path
108
+
109
+
110
+ if __name__ == "__main__":
111
+ args = parse_args()
112
+ init_distributed_mode(args)
113
+
114
+ # Initialize tokenizer and model
115
+ tokenizer = AutoTokenizer.from_pretrained(args.hf_model_path, cache_dir=None,
116
+ model_max_length=args.model_max_length, padding_side="right",
117
+ use_fast=False)
118
+ tokenizer.pad_token = tokenizer.unk_token
119
+ seg_token_idx = tokenizer("[SEG]", add_special_tokens=False).input_ids[0]
120
+ torch_dtype = torch.bfloat16 # By default, using bf16
121
+ kwargs = {"torch_dtype": torch_dtype}
122
+ model = GLaMMForCausalLM.from_pretrained(args.hf_model_path, low_cpu_mem_usage=True,
123
+ seg_token_idx=seg_token_idx, **kwargs)
124
+ # Update model config
125
+ model.config.eos_token_id = tokenizer.eos_token_id
126
+ model.config.bos_token_id = tokenizer.bos_token_id
127
+ model.config.pad_token_id = tokenizer.pad_token_id
128
+
129
+ # Initialize Global Image Encoder (CLIP)
130
+ model.get_model().initialize_vision_modules(model.get_model().config)
131
+ vision_tower = model.get_model().get_vision_tower()
132
+ vision_tower.to(dtype=torch_dtype)
133
+
134
+ # Transfer the model to GPU
135
+ model = model.bfloat16().cuda() # Replace with model = model.float().cuda() for 32 bit inference
136
+ vision_tower = model.get_model().get_vision_tower()
137
+ vision_tower.to(device="cuda")
138
+
139
+ # Initialize Image Processor for GLobal Image Encoder (CLIP)
140
+ clip_image_processor = CLIPImageProcessor.from_pretrained(model.config.vision_tower)
141
+ transform = ResizeLongestSide(args.image_size)
142
+
143
+ model.eval() # Model should be in evaluation mode for inference
144
+
145
+ # Prompt model to return grounded conversations
146
+ instruction = "Could you please give me a detailed description of the image? Please respond with interleaved \
147
+ segmentation masks for the corresponding parts of the answer."
148
+
149
+ # Create output directory if not exists already
150
+ os.makedirs(args.output_dir, exist_ok=True)
151
+
152
+ # Create DDP Dataset
153
+ dataset = GCGEvalDDP(args.img_dir)
154
+ distributed_sampler = DistributedSampler(dataset, rank=args.rank, shuffle=False)
155
+ dataloader = DataLoader(dataset, batch_size=args.batch_size_per_gpu, num_workers=2,
156
+ sampler=distributed_sampler, collate_fn=custom_collate_fn)
157
+
158
+ # Iterate over all the images, run inference and save results
159
+ for (image_id, image_path) in tqdm(dataloader):
160
+ image_id, image_path = image_id[0], image_path[0]
161
+
162
+ output_path = f"{args.output_dir}/{image_id[:-4]}.json"
163
+
164
+ result_caption, pred_masks, phrases = inference(instruction, image_path) # GLaMM Inference
165
+
166
+ # Convert the predicted masks into RLE format
167
+ pred_masks_tensor = pred_masks[0].cpu()
168
+ binary_pred_masks = pred_masks_tensor > 0
169
+ uncompressed_mask_rles = mask_to_rle_pytorch(binary_pred_masks)
170
+ rle_masks = []
171
+ for m in uncompressed_mask_rles:
172
+ rle_masks.append(coco_encode_rle(m))
173
+
174
+ # Create results dictionary
175
+ result_dict = {
176
+ "image_id": image_id[:-4],
177
+ "caption": result_caption,
178
+ "phrases": phrases,
179
+ "pred_masks": rle_masks
180
+ }
181
+
182
+ # Save the inference results
183
+ with open(output_path, 'w') as f:
184
+ json.dump(result_dict, f)
groundingLMM/eval/gcg/run_evaluation.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+
3
+ ## USAGE
4
+
5
+ ## bash eval/gcg/run_evaluation.sh <path to the HF checkpoints path> <path to the directory to save the evaluation results>
6
+
7
+ ## USAGE
8
+
9
+
10
+ export PYTHONPATH="./:$PYTHONPATH"
11
+ MASTER_PORT=24999
12
+ NUM_GPUS=1 # Adjust it as per the available #GPU
13
+
14
+ # Positional arguments for the bash scripts
15
+ CKPT_PATH=$1
16
+ RESULT_PATH=$2
17
+
18
+ # Path to the GranD-f evaluation dataset images directory
19
+ IMAGE_DIR=./data/GranDf/GranDf_HA_images/val_test
20
+
21
+ # Run Inference
22
+ torchrun --nnodes=1 --nproc_per_node="$NUM_GPUS" --master_port="$MASTER_PORT" eval/gcg/infer.py --hf_model_path "$CKPT_PATH" --img_dir "$IMAGE_DIR" --output_dir "$RESULT_PATH"
23
+
24
+ # Path to the GranD-f evaluation dataset ground-truths directory
25
+ GT_DIR=./data/GranDf/annotations/val_test
26
+
27
+ # Evaluate
28
+ python eval/gcg/evaluate.py --prediction_dir_path "$RESULT_PATH" --gt_dir_path "$GT_DIR" --split "val"
29
+ python eval/gcg/evaluate.py --prediction_dir_path "$RESULT_PATH" --gt_dir_path "$GT_DIR" --split "test"
groundingLMM/eval/referring_seg/infer_and_evaluate.py ADDED
@@ -0,0 +1,372 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import json
4
+ import tqdm
5
+ import torch
6
+ import argparse
7
+ import deepspeed
8
+ import transformers
9
+ from functools import partial
10
+ from torch.utils.data import ConcatDataset
11
+ from peft import LoraConfig, get_peft_model
12
+ from torch.utils.tensorboard import SummaryWriter
13
+
14
+ from model.GLaMM import GLaMMForCausalLM
15
+ from model.llava import conversation as conversation_lib
16
+ from dataset.dataset import custom_collate_fn
17
+ from dataset.segm_datasets.RefCOCO_Segm_ds import ReferSegmDataset
18
+ from tools.utils import (DEFAULT_IM_END_TOKEN, DEFAULT_IM_START_TOKEN, AverageMeter, Summary,
19
+ intersectionAndUnionGPU, dict_to_cuda)
20
+
21
+
22
+ def parse_args(args):
23
+ parser = argparse.ArgumentParser(description="GLaMM Model Evaluation")
24
+
25
+ # Model-specific settings
26
+ parser.add_argument("--version", required=True, help="Path to the pretrained model for evaluation.")
27
+ parser.add_argument("--pretrained", action="store_true", default=False)
28
+ parser.add_argument("--vision_pretrained", default="./checkpoints/sam_vit_h_4b8939.pth", type=str)
29
+ parser.add_argument("--vision-tower", default="openai/clip-vit-large-patch14-336", type=str)
30
+ parser.add_argument("--conv_type", default="llava_v1", type=str, choices=["llava_v1", "llava_llama_2"])
31
+ parser.add_argument("--tune_mm_mlp_adapter", action="store_true", default=False)
32
+ parser.add_argument("--freeze_mm_mlp_adapter", action="store_true", default=False)
33
+ parser.add_argument("--mm_use_im_start_end", action="store_true", default=True)
34
+ parser.add_argument("--out_dim", default=256, type=int)
35
+ parser.add_argument("--lora_target_modules", default="q_proj,v_proj", type=str)
36
+ parser.add_argument("--with_region", action="store_true", default=True)
37
+ parser.add_argument("--mm_vision_select_layer", default=-2, type=int)
38
+ parser.add_argument("--pretrain_mm_mlp_adapter", default="", type=str)
39
+ parser.add_argument("--precision", default='bf16', type=str)
40
+
41
+ # Training settings
42
+ parser.add_argument("--lr", default=0.0003, type=float)
43
+ parser.add_argument("--epochs", default=10, type=int)
44
+ parser.add_argument("--steps_per_epoch", default=500, type=int)
45
+ parser.add_argument("--batch_size", default=2, type=int, help="batch size per device per step")
46
+ parser.add_argument("--grad_accumulation_steps", default=10, type=int)
47
+ parser.add_argument("--lora_r", default=8, type=int)
48
+ parser.add_argument("--lora_alpha", default=16, type=int)
49
+ parser.add_argument("--lora_dropout", default=0.05, type=float)
50
+ parser.add_argument("--ce_loss_weight", default=1.0, type=float)
51
+ parser.add_argument("--dice_loss_weight", default=0.5, type=float)
52
+ parser.add_argument("--bce_loss_weight", default=2.0, type=float)
53
+ parser.add_argument("--beta1", default=0.9, type=float)
54
+ parser.add_argument("--beta2", default=0.95, type=float)
55
+ parser.add_argument("--gradient_checkpointing", action="store_true", default=True)
56
+ parser.add_argument("--train_mask_decoder", action="store_true", default=True)
57
+ parser.add_argument("--use_mm_start_end", action="store_true", default=True)
58
+ parser.add_argument("--print_freq", default=1, type=int)
59
+ parser.add_argument("--start_epoch", default=0, type=int)
60
+
61
+ # Dataset settings
62
+ parser.add_argument("--dataset_dir", default="./data", type=str)
63
+ parser.add_argument("--image_size", default=1024, type=int, help="Image size for grounding image encoder")
64
+ parser.add_argument("--model_max_length", default=1536, type=int)
65
+ parser.add_argument("--refer_seg_data", default="refcocog|val", type=str)
66
+ parser.add_argument("--results_path", default="referring_seg_eval.json", type=str)
67
+
68
+ # Evaluation settings
69
+ parser.add_argument("--val_batch_size", default=1, type=int)
70
+ parser.add_argument("--workers", default=2, type=int)
71
+ parser.add_argument("--local_rank", default=0, type=int, help="node rank")
72
+
73
+ # Experiment settings
74
+ parser.add_argument("--log_base_dir", default="./runs", type=str)
75
+ parser.add_argument("--exp_name", default="glam_eval_referseg", type=str)
76
+
77
+ return parser.parse_args(args)
78
+
79
+
80
+ def initialize_environment(args):
81
+ """ Set up logging and model directories. """
82
+ args.log_dir = os.path.join(args.log_base_dir, args.exp_name)
83
+ if args.local_rank == 0:
84
+ os.makedirs(args.log_dir, exist_ok=True)
85
+ return SummaryWriter(args.log_dir)
86
+ return None
87
+
88
+
89
+ def setup_tokenizer_and_special_tokens(args):
90
+ """ Load tokenizer and add special tokens. """
91
+ tokenizer = transformers.AutoTokenizer.from_pretrained(
92
+ args.version, model_max_length=args.model_max_length, padding_side="right", use_fast=False
93
+ )
94
+ print('\033[92m' + "---- Initialized tokenizer from: {} ----".format(args.version) + '\033[0m')
95
+ tokenizer.pad_token = tokenizer.unk_token
96
+
97
+ if not args.pretrained:
98
+ if args.use_mm_start_end:
99
+ tokenizer.add_tokens(
100
+ [DEFAULT_IM_START_TOKEN, DEFAULT_IM_END_TOKEN], special_tokens=True
101
+ )
102
+ # modifications specific for regions
103
+ reg_tokens = ['<bbox>', '<point>']
104
+ # Adding special tokens for pixel grounding
105
+ segmentation_tokens = ['[SEG]']
106
+ # Adding tokens for GCG
107
+ phrase_tokens = ['<p>', '</p>']
108
+ special_tokens = reg_tokens + segmentation_tokens + phrase_tokens
109
+ tokenizer.add_tokens(special_tokens, special_tokens=True)
110
+
111
+ args.bbox_token_idx = tokenizer("<bbox>", add_special_tokens=False).input_ids[0]
112
+ args.seg_token_idx = tokenizer("[SEG]", add_special_tokens=False).input_ids[0]
113
+ args.bop_token_idx = tokenizer("<p>", add_special_tokens=False).input_ids[0]
114
+ args.eop_token_idx = tokenizer("</p>", add_special_tokens=False).input_ids[0]
115
+
116
+ return tokenizer
117
+
118
+
119
def initialize_model(args, tokenizer):
    """Build a GLaMM model from ``args.version`` and sync its special-token ids.

    Forwards the relevant CLI options to ``GLaMMForCausalLM.from_pretrained``
    and copies the eos/bos/pad ids from the tokenizer into the model config.
    """
    forwarded_keys = [
        "train_mask_decoder", "out_dim", "ce_loss_weight", "dice_loss_weight", "bce_loss_weight",
        "seg_token_idx", "vision_pretrained", "vision_tower", "use_mm_start_end", "mm_vision_select_layer",
        "pretrain_mm_mlp_adapter", "tune_mm_mlp_adapter", "freeze_mm_mlp_adapter", "mm_use_im_start_end",
        "with_region", "bbox_token_idx", "eop_token_idx", "bop_token_idx",
    ]
    model_args = {key: getattr(args, key) for key in forwarded_keys}
    # Number of multi-level region feature maps used by the region encoder.
    model_args["num_level_reg_features"] = 4

    model = GLaMMForCausalLM.from_pretrained(
        args.version, torch_dtype=torch.bfloat16, low_cpu_mem_usage=True, **model_args
    )
    print('\033[92m' + "---- Initialized model from: {} ----".format(args.version) + '\033[0m')

    # Keep the model config's special-token ids consistent with the tokenizer.
    config = model.config
    config.eos_token_id = tokenizer.eos_token_id
    config.bos_token_id = tokenizer.bos_token_id
    config.pad_token_id = tokenizer.pad_token_id

    return model
139
+
140
+
141
def prepare_model_for_training(model, tokenizer, args):
    """Freeze/unfreeze GLaMM sub-modules and (optionally) wrap the model with LoRA.

    Mutates ``model`` in place: enables gradient checkpointing, loads the
    vision tower, sets ``requires_grad`` per sub-module, configures the
    conversation template, and resizes the token embeddings to match
    ``tokenizer``. Returns None.
    """
    # Enable input gradients (required so gradient checkpointing works with
    # frozen input embeddings)
    model.enable_input_require_grads()
    model.gradient_checkpointing_enable()

    # Initialize vision tower (global image encoder), moved to bf16 on this rank
    print(
        '\033[92m' + "---- Initialized Global Image Encoder (vision tower) from: {} ----".format(
            args.vision_tower
        ) + '\033[0m'
    )
    model.get_model().initialize_vision_modules(model.get_model().config)
    vision_tower = model.get_model().get_vision_tower()
    vision_tower.to(dtype=torch.bfloat16, device=args.local_rank)

    # Initialize GLaMM model and adjust requires_grad: fresh init when not
    # resuming from a pretrained checkpoint; otherwise freeze the grounding
    # encoder and optionally keep only its mask decoder trainable.
    if not args.pretrained:
        model.get_model().initialize_glamm_model(model.get_model().config)
    else:
        for param in model.get_model().grounding_encoder.parameters():
            param.requires_grad = False
        if model.get_model().config.train_mask_decoder:
            model.get_model().grounding_encoder.mask_decoder.train()
            for param in model.get_model().grounding_encoder.mask_decoder.parameters():
                param.requires_grad = True

    # Projection layer (text hidden states -> segmentation queries) is always trainable
    model.get_model().text_hidden_fcs.train()
    for param in model.get_model().text_hidden_fcs.parameters():
        param.requires_grad = True

    # Freeze the vision tower and the multimodal projector
    for p in vision_tower.parameters():
        p.requires_grad = False
    for p in model.get_model().mm_projector.parameters():
        p.requires_grad = False

    # Full fine-tuning path (lora_r == 0): unfreeze the transformer layers and
    # re-enable the mm projector, overriding the freeze just above
    lora_r = args.lora_r
    if lora_r == 0:
        for p in model.get_model().layers.parameters():
            p.requires_grad = True
        for p in model.get_model().mm_projector.parameters():
            p.requires_grad = True

    # Configure conversation library (prompt template selected by --conv_type)
    conversation_lib.default_conversation = conversation_lib.conv_templates[args.conv_type]

    # Configure LoRA if applicable.
    # NOTE(review): get_peft_model's return value only rebinds the *local*
    # name `model`; the caller keeps its original reference. PEFT injects
    # adapter modules into the base model in place, so this presumably still
    # takes effect — confirm against the caller in main().
    if lora_r > 0:
        lora_config = setup_lora_config(model, args)
        model = get_peft_model(model, lora_config)

    # Resize token embeddings to account for the added special tokens
    model.resize_token_embeddings(len(tokenizer))
196
+
197
+
198
def setup_lora_config(model, args):
    """Build a ``LoraConfig`` targeting the model's projection linear layers."""

    def collect_target_linears(model, wanted_names):
        """Names of nn.Linear modules matching ``wanted_names``, excluding
        the grounding encoder, vision tower, mm projector and text projection."""
        excluded_parts = ("grounding_encoder", "vision_tower", "mm_projector", "text_hidden_fcs")
        matches = set()
        for module_name, module in model.named_modules():
            if not isinstance(module, torch.nn.Linear):
                continue
            if any(part in module_name for part in excluded_parts):
                continue
            if any(part in module_name for part in wanted_names):
                matches.add(module_name)
        return sorted(matches)

    # Resolve the comma-separated CLI option into concrete module names.
    target_module_names = collect_target_linears(model, args.lora_target_modules.split(","))

    return LoraConfig(
        r=args.lora_r,
        lora_alpha=args.lora_alpha,
        target_modules=target_module_names,
        lora_dropout=args.lora_dropout,
        bias="none",
        task_type="CAUSAL_LM",
    )
222
+
223
+
224
def initialize_datasets_and_loaders(args, tokenizer):
    """Create the validation ReferSegm dataset described by ``args.refer_seg_data``.

    ``args.refer_seg_data`` is expected in the form ``"<dataset>|<split>"``,
    e.g. ``"refcoco|val"``.

    Returns:
        list: validation dataset objects ready to be wrapped in a DataLoader.
    """
    # Arguments shared by every ReferSegmDataset instance.
    common_ds_args = {
        "dataset_dir": args.dataset_dir,
        "tokenizer": tokenizer,
        "global_image_encoder": args.vision_tower,
        "precision": args.precision,
        "image_size": args.image_size,
    }

    # Validation datasets
    dataset, split = args.refer_seg_data.split('|')
    val_datasets = [
        ReferSegmDataset(**common_ds_args, validation=True, refer_segm_data=dataset, split=split,
                         inference=True)
    ]
    # Fix each dataset's reported length to the number of images in the split.
    # (Plain loop instead of the original throwaway list comprehension that was
    # used purely for its side effects.)
    for ds in val_datasets:
        ds._set_len(len(ds.refer_segm_data[dataset]['images']))

    return val_datasets
241
+
242
+
243
def setup_data_loaders(args, val_datasets, tokenizer):
    """Wrap the validation datasets in a distributed, inference-mode DataLoader."""
    # Collate function configured for inference batches.
    collate = partial(
        custom_collate_fn, tokenizer=tokenizer, use_mm_start_end=args.use_mm_start_end,
        local_rank=args.local_rank, inference=True
    )

    # Merge all validation datasets and shard them across ranks without
    # shuffling or dropping the tail.
    combined = ConcatDataset(val_datasets)
    sampler = torch.utils.data.distributed.DistributedSampler(
        combined, shuffle=False, drop_last=False
    )

    return torch.utils.data.DataLoader(
        combined,
        batch_size=args.val_batch_size,
        shuffle=False,
        num_workers=args.workers,
        pin_memory=False,
        collate_fn=collate,
        sampler=sampler,
    )
259
+
260
+
261
def initialize_deepspeed(model, tokenizer, args):
    """Wrap ``model`` in a DeepSpeed engine (AdamW, warmup-decay LR, ZeRO stage 2).

    Returns:
        tuple: (model_engine, optimizer, scheduler) from ``deepspeed.initialize``.
    """
    ds_config = {
        "train_micro_batch_size_per_gpu": args.batch_size,
        "gradient_accumulation_steps": args.grad_accumulation_steps,
        "optimizer": {
            "type": "AdamW",
            "params": {
                "lr": args.lr,
                "weight_decay": 0.0,
                "betas": (args.beta1, args.beta2),
            },
        },
        "scheduler": {
            "type": "WarmupDecayLR",
            "params": {
                "total_num_steps": args.epochs * args.steps_per_epoch,
                "warmup_min_lr": 0,
                "warmup_max_lr": args.lr,
                "warmup_num_steps": 100,
                "warmup_type": "linear",
            },
        },
        "fp16": {"enabled": args.precision == "fp16"},
        "bf16": {"enabled": args.precision == "bf16"},
        "gradient_clipping": 1.0,
        "zero_optimization": {
            "stage": 2,
            "contiguous_gradients": True,
            "overlap_comm": True,
            "reduce_scatter": True,
            "reduce_bucket_size": 5e8,
            "allgather_bucket_size": 5e8,
        },
    }

    model_engine, optimizer, _, scheduler = deepspeed.initialize(
        model=model,
        model_parameters=model.parameters(),
        collate_fn=partial(
            custom_collate_fn, tokenizer=tokenizer, use_mm_start_end=args.use_mm_start_end,
            local_rank=args.local_rank
        ),
        config=ds_config,
    )

    return model_engine, optimizer, scheduler
285
+
286
+
287
def evaluate_model_performance(validation_loader, model, args):
    """Evaluate referring-segmentation quality over ``validation_loader``.

    Computes per-sample IoU of thresholded predicted masks against ground
    truth (class index 1 = foreground) and aggregates the metrics across all
    distributed workers.

    Returns:
        tuple: (global IoU averaged per sample, cumulative class IoU).
    """
    # Trackers for metrics
    trackers = {
        "intersection": AverageMeter("Intersec", ":6.3f", Summary.SUM),
        "union": AverageMeter("Union", ":6.3f", Summary.SUM),
        "gIoU": AverageMeter("gIoU", ":6.3f", Summary.SUM)
    }

    model.eval()
    for data_batch in tqdm.tqdm(validation_loader):
        # Prepare data and convert relevant tensors to the appropriate type
        data_batch = dict_to_cuda(data_batch)
        for key in ["global_enc_images", "grounding_enc_images"]:
            data_batch[key] = data_batch[key].to(dtype=torch.bfloat16, device=args.local_rank)

        torch.cuda.empty_cache()

        # Model inference without gradient tracking
        with torch.no_grad():
            results = model(**data_batch)

        predictions = results["pred_masks"]
        # Validate the batch layout *before* indexing into it (the original
        # code asserted only after predictions[0] had already been used).
        assert len(predictions) == 1, "evaluation expects a single sample per batch"
        gt_masks = results["gt_masks"][0].int()
        predicted_masks = (predictions[0] > 0).int()  # Threshold logits to binary masks

        intersection, union, accuracy_iou = 0.0, 0.0, 0.0
        for target, prediction in zip(gt_masks, predicted_masks):
            intersect, union_, _ = intersectionAndUnionGPU(
                prediction.contiguous().clone(), target.contiguous(), 2, ignore_index=255
            )
            intersection += intersect
            union += union_
            accuracy_iou += intersect / (union_ + 1e-5)
            # handles no-object targets: an empty prediction on an empty target
            # counts as a perfect match
            accuracy_iou[union_ == 0] += 1.0

        intersection, union = intersection.cpu().numpy(), union.cpu().numpy()
        accuracy_iou = accuracy_iou.cpu().numpy() / gt_masks.shape[0]
        trackers["intersection"].update(intersection)
        trackers["union"].update(union)
        trackers["gIoU"].update(accuracy_iou, n=gt_masks.shape[0])

    # Aggregate metrics across all distributed processes.
    for meter in trackers.values():
        meter.all_reduce()

    iou_per_class = trackers["intersection"].sum / (trackers["union"].sum + 1e-10)
    class_iou = iou_per_class[1]      # index 1 = foreground class
    global_iou = trackers["gIoU"].avg[1]

    return global_iou, class_iou
338
+
339
+
340
def main(args):
    """End-to-end referring-segmentation evaluation entry point.

    Builds the tokenizer/model, runs distributed evaluation, and appends the
    resulting gIoU/cIoU to ``<results_path>/stats.json`` on rank 0.
    """
    tokenizer = setup_tokenizer_and_special_tokens(args)
    model = initialize_model(args, tokenizer)
    prepare_model_for_training(model, tokenizer, args)

    model_engine, _, _ = initialize_deepspeed(model, tokenizer, args)

    val_datasets = initialize_datasets_and_loaders(args, tokenizer)
    val_loader = setup_data_loaders(args, val_datasets, tokenizer)

    giou, ciou = evaluate_model_performance(val_loader, model_engine, args)

    torch.distributed.barrier()
    if args.local_rank == 0:
        # Update and save the results on the main process only.
        os.makedirs(args.results_path, exist_ok=True)
        stats_file = f"{args.results_path}/stats.json"
        result_list = []
        if os.path.exists(stats_file):
            with open(stats_file, 'r') as json_file:
                result_list = json.load(json_file)
        result_list.append({
            "model": args.results_path,
            "dataset": args.refer_seg_data,
            "giou": str(giou),
            "ciou": str(ciou),
        })

        with open(stats_file, 'w') as json_file:
            json.dump(result_list, json_file, indent=2)

        print(result_list)  # Print all the results
368
+
369
+
370
if __name__ == "__main__":
    # Parse CLI options and run the evaluation pipeline.
    main(parse_args(sys.argv[1:]))
groundingLMM/eval/referring_seg/run_evaluation.sh ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/sh

## USAGE

## bash eval/referring_seg/run_evaluation.sh <path to the HF checkpoints path> <path to the directory to save the evaluation results>

## USAGE


# Adjust the environment variable if you have multiple gpus available, e.g. CUDA_VISIBLE_DEVICES=0,1,2,3 if you have 4 GPUs available
export CUDA_VISIBLE_DEVICES=0
export PYTHONPATH="./:$PYTHONPATH"
MASTER_PORT=24999

# Positional arguments for the bash scripts
CKPT_PATH=$1
RESULT_PATH=$2

# Every RefCOCO / RefCOCO+ / RefCOCOg split is evaluated with an identical
# command, so loop over the dataset|split pairs instead of repeating the
# invocation eight times.
for REFER_SEG_DATA in \
    "refcoco|val" "refcoco|testA" "refcoco|testB" \
    "refcoco+|val" "refcoco+|testA" "refcoco+|testB" \
    "refcocog|val" "refcocog|test"
do
    deepspeed --master_port="$MASTER_PORT" eval/referring_seg/infer_and_evaluate.py --version "$CKPT_PATH" --refer_seg_data "$REFER_SEG_DATA" --results_path "$RESULT_PATH" --pretrained
done
groundingLMM/eval/region_captioning/run_evaluation_RefCOCOg.sh ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+
3
+ ## USAGE
4
+
5
+ ## bash eval/region_captioning/run_evaluation.sh <path to the HF checkpoints path> <path to the directory to save the evaluation results>
6
+
7
+ ## USAGE
8
+
9
+
10
+ export PYTHONPATH="./:$PYTHONPATH"
11
+ MASTER_PORT=24999
12
+ NUM_GPUS=1 # Adjust it as per the available #GPU
13
+
14
+ # Positional arguments for the bash scripts
15
+ CKPT_PATH=$1
16
+ RESULT_PATH=$2
17
+
18
+ # Adjust if needed
19
+ ANNOTATION_FILE=./data/RefCoco_Reg/mdetr_annotations/finetune_refcocog_val_captions.json
20
+ IMAGE_DIR=./data/coco_2014/train2014
21
+ DATASET=refcocog
22
+
23
+ # Run Inference
24
+ torchrun --nnodes=1 --nproc_per_node="$NUM_GPUS" --master_port="$MASTER_PORT" eval/region_captioning/infer.py --hf_model_path "$CKPT_PATH" --annotation_file "$ANNOTATION_FILE" --image_dir "$IMAGE_DIR" --dataset "$DATASET" --results_dir "$RESULT_PATH"
25
+
26
+
27
+ # Evaluate
28
+ python eval/region_captioning/evaluate.py --annotation_file "$ANNOTATION_FILE" --results_dir "$RESULT_PATH"
groundingLMM/mmcv/.circleci/config.yml ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CircleCI pipeline for mmcv: lint first, then a CPU build matrix, then a
# CUDA 10.2 GPU build that also exercises the onnxruntime custom ops.
version: 2.1
jobs:
  # Static checks via pre-commit (ruby is needed by the markdownlint hook).
  lint:
    docker:
      - image: cimg/python:3.7.4
    steps:
      - checkout
      - run:
          name: Install dependencies
          command: |
            sudo apt-add-repository ppa:brightbox/ruby-ng -y
            sudo apt-get update
            sudo apt-get install -y ruby2.7
      - run:
          name: Install pre-commit hook
          command: |
            pip install pre-commit
            pre-commit install
      - run:
          name: Linting
          command: pre-commit run --all-files

  # Parameterized CPU-only build + unit tests (python/torch/torchvision set by workflow).
  build_cpu:
    parameters:
      # The python version must match available image tags in
      # https://circleci.com/developer/images/image/cimg/python
      python:
        type: string
        default: "3.7.0"
      torch:
        type: string
      torchvision:
        type: string
    machine:
      image: ubuntu-2004:202010-01
    resource_class: large
    steps:
      - checkout
      - run:
          name: Install system dependencies
          command: |
            sudo apt-get update
            sudo apt-get install -y ffmpeg libturbojpeg ninja-build
            ffmpeg -version
      - run:
          # https://github.com/pytorch/vision/issues/2921
          name: Install dependency of torchvision when using pyenv
          command: sudo apt-get install -y liblzma-dev
      - run:
          # python3.7 should be re-installed due to the issue https://github.com/pytorch/vision/issues/2921
          name: Select Python
          command: |
            pyenv uninstall -f << parameters.python >>
            pyenv install << parameters.python >>
            pyenv global << parameters.python >>
      - run:
          name: Upgrade pip
          command: |
            python -m pip install pip --upgrade
      - run:
          name: Install PyTorch
          command: python -m pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html
      - run:
          name: Install psutil
          command: python -m pip install psutil
      - run:
          # MMCV_WITH_OPS=1 compiles the C++/CUDA extension ops.
          name: Build and install
          command: |
            rm -rf .eggs
            python setup.py check -m -s
            python -m pip install -e .
          no_output_timeout: 20m
          environment:
            MMCV_WITH_OPS: 1
      - run:
          name: Install dependencies of unit test
          command: |
            python -m pip install -r requirements/test.txt
      - run:
          name: Run unittests and generate coverage report
          command: |
            python -m coverage run --branch --source mmcv -m pytest tests/
            python -m coverage xml
            python -m coverage report -m

  # GPU build on a CUDA machine image; also tests the onnxruntime integration.
  build_cu102:
    machine:
      image: ubuntu-1604-cuda-10.1:201909-23 # the actual version of cuda is 10.2
    resource_class: gpu.nvidia.small
    steps:
      - checkout
      - run:
          name: Set CUDA environment
          command: |
            echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> $BASH_ENV
            echo 'export PATH=/usr/local/cuda/bin:$PATH' >> $BASH_ENV
            echo 'export CUDA_HOME=/usr/local/cuda' >> $BASH_ENV
            source $BASH_ENV
            nvidia-smi
            nvcc --version
            gcc --version
      - run:
          name: Install system dependencies
          command: |
            sudo apt-get update
            sudo apt-get install -y libturbojpeg ninja-build
            # the default version of ffmpeg is 2.8.7, which should be upgraded to 4+
            sudo add-apt-repository -y ppa:jonathonf/ffmpeg-4
            sudo apt-get update
            sudo apt-get install -y ffmpeg
            ffmpeg -version
            sudo add-apt-repository --remove ppa:jonathonf/ffmpeg-4 -y
      - run:
          # https://github.com/pytorch/vision/issues/2921
          name: Install dependency of torchvision when using pyenv
          command: sudo apt-get install -y liblzma-dev
      - run:
          # python3.7 should be re-installed due to the issue https://github.com/pytorch/vision/issues/2921
          name: Select python3.7
          command: |
            pyenv uninstall -f 3.7.0
            pyenv install 3.7.0
            pyenv global 3.7.0
      - run:
          name: Upgrade pip
          command: |
            python -m pip install pip --upgrade
      - run:
          name: Install PyTorch
          command: python -m pip install torch==1.8.1+cu102 torchvision==0.9.1+cu102 -f https://download.pytorch.org/whl/torch_stable.html
      - run:
          name: Install psutil
          command: python -m pip install psutil
      - run:
          name: Download onnxruntime library and install onnxruntime
          command: |
            wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz
            tar -zxvf onnxruntime-linux-x64-1.8.1.tgz
            echo 'export ONNXRUNTIME_DIR=$(pwd)/onnxruntime-linux-x64-1.8.1' >> $BASH_ENV
            echo 'export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH' >> $BASH_ENV
            source $BASH_ENV
            python -m pip install onnxruntime==1.8.1
      - run:
          # MMCV_WITH_ORT=1 additionally builds the onnxruntime custom ops.
          name: Build and install
          command: |
            rm -rf .eggs
            python setup.py check -m -s
            python -m pip install -e .
          environment:
            MMCV_WITH_OPS: 1
            MMCV_WITH_ORT: 1
      - run:
          name: Install dependencies for unit test
          command: |
            python -m pip install -r requirements/test.txt
      - run:
          name: Run unittests and generate coverage report
          command: |
            python -m coverage run --branch --source mmcv -m pytest tests/
            python -m coverage xml
            python -m coverage report -m
# Pipeline order: lint -> CPU build -> GPU build.
workflows:
  unit_tests:
    jobs:
      - lint
      - build_cpu:
          name: build_py3.8_pt1.9_cpu
          torch: 1.9.0
          torchvision: 0.10.0
          python: "3.8.0"
          requires:
            - lint
      - build_cu102:
          requires:
            - build_py3.8_pt1.9_cpu
groundingLMM/mmcv/.dev_scripts/check_installation.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+
4
+ from mmcv.ops import box_iou_rotated
5
+ from mmcv.utils import collect_env
6
+
7
+
8
def check_installation():
    """Check whether mmcv-full has been installed successfully."""
    # Two small sets of rotated boxes used to exercise the compiled op.
    np_boxes1 = np.asarray(
        [[1.0, 1.0, 3.0, 4.0, 0.5], [2.0, 2.0, 3.0, 4.0, 0.6],
         [7.0, 7.0, 8.0, 8.0, 0.4]],
        dtype=np.float32)
    np_boxes2 = np.asarray(
        [[0.0, 2.0, 2.0, 5.0, 0.3], [2.0, 1.0, 3.0, 3.0, 0.5],
         [5.0, 5.0, 6.0, 7.0, 0.4]],
        dtype=np.float32)
    boxes1, boxes2 = torch.from_numpy(np_boxes1), torch.from_numpy(np_boxes2)

    # Running the compiled CPU op proves the extension modules were built.
    box_iou_rotated(boxes1, boxes2)
    print('CPU ops were compiled successfully.')

    # Repeat on the GPU when a CUDA runtime is present.
    if not torch.cuda.is_available():
        print('No CUDA runtime is found, skipping the checking of CUDA ops.')
        return
    box_iou_rotated(boxes1.cuda(), boxes2.cuda())
    print('CUDA ops were compiled successfully.')
33
+
34
+
35
if __name__ == '__main__':
    print('Start checking the installation of mmcv-full ...')
    check_installation()
    print('mmcv-full has been installed successfully.\n')

    # Dump the collected environment information framed by dashed lines.
    env_info_dict = collect_env()
    env_info = '\n'.join(f'{k}: {v}' for k, v in env_info_dict.items())
    dash_line = '-' * 60 + '\n'
    print('Environment information:')
    print(dash_line + env_info + '\n' + dash_line)
groundingLMM/mmcv/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
# GitHub issue-template chooser configuration: disable free-form issues and
# point reporters at the docs before they file one.
blank_issues_enabled: false

contact_links:
  - name: Common Issues
    url: https://mmcv.readthedocs.io/en/latest/trouble_shooting.html
    about: Check if your issue already has solutions
  - name: MMCV Documentation
    url: https://mmcv.readthedocs.io/en/latest/
    about: Check if your question is answered in docs
groundingLMM/mmcv/.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Feature request
3
+ about: Suggest an idea for this project
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Describe the feature**
11
+
12
+ **Motivation**
13
+ A clear and concise description of the motivation of the feature.
14
+ Ex1. It is inconvenient when [....].
15
+ Ex2. There is a recent paper [....], which is very helpful for [....].
16
+
17
+ **Related resources**
18
+ If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
19
+
20
+ **Additional context**
21
+ Add any other context or screenshots about the feature request here.
22
+ If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
groundingLMM/mmcv/.github/ISSUE_TEMPLATE/general_questions.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: General questions
3
+ about: Ask general questions to get help
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ **Checklist**
11
+
12
+ 1. I have searched related issues but cannot get the expected help.
13
+ 2. I have read the FAQ documentation but cannot get the expected help.
groundingLMM/mmcv/.github/ISSUE_TEMPLATE/unexpected_report.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: Unexpected Results
3
+ about: Create a report to help us improve
4
+ title: ''
5
+ labels: ''
6
+ assignees: ''
7
+
8
+ ---
9
+
10
+ Thanks for reporting the unexpected results and we appreciate it a lot.
11
+
12
+ **Checklist**
13
+
14
+ 1. I have searched related issues but cannot get the expected help.
15
+ 2. I have read the [FAQ documentation](https://mmcv.readthedocs.io/en/latest/trouble_shooting.html) but cannot get the expected help.
16
+ 3. The unexpected results still exist in the latest version.
17
+
18
+ **Describe the Issue**
19
+ A clear and concise description of what the bug is, including what results are expected and what the real results you got.
20
+
21
+ **Reproduction**
22
+
23
+ 1. What command, code, or script did you run?
24
+
25
+ ```bash
26
+ A placeholder for the command.
27
+ ```
28
+
29
+ 2. Did you make any modifications on the code? Did you understand what you have modified?
30
+
31
+ **Environment**
32
+
33
+ 1. Please run `python -c "from mmcv.utils import collect_env; print(collect_env())"` to collect necessary environment information and paste it here.
34
+ 2. You may add additional information that may be helpful for locating the problem, such as
35
+ - How you installed PyTorch [e.g., pip, conda, source]
36
+ - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
37
+
38
+ **Error traceback**
39
+ If applicable, paste the error traceback here.
40
+
41
+ ```none
42
+ A placeholder for traceback.
43
+ ```
44
+
45
+ **Bug fix**
46
+ If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
groundingLMM/mmcv/.github/pull_request_template.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Thanks for your contribution and we appreciate it a lot. The following instructions would make your pull request more healthy and more easily get feedback. If you do not understand some items, don't worry, just make the pull request and seek help from maintainers.
2
+
3
+ ## Motivation
4
+
5
+ Please describe the motivation of this PR and the goal you want to achieve through this PR.
6
+
7
+ ## Modification
8
+
9
+ Please briefly describe what modification is made in this PR.
10
+
11
+ ## BC-breaking (Optional)
12
+
13
+ Does the modification introduce changes that break the backward-compatibility of the downstream repositories?
14
+ If so, please describe how it breaks the compatibility and how the downstream projects should modify their code to keep compatibility with this PR.
15
+
16
+ ## Use cases (Optional)
17
+
18
+ If this PR introduces a new feature, it is better to list some use cases here, and update the documentation.
19
+
20
+ ## Checklist
21
+
22
+ **Before PR**:
23
+
24
+ - [ ] I have read and followed the workflow indicated in the [CONTRIBUTING.md](https://github.com/open-mmlab/mmcv/blob/master/CONTRIBUTING.md) to create this PR.
25
+ - [ ] Pre-commit or linting tools indicated in [CONTRIBUTING.md](https://github.com/open-mmlab/mmcv/blob/master/CONTRIBUTING.md) are used to fix the potential lint issues.
26
+ - [ ] Bug fixes are covered by unit tests, the case that causes the bug should be added in the unit tests.
27
+ - [ ] New functionalities are covered by complete unit tests. If not, please add more unit test to ensure the correctness.
28
+ - [ ] The documentation has been modified accordingly, including docstring or example tutorials.
29
+
30
+ **After PR**:
31
+
32
+ - [ ] If the modification has potential influence on downstream or other related projects, this PR should be tested with some of those projects, like MMDet or MMCls.
33
+ - [ ] CLA has been signed and all committers have signed the CLA in this PR.
groundingLMM/mmcv/.github/workflows/build.yml ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: build
2
+
3
+ on:
4
+ push:
5
+ paths-ignore:
6
+ - 'README.md'
7
+ - 'README_zh-CN.md'
8
+ - 'docs/**'
9
+ - 'examples/**'
10
+ - '.dev_scripts/**'
11
+
12
+ pull_request:
13
+ paths-ignore:
14
+ - 'README.md'
15
+ - 'README_zh-CN.md'
16
+ - 'docs/**'
17
+ - 'examples/**'
18
+ - '.dev_scripts/**'
19
+
20
+ concurrency:
21
+ group: ${{ github.workflow }}-${{ github.ref }}
22
+ cancel-in-progress: true
23
+
24
+ env:
25
+ MMCV_WITH_OPS: 1
26
+
27
+ jobs:
28
+ build_without_torch:
29
+ runs-on: ubuntu-18.04
30
+ strategy:
31
+ matrix:
32
+ python-version: [3.7]
33
+ steps:
34
+ - uses: actions/checkout@v2
35
+ - name: Set up Python ${{ matrix.python-version }}
36
+ uses: actions/setup-python@v2
37
+ with:
38
+ python-version: ${{ matrix.python-version }}
39
+ - name: Install system dependencies
40
+ run: sudo apt-get update && sudo apt-get install -y ffmpeg libturbojpeg
41
+ - name: Build and install
42
+ run: rm -rf .eggs && pip install -e .
43
+ - name: Validate the installation
44
+ run: python -c "import mmcv"
45
+ - name: Run unittests and generate coverage report
46
+ run: |
47
+ pip install -r requirements/test.txt
48
+ pytest tests/ --ignore=tests/test_runner --ignore=tests/test_optimizer.py --ignore=tests/test_cnn --ignore=tests/test_parallel.py --ignore=tests/test_ops --ignore=tests/test_load_model_zoo.py --ignore=tests/test_utils/test_logging.py --ignore=tests/test_image/test_io.py --ignore=tests/test_utils/test_registry.py --ignore=tests/test_utils/test_parrots_jit.py --ignore=tests/test_utils/test_trace.py --ignore=tests/test_utils/test_hub.py
49
+
50
+ build_without_ops:
51
+ runs-on: ubuntu-18.04
52
+ env:
53
+ MMCV_WITH_OPS: 0
54
+ strategy:
55
+ matrix:
56
+ python-version: [3.7]
57
+ torch: [1.7.0, 1.8.0, 1.9.0]
58
+ include:
59
+ - torch: 1.7.0
60
+ torchvision: 0.8.1
61
+ - torch: 1.8.0
62
+ torchvision: 0.9.0
63
+ - torch: 1.9.0
64
+ torchvision: 0.10.0
65
+ steps:
66
+ - uses: actions/checkout@v2
67
+ - name: Set up Python ${{ matrix.python-version }}
68
+ uses: actions/setup-python@v2
69
+ with:
70
+ python-version: ${{ matrix.python-version }}
71
+ - name: Install system dependencies
72
+ run: sudo apt-get update && sudo apt-get install -y ffmpeg libturbojpeg
73
+ - name: Install PyTorch
74
+ run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
75
+ - name: Build and install
76
+ run: rm -rf .eggs && pip install -e .
77
+ - name: Validate the installation
78
+ run: python -c "import mmcv"
79
+ - name: Run unittests
80
+ run: |
81
+ pip install -r requirements/test.txt
82
+ pytest tests/ --ignore=tests/test_ops
83
+
84
+ build_cpu:
85
+ runs-on: ubuntu-18.04
86
+ strategy:
87
+ matrix:
88
+ python-version: [3.7]
89
+ torch: [1.5.1, 1.6.0, 1.7.0, 1.8.0, 1.9.0]
90
+ include:
91
+ - torch: 1.5.1
92
+ torchvision: 0.6.1
93
+ - torch: 1.6.0
94
+ torchvision: 0.7.0
95
+ - torch: 1.7.0
96
+ torchvision: 0.8.1
97
+ - torch: 1.8.0
98
+ torchvision: 0.9.0
99
+ - torch: 1.9.0
100
+ torchvision: 0.10.0
101
+ steps:
102
+ - uses: actions/checkout@v2
103
+ - name: Set up Python ${{ matrix.python-version }}
104
+ uses: actions/setup-python@v2
105
+ with:
106
+ python-version: ${{ matrix.python-version }}
107
+ - name: Install system dependencies
108
+ run: sudo apt-get update && sudo apt-get install -y ffmpeg libturbojpeg
109
+ - name: Install PyTorch
110
+ run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
111
+ # psutil is an optional package to detect the number of CPUs for compiling mmcv
112
+ - name: Install psutil
113
+ run: pip install psutil
114
+ - name: Create sdist and untar
115
+ run: |
116
+ MMCV_WITH_OPS=1 python setup.py sdist
117
+ tar zxvf dist/mmcv-full* -C /tmp
118
+ rm -r mmcv
119
+ - name: Build and install from sdist
120
+ run: |
121
+ pushd /tmp/mmcv-full*
122
+ pip install -e .
123
+ popd
124
+ - name: Validate the installation
125
+ run: python -c "import mmcv"
126
+ - name: Run unittests and generate coverage report
127
+ run: |
128
+ pip install -r requirements/test.txt
129
+ coverage run --branch --source=mmcv -m pytest tests/
130
+ coverage xml
131
+ coverage report -m
132
+
133
+ build_cu101:
134
+ runs-on: ubuntu-18.04
135
+ container:
136
+ image: pytorch/pytorch:1.6.0-cuda10.1-cudnn7-devel
137
+ env:
138
+ FORCE_CUDA: 1
139
+ MMCV_CUDA_ARGS: -gencode=arch=compute_61,code=sm_61
140
+ strategy:
141
+ matrix:
142
+ python-version: [3.7]
143
+ torch: [1.3.1, 1.5.1+cu101, 1.6.0+cu101, 1.7.0+cu101, 1.8.0+cu101]
144
+ include:
145
+ - torch: 1.3.1
146
+ torchvision: 0.4.2
147
+ - torch: 1.5.1+cu101
148
+ torchvision: 0.6.1+cu101
149
+ - torch: 1.6.0+cu101
150
+ torchvision: 0.7.0+cu101
151
+ - torch: 1.7.0+cu101
152
+ torchvision: 0.8.1+cu101
153
+ - torch: 1.8.0+cu101
154
+ torchvision: 0.9.0+cu101
155
+ - python-version: 3.6
156
+ torch: 1.8.0+cu101
157
+ torchvision: 0.9.0+cu101
158
+ - python-version: 3.8
159
+ torch: 1.8.0+cu101
160
+ torchvision: 0.9.0+cu101
161
+ - python-version: 3.9
162
+ torch: 1.8.0+cu101
163
+ torchvision: 0.9.0+cu101
164
+ steps:
165
+ - uses: actions/checkout@v2
166
+ - name: Set up Python ${{ matrix.python-version }}
167
+ uses: actions/setup-python@v2
168
+ with:
169
+ python-version: ${{ matrix.python-version }}
170
+ - name: Install python-dev
171
+ run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
172
+ if: ${{matrix.python-version != '3.9'}}
173
+ - name: Install Pillow
174
+ run: python -m pip install Pillow==6.2.2
175
+ if: ${{matrix.torchvision == '0.4.2'}}
176
+ # When we use a third-party container, we need to add python -m to call
177
+ # the user-installed pip when we use the pip command, otherwise it will
178
+ # call the system pip
179
+ - name: Install PyTorch
180
+ run: python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
181
+ - name: Install system dependencies
182
+ run: apt-get update && apt-get install -y ffmpeg libturbojpeg ninja-build
183
+ - name: Install dependencies for compiling onnx when python=3.9
184
+ run: python -m pip install protobuf && apt-get -y install libprotobuf-dev protobuf-compiler cmake
185
+ if: ${{matrix.python-version == '3.9'}}
186
+ # pstuil is an optional package to detect the number of CPU for compiling mmcv
187
+ - name: Install psutil
188
+ run: python -m pip install psutil
189
+ - name: Build and install
190
+ run: rm -rf .eggs && python -m pip install -e .
191
+ - name: Validate the installation
192
+ run: python -c "import mmcv"
193
+ - name: Run unittests and generate coverage report
194
+ run: |
195
+ python -m pip install -r requirements/test.txt
196
+ coverage run --branch --source=mmcv -m pytest tests/
197
+ coverage xml
198
+ coverage report -m
199
+ # Only upload coverage report for python3.7 && pytorch1.6
200
+ - name: Upload coverage to Codecov
201
+ if: ${{matrix.torch == '1.6.0+cu101' && matrix.python-version == '3.7'}}
202
+ uses: codecov/codecov-action@v1.0.14
203
+ with:
204
+ file: ./coverage.xml
205
+ flags: unittests
206
+ env_vars: OS,PYTHON
207
+ name: codecov-umbrella
208
+ fail_ci_if_error: false
209
+
210
+ build_cu102:
211
+ runs-on: ubuntu-18.04
212
+ container:
213
+ image: pytorch/pytorch:1.9.0-cuda10.2-cudnn7-devel
214
+ env:
215
+ FORCE_CUDA: 1
216
+ MMCV_CUDA_ARGS: -gencode=arch=compute_61,code=sm_61
217
+ strategy:
218
+ matrix:
219
+ python-version: [3.7]
220
+ torch: [1.9.0+cu102, 1.10.0+cu102, 1.11.0+cu102]
221
+ include:
222
+ - torch: 1.9.0+cu102
223
+ torchvision: 0.10.0+cu102
224
+ - torch: 1.10.0+cu102
225
+ torchvision: 0.11.0+cu102
226
+ - torch: 1.11.0+cu102
227
+ torchvision: 0.12.0+cu102
228
+ - python-version: 3.6
229
+ torch: 1.9.0+cu102
230
+ torchvision: 0.10.0+cu102
231
+ - python-version: 3.8
232
+ torch: 1.9.0+cu102
233
+ torchvision: 0.10.0+cu102
234
+ steps:
235
+ - uses: actions/checkout@v2
236
+ - name: Set up Python ${{ matrix.python-version }}
237
+ uses: actions/setup-python@v2
238
+ with:
239
+ python-version: ${{ matrix.python-version }}
240
+ - name: Install python-dev
241
+ run: apt-get update && apt-get install -y python${{matrix.python-version}}-dev
242
+ - name: python -m Install PyTorch
243
+ run: python -m pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} -f https://download.pytorch.org/whl/torch_stable.html
244
+ - name: Install system dependencies
245
+ run: apt-get update && apt-get install -y ffmpeg libturbojpeg ninja-build
246
+ # pstuil is an optional package to detect the number of CPU for compiling mmcv
247
+ - name: Install psutil
248
+ run: python -m pip install psutil
249
+ - name: Build and install
250
+ run: rm -rf .eggs && python -m pip install -e .
251
+ - name: Validate the installation
252
+ run: python -c "import mmcv"
253
+ - name: Run unittests and generate coverage report
254
+ run: |
255
+ python -m pip install -r requirements/test.txt
256
+ coverage run --branch --source=mmcv -m pytest tests/
257
+ coverage xml
258
+
259
+ build_windows_without_ops:
260
+ runs-on: windows-latest
261
+ env:
262
+ MMCV_WITH_OPS: 0
263
+ strategy:
264
+ matrix:
265
+ torch: [1.7.1, 1.8.0, 1.9.0]
266
+ include:
267
+ - torch: 1.7.1
268
+ torchvision: 0.8.2
269
+ - torch: 1.8.0
270
+ torchvision: 0.9.0
271
+ - torch: 1.9.0
272
+ torchvision: 0.10.0
273
+ steps:
274
+ - uses: actions/checkout@v2
275
+ - name: Set up Python 3.7
276
+ uses: actions/setup-python@v2
277
+ with:
278
+ python-version: 3.7
279
+ - name: Install PyTorch
280
+ run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu --no-cache-dir -f https://download.pytorch.org/whl/torch_stable.html
281
+ - name: Build and install
282
+ run: pip install -e .
283
+ - name: Validate the installation
284
+ run: python -c "import mmcv"
285
+ - name: Run unittests
286
+ run: |
287
+ pip install -r requirements/test.txt
288
+ pytest tests/ --ignore=tests/test_ops --ignore tests/test_utils/test_progressbar.py --ignore tests/test_utils/test_timer.py --ignore tests/test_image/test_io.py
289
+
290
+ build_windows:
291
+ runs-on: windows-latest
292
+ strategy:
293
+ matrix:
294
+ torch: [1.7.1, 1.8.0, 1.9.0]
295
+ include:
296
+ - torch: 1.7.1
297
+ torchvision: 0.8.2
298
+ - torch: 1.8.0
299
+ torchvision: 0.9.0
300
+ - torch: 1.9.0
301
+ torchvision: 0.10.0
302
+ steps:
303
+ - uses: actions/checkout@v2
304
+ - name: Set up Python 3.7
305
+ uses: actions/setup-python@v2
306
+ with:
307
+ python-version: 3.7
308
+ - name: Install PyTorch
309
+ run: pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu --no-cache-dir -f https://download.pytorch.org/whl/torch_stable.html
310
+ - name: Build and install
311
+ run: pip install -e .
312
+ - name: Validate the installation
313
+ run: python -c "import mmcv"
314
+ - name: Run unittests
315
+ run: |
316
+ pip install -r requirements/test.txt
317
+ pytest tests/ --ignore tests/test_utils/test_progressbar.py --ignore tests/test_utils/test_timer.py --ignore tests/test_image/test_io.py
318
+
319
+ build_macos:
320
+ runs-on: macos-latest
321
+ strategy:
322
+ matrix:
323
+ torch: [1.3.1, 1.5.1, 1.6.0, 1.7.0, 1.8.0, 1.9.0]
324
+ include:
325
+ - torch: 1.3.1
326
+ torchvision: 0.4.2
327
+ - torch: 1.5.1
328
+ torchvision: 0.6.1
329
+ - torch: 1.6.0
330
+ torchvision: 0.7.0
331
+ - torch: 1.7.0
332
+ torchvision: 0.8.1
333
+ - torch: 1.8.0
334
+ torchvision: 0.9.0
335
+ - torch: 1.9.0
336
+ torchvision: 0.10.0
337
+ steps:
338
+ - uses: actions/checkout@v2
339
+ - name: Set up Python 3.7
340
+ uses: actions/setup-python@v2
341
+ with:
342
+ python-version: 3.7
343
+ - name: Install system dependencies
344
+ run: brew install ffmpeg jpeg-turbo
345
+ - name: Install utils
346
+ run: pip install psutil
347
+ - name: Install Pillow
348
+ run: pip install Pillow==6.2.2
349
+ if: ${{matrix.torchvision == '0.4.2'}}
350
+ - name: Install PyTorch
351
+ run: pip install torch==${{matrix.torch}} torchvision==${{matrix.torchvision}} --no-cache-dir
352
+ - name: Build and install
353
+ run: |
354
+ rm -rf .eggs
355
+ CC=clang CXX=clang++ CFLAGS='-stdlib=libc++' pip install -e .
356
+ - name: Validate the installation
357
+ run: python -c "import mmcv"
358
+ - name: Run unittests
359
+ run: |
360
+ pip install -r requirements/test.txt
361
+ # The timing on macos VMs is not precise, so we skip the progressbar tests
362
+ pytest tests/ --ignore tests/test_utils/test_progressbar.py --ignore tests/test_utils/test_timer.py
groundingLMM/mmcv/.github/workflows/build_pat.yml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: build_pat
2
+
3
+ on: push
4
+
5
+ concurrency:
6
+ group: ${{ github.workflow }}-${{ github.ref }}
7
+ cancel-in-progress: true
8
+
9
+ env:
10
+ MMCV_WITH_OPS: 1
11
+
12
+ jobs:
13
+ build_parrots:
14
+ runs-on: ubuntu-18.04
15
+ container:
16
+ image: ghcr.io/zhouzaida/parrots-mmcv:1.3.4
17
+ credentials:
18
+ username: zhouzaida
19
+ password: ${{ secrets.CR_PAT }}
20
+
21
+ steps:
22
+ - uses: actions/checkout@v2
23
+ - name: Install unittest dependencies
24
+ run: pip install -r requirements/test.txt
25
+ - name: Build and install
26
+ run: rm -rf .eggs && MMCV_WITH_OPS=1 pip install -e .
groundingLMM/mmcv/.github/workflows/lint.yml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: lint
2
+
3
+ on: [push, pull_request]
4
+
5
+ concurrency:
6
+ group: ${{ github.workflow }}-${{ github.ref }}
7
+ cancel-in-progress: true
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-18.04
12
+ steps:
13
+ - uses: actions/checkout@v2
14
+ - name: Set up Python 3.7
15
+ uses: actions/setup-python@v2
16
+ with:
17
+ python-version: 3.7
18
+ - name: Install pre-commit hook
19
+ run: |
20
+ # markdownlint requires ruby >= 2.7
21
+ sudo apt-add-repository ppa:brightbox/ruby-ng -y
22
+ sudo apt-get update
23
+ sudo apt-get install -y ruby2.7
24
+ pip install pre-commit
25
+ pre-commit install
26
+ - name: Linting
27
+ run: pre-commit run --all-files
28
+ - name: Format c/cuda codes with clang-format
29
+ uses: DoozyX/clang-format-lint-action@v0.11
30
+ with:
31
+ source: mmcv/ops/csrc
32
+ extensions: h,c,cpp,hpp,cu,cuh
33
+ style: google
groundingLMM/mmcv/.github/workflows/publish-to-pypi.yml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: deploy
2
+
3
+ on: push
4
+
5
+ concurrency:
6
+ group: ${{ github.workflow }}-${{ github.ref }}
7
+ cancel-in-progress: true
8
+
9
+ jobs:
10
+ build-n-publish:
11
+ runs-on: ubuntu-18.04
12
+ if: startsWith(github.event.ref, 'refs/tags')
13
+ steps:
14
+ - uses: actions/checkout@v2
15
+ - name: Set up Python 3.7
16
+ uses: actions/setup-python@v1
17
+ with:
18
+ python-version: 3.7
19
+ - name: Upgrade Setuptools
20
+ run: pip install setuptools --upgrade
21
+ - name: Build MMCV
22
+ run: python setup.py sdist
23
+ - name: Publish distribution to PyPI
24
+ run: |
25
+ pip install twine
26
+ twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }}
27
+
28
+ build-n-publish_with_ops:
29
+ runs-on: ubuntu-18.04
30
+ if: startsWith(github.event.ref, 'refs/tags')
31
+ steps:
32
+ - uses: actions/checkout@v2
33
+ - name: Set up Python 3.7
34
+ uses: actions/setup-python@v1
35
+ with:
36
+ python-version: 3.7
37
+ - name: Upgrade Setuptools
38
+ run: pip install setuptools --upgrade
39
+ - name: Build MMCV with ops
40
+ run: |
41
+ sed -i "s/os.getenv('MMCV_WITH_OPS', '0')/os.getenv('MMCV_WITH_OPS', '1')/g" setup.py
42
+ python setup.py sdist
43
+ - name: Publish distribution to PyPI
44
+ run: |
45
+ pip install twine
46
+ twine upload dist/* -u __token__ -p ${{ secrets.pypi_password }}
groundingLMM/mmcv/examples/train.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+ import torch.optim as optim
5
+ import torchvision.transforms as transforms
6
+ from torch.utils.data import DataLoader
7
+ from torchvision.datasets import CIFAR10
8
+
9
+ from mmcv.parallel import MMDataParallel
10
+ from mmcv.runner import EpochBasedRunner
11
+ from mmcv.utils import get_logger
12
+
13
+
14
+ class Model(nn.Module):
15
+
16
+ def __init__(self):
17
+ super(Model, self).__init__()
18
+ self.conv1 = nn.Conv2d(3, 6, 5)
19
+ self.pool = nn.MaxPool2d(2, 2)
20
+ self.conv2 = nn.Conv2d(6, 16, 5)
21
+ self.fc1 = nn.Linear(16 * 5 * 5, 120)
22
+ self.fc2 = nn.Linear(120, 84)
23
+ self.fc3 = nn.Linear(84, 10)
24
+ self.loss_fn = nn.CrossEntropyLoss()
25
+
26
+ def forward(self, x):
27
+ x = self.pool(F.relu(self.conv1(x)))
28
+ x = self.pool(F.relu(self.conv2(x)))
29
+ x = x.view(-1, 16 * 5 * 5)
30
+ x = F.relu(self.fc1(x))
31
+ x = F.relu(self.fc2(x))
32
+ x = self.fc3(x)
33
+ return x
34
+
35
+ def train_step(self, data, optimizer):
36
+ images, labels = data
37
+ predicts = self(images) # -> self.__call__() -> self.forward()
38
+ loss = self.loss_fn(predicts, labels)
39
+ return {'loss': loss}
40
+
41
+
42
+ if __name__ == '__main__':
43
+ model = Model()
44
+ if torch.cuda.is_available():
45
+ # only use gpu:0 to train
46
+ # Solved issue https://github.com/open-mmlab/mmcv/issues/1470
47
+ model = MMDataParallel(model.cuda(), device_ids=[0])
48
+
49
+ # dataset and dataloader
50
+ transform = transforms.Compose([
51
+ transforms.ToTensor(),
52
+ transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
53
+ ])
54
+ trainset = CIFAR10(
55
+ root='data', train=True, download=True, transform=transform)
56
+ trainloader = DataLoader(
57
+ trainset, batch_size=128, shuffle=True, num_workers=2)
58
+
59
+ optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
60
+ logger = get_logger('mmcv')
61
+ # runner is a scheduler to manage the training
62
+ runner = EpochBasedRunner(
63
+ model,
64
+ optimizer=optimizer,
65
+ work_dir='./work_dir',
66
+ logger=logger,
67
+ max_epochs=4)
68
+
69
+ # learning rate scheduler config
70
+ lr_config = dict(policy='step', step=[2, 3])
71
+ # configuration of optimizer
72
+ optimizer_config = dict(grad_clip=None)
73
+ # configuration of saving checkpoints periodically
74
+ checkpoint_config = dict(interval=1)
75
+ # save log periodically and multiple hooks can be used simultaneously
76
+ log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')])
77
+ # register hooks to runner and those hooks will be invoked automatically
78
+ runner.register_training_hooks(
79
+ lr_config=lr_config,
80
+ optimizer_config=optimizer_config,
81
+ checkpoint_config=checkpoint_config,
82
+ log_config=log_config)
83
+
84
+ runner.run([trainloader], [('train', 1)])
groundingLMM/mmcv/mmcv.egg-info/PKG-INFO ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.1
2
+ Name: mmcv
3
+ Version: 1.4.7
4
+ Summary: OpenMMLab Computer Vision Foundation
5
+ Home-page: https://github.com/open-mmlab/mmcv
6
+ Author: MMCV Contributors
7
+ Author-email: openmmlab@gmail.com
8
+ Keywords: computer vision
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: License :: OSI Approved :: Apache Software License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.6
14
+ Classifier: Programming Language :: Python :: 3.7
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Topic :: Utilities
18
+ Provides-Extra: all
19
+ Provides-Extra: tests
20
+ Provides-Extra: build
21
+ Provides-Extra: optional
22
+ License-File: LICENSE
23
+ License-File: LICENSES.md
groundingLMM/mmcv/mmcv.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,877 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .dockerignore
2
+ .gitignore
3
+ .owners.yml
4
+ .pre-commit-config.yaml
5
+ .readthedocs.yml
6
+ CITATION.cff
7
+ CONTRIBUTING.md
8
+ Dockerfile
9
+ Jenkinsfile
10
+ LICENSE
11
+ LICENSES.md
12
+ MANIFEST.in
13
+ README.md
14
+ README_zh-CN.md
15
+ TERMINOLOGY.md
16
+ requirements.txt
17
+ setup.cfg
18
+ setup.py
19
+ .circleci/config.yml
20
+ .dev_scripts/check_installation.py
21
+ .github/pull_request_template.md
22
+ .github/ISSUE_TEMPLATE/config.yml
23
+ .github/ISSUE_TEMPLATE/feature_request.md
24
+ .github/ISSUE_TEMPLATE/general_questions.md
25
+ .github/ISSUE_TEMPLATE/unexpected_report.md
26
+ .github/workflows/build.yml
27
+ .github/workflows/build_pat.yml
28
+ .github/workflows/lint.yml
29
+ .github/workflows/publish-to-pypi.yml
30
+ docs/en/Makefile
31
+ docs/en/api.rst
32
+ docs/en/compatibility.md
33
+ docs/en/conf.py
34
+ docs/en/faq.md
35
+ docs/en/index.rst
36
+ docs/en/make.bat
37
+ docs/en/mmcv-logo.png
38
+ docs/en/_static/flow_img2toimg1.png
39
+ docs/en/_static/flow_raw_images.png
40
+ docs/en/_static/flow_visualization.png
41
+ docs/en/_static/flow_warp.png
42
+ docs/en/_static/flow_warp_diff.png
43
+ docs/en/_static/parallel_progress.gif
44
+ docs/en/_static/parallel_progress.png
45
+ docs/en/_static/progress.gif
46
+ docs/en/_static/progress.png
47
+ docs/en/_static/qq_group_qrcode.jpg
48
+ docs/en/_static/wechat_qrcode.jpg
49
+ docs/en/_static/zhihu_qrcode.jpg
50
+ docs/en/_static/community/1.png
51
+ docs/en/_static/community/2.png
52
+ docs/en/_static/community/3.png
53
+ docs/en/_static/css/readthedocs.css
54
+ docs/en/_static/image/mmcv-logo.png
55
+ docs/en/community/contributing.md
56
+ docs/en/community/pr.md
57
+ docs/en/deployment/mmcv_ops_definition.md
58
+ docs/en/deployment/onnx.md
59
+ docs/en/deployment/onnxruntime_custom_ops.md
60
+ docs/en/deployment/onnxruntime_op.md
61
+ docs/en/deployment/tensorrt_custom_ops.md
62
+ docs/en/deployment/tensorrt_plugin.md
63
+ docs/en/get_started/build.md
64
+ docs/en/get_started/installation.md
65
+ docs/en/get_started/introduction.md
66
+ docs/en/get_started/previous_versions.md
67
+ docs/en/understand_mmcv/cnn.md
68
+ docs/en/understand_mmcv/config.md
69
+ docs/en/understand_mmcv/data_process.md
70
+ docs/en/understand_mmcv/io.md
71
+ docs/en/understand_mmcv/ops.md
72
+ docs/en/understand_mmcv/registry.md
73
+ docs/en/understand_mmcv/runner.md
74
+ docs/en/understand_mmcv/utils.md
75
+ docs/en/understand_mmcv/visualization.md
76
+ docs/zh_cn/Makefile
77
+ docs/zh_cn/api.rst
78
+ docs/zh_cn/compatibility.md
79
+ docs/zh_cn/conf.py
80
+ docs/zh_cn/faq.md
81
+ docs/zh_cn/index.rst
82
+ docs/zh_cn/make.bat
83
+ docs/zh_cn/_static/css/readthedocs.css
84
+ docs/zh_cn/_static/image/mmcv-logo.png
85
+ docs/zh_cn/community/contributing.md
86
+ docs/zh_cn/community/pr.md
87
+ docs/zh_cn/deployment/onnx.md
88
+ docs/zh_cn/deployment/onnxruntime_custom_ops.md
89
+ docs/zh_cn/deployment/onnxruntime_op.md
90
+ docs/zh_cn/deployment/tensorrt_custom_ops.md
91
+ docs/zh_cn/deployment/tensorrt_plugin.md
92
+ docs/zh_cn/get_started/build.md
93
+ docs/zh_cn/get_started/installation.md
94
+ docs/zh_cn/get_started/introduction.md
95
+ docs/zh_cn/get_started/previous_versions.md
96
+ docs/zh_cn/understand_mmcv/cnn.md
97
+ docs/zh_cn/understand_mmcv/config.md
98
+ docs/zh_cn/understand_mmcv/data_process.md
99
+ docs/zh_cn/understand_mmcv/io.md
100
+ docs/zh_cn/understand_mmcv/ops.md
101
+ docs/zh_cn/understand_mmcv/registry.md
102
+ docs/zh_cn/understand_mmcv/runner.md
103
+ docs/zh_cn/understand_mmcv/utils.md
104
+ docs/zh_cn/understand_mmcv/visualization.md
105
+ examples/train.py
106
+ mmcv/__init__.py
107
+ mmcv/version.py
108
+ mmcv.egg-info/PKG-INFO
109
+ mmcv.egg-info/SOURCES.txt
110
+ mmcv.egg-info/dependency_links.txt
111
+ mmcv.egg-info/not-zip-safe
112
+ mmcv.egg-info/requires.txt
113
+ mmcv.egg-info/top_level.txt
114
+ mmcv/arraymisc/__init__.py
115
+ mmcv/arraymisc/quantization.py
116
+ mmcv/cnn/__init__.py
117
+ mmcv/cnn/alexnet.py
118
+ mmcv/cnn/builder.py
119
+ mmcv/cnn/resnet.py
120
+ mmcv/cnn/vgg.py
121
+ mmcv/cnn/bricks/__init__.py
122
+ mmcv/cnn/bricks/activation.py
123
+ mmcv/cnn/bricks/context_block.py
124
+ mmcv/cnn/bricks/conv.py
125
+ mmcv/cnn/bricks/conv2d_adaptive_padding.py
126
+ mmcv/cnn/bricks/conv_module.py
127
+ mmcv/cnn/bricks/conv_ws.py
128
+ mmcv/cnn/bricks/depthwise_separable_conv_module.py
129
+ mmcv/cnn/bricks/drop.py
130
+ mmcv/cnn/bricks/generalized_attention.py
131
+ mmcv/cnn/bricks/hsigmoid.py
132
+ mmcv/cnn/bricks/hswish.py
133
+ mmcv/cnn/bricks/non_local.py
134
+ mmcv/cnn/bricks/norm.py
135
+ mmcv/cnn/bricks/padding.py
136
+ mmcv/cnn/bricks/plugin.py
137
+ mmcv/cnn/bricks/registry.py
138
+ mmcv/cnn/bricks/scale.py
139
+ mmcv/cnn/bricks/swish.py
140
+ mmcv/cnn/bricks/transformer.py
141
+ mmcv/cnn/bricks/upsample.py
142
+ mmcv/cnn/bricks/wrappers.py
143
+ mmcv/cnn/utils/__init__.py
144
+ mmcv/cnn/utils/flops_counter.py
145
+ mmcv/cnn/utils/fuse_conv_bn.py
146
+ mmcv/cnn/utils/sync_bn.py
147
+ mmcv/cnn/utils/weight_init.py
148
+ mmcv/engine/__init__.py
149
+ mmcv/engine/test.py
150
+ mmcv/fileio/__init__.py
151
+ mmcv/fileio/file_client.py
152
+ mmcv/fileio/io.py
153
+ mmcv/fileio/parse.py
154
+ mmcv/fileio/handlers/__init__.py
155
+ mmcv/fileio/handlers/base.py
156
+ mmcv/fileio/handlers/json_handler.py
157
+ mmcv/fileio/handlers/pickle_handler.py
158
+ mmcv/fileio/handlers/yaml_handler.py
159
+ mmcv/image/__init__.py
160
+ mmcv/image/colorspace.py
161
+ mmcv/image/geometric.py
162
+ mmcv/image/io.py
163
+ mmcv/image/misc.py
164
+ mmcv/image/photometric.py
165
+ mmcv/model_zoo/deprecated.json
166
+ mmcv/model_zoo/mmcls.json
167
+ mmcv/model_zoo/open_mmlab.json
168
+ mmcv/onnx/__init__.py
169
+ mmcv/onnx/info.py
170
+ mmcv/onnx/symbolic.py
171
+ mmcv/onnx/onnx_utils/__init__.py
172
+ mmcv/onnx/onnx_utils/symbolic_helper.py
173
+ mmcv/ops/__init__.py
174
+ mmcv/ops/active_rotated_filter.py
175
+ mmcv/ops/assign_score_withk.py
176
+ mmcv/ops/ball_query.py
177
+ mmcv/ops/bbox.py
178
+ mmcv/ops/border_align.py
179
+ mmcv/ops/box_iou_rotated.py
180
+ mmcv/ops/carafe.py
181
+ mmcv/ops/cc_attention.py
182
+ mmcv/ops/contour_expand.py
183
+ mmcv/ops/convex_iou.py
184
+ mmcv/ops/corner_pool.py
185
+ mmcv/ops/correlation.py
186
+ mmcv/ops/deform_conv.py
187
+ mmcv/ops/deform_roi_pool.py
188
+ mmcv/ops/deprecated_wrappers.py
189
+ mmcv/ops/focal_loss.py
190
+ mmcv/ops/furthest_point_sample.py
191
+ mmcv/ops/fused_bias_leakyrelu.py
192
+ mmcv/ops/gather_points.py
193
+ mmcv/ops/group_points.py
194
+ mmcv/ops/info.py
195
+ mmcv/ops/iou3d.py
196
+ mmcv/ops/knn.py
197
+ mmcv/ops/masked_conv.py
198
+ mmcv/ops/merge_cells.py
199
+ mmcv/ops/min_area_polygons.py
200
+ mmcv/ops/modulated_deform_conv.py
201
+ mmcv/ops/multi_scale_deform_attn.py
202
+ mmcv/ops/nms.py
203
+ mmcv/ops/pixel_group.py
204
+ mmcv/ops/point_sample.py
205
+ mmcv/ops/points_in_boxes.py
206
+ mmcv/ops/points_in_polygons.py
207
+ mmcv/ops/points_sampler.py
208
+ mmcv/ops/psa_mask.py
209
+ mmcv/ops/riroi_align_rotated.py
210
+ mmcv/ops/roi_align.py
211
+ mmcv/ops/roi_align_rotated.py
212
+ mmcv/ops/roi_pool.py
213
+ mmcv/ops/roiaware_pool3d.py
214
+ mmcv/ops/roipoint_pool3d.py
215
+ mmcv/ops/rotated_feature_align.py
216
+ mmcv/ops/saconv.py
217
+ mmcv/ops/scatter_points.py
218
+ mmcv/ops/sparse_conv.py
219
+ mmcv/ops/sparse_functional.py
220
+ mmcv/ops/sparse_modules.py
221
+ mmcv/ops/sparse_ops.py
222
+ mmcv/ops/sparse_pool.py
223
+ mmcv/ops/sparse_structure.py
224
+ mmcv/ops/sync_bn.py
225
+ mmcv/ops/three_interpolate.py
226
+ mmcv/ops/three_nn.py
227
+ mmcv/ops/tin_shift.py
228
+ mmcv/ops/upfirdn2d.py
229
+ mmcv/ops/voxelize.py
230
+ mmcv/ops/csrc/README.md
231
+ mmcv/ops/csrc/common/box_iou_rotated_utils.hpp
232
+ mmcv/ops/csrc/common/parrots_cpp_helper.hpp
233
+ mmcv/ops/csrc/common/parrots_cuda_helper.hpp
234
+ mmcv/ops/csrc/common/pytorch_cpp_helper.hpp
235
+ mmcv/ops/csrc/common/pytorch_cuda_helper.hpp
236
+ mmcv/ops/csrc/common/pytorch_device_registry.hpp
237
+ mmcv/ops/csrc/common/cuda/active_rotated_filter_cuda_kernel.cuh
238
+ mmcv/ops/csrc/common/cuda/assign_score_withk_cuda_kernel.cuh
239
+ mmcv/ops/csrc/common/cuda/ball_query_cuda_kernel.cuh
240
+ mmcv/ops/csrc/common/cuda/bbox_overlaps_cuda_kernel.cuh
241
+ mmcv/ops/csrc/common/cuda/border_align_cuda_kernel.cuh
242
+ mmcv/ops/csrc/common/cuda/box_iou_rotated_cuda.cuh
243
+ mmcv/ops/csrc/common/cuda/carafe_cuda_kernel.cuh
244
+ mmcv/ops/csrc/common/cuda/carafe_naive_cuda_kernel.cuh
245
+ mmcv/ops/csrc/common/cuda/common_cuda_helper.hpp
246
+ mmcv/ops/csrc/common/cuda/convex_iou_cuda_kernel.cuh
247
+ mmcv/ops/csrc/common/cuda/correlation_cuda.cuh
248
+ mmcv/ops/csrc/common/cuda/deform_conv_cuda_kernel.cuh
249
+ mmcv/ops/csrc/common/cuda/deform_roi_pool_cuda_kernel.cuh
250
+ mmcv/ops/csrc/common/cuda/furthest_point_sample_cuda_kernel.cuh
251
+ mmcv/ops/csrc/common/cuda/gather_points_cuda_kernel.cuh
252
+ mmcv/ops/csrc/common/cuda/group_points_cuda_kernel.cuh
253
+ mmcv/ops/csrc/common/cuda/iou3d_cuda_kernel.cuh
254
+ mmcv/ops/csrc/common/cuda/knn_cuda_kernel.cuh
255
+ mmcv/ops/csrc/common/cuda/masked_conv2d_cuda_kernel.cuh
256
+ mmcv/ops/csrc/common/cuda/min_area_polygons_cuda.cuh
257
+ mmcv/ops/csrc/common/cuda/modulated_deform_conv_cuda_kernel.cuh
258
+ mmcv/ops/csrc/common/cuda/ms_deform_attn_cuda_kernel.cuh
259
+ mmcv/ops/csrc/common/cuda/nms_cuda_kernel.cuh
260
+ mmcv/ops/csrc/common/cuda/nms_rotated_cuda.cuh
261
+ mmcv/ops/csrc/common/cuda/parrots_cudawarpfunction.cuh
262
+ mmcv/ops/csrc/common/cuda/points_in_boxes_cuda_kernel.cuh
263
+ mmcv/ops/csrc/common/cuda/points_in_polygons_cuda_kernel.cuh
264
+ mmcv/ops/csrc/common/cuda/psamask_cuda_kernel.cuh
265
+ mmcv/ops/csrc/common/cuda/riroi_align_rotated_cuda_kernel.cuh
266
+ mmcv/ops/csrc/common/cuda/roi_align_cuda_kernel.cuh
267
+ mmcv/ops/csrc/common/cuda/roi_align_rotated_cuda_kernel.cuh
268
+ mmcv/ops/csrc/common/cuda/roi_pool_cuda_kernel.cuh
269
+ mmcv/ops/csrc/common/cuda/roiaware_pool3d_cuda_kernel.cuh
270
+ mmcv/ops/csrc/common/cuda/roipoint_pool3d_cuda_kernel.cuh
271
+ mmcv/ops/csrc/common/cuda/rotated_feature_align_cuda_kernel.cuh
272
+ mmcv/ops/csrc/common/cuda/scatter_points_cuda_kernel.cuh
273
+ mmcv/ops/csrc/common/cuda/sigmoid_focal_loss_cuda_kernel.cuh
274
+ mmcv/ops/csrc/common/cuda/softmax_focal_loss_cuda_kernel.cuh
275
+ mmcv/ops/csrc/common/cuda/sync_bn_cuda_kernel.cuh
276
+ mmcv/ops/csrc/common/cuda/three_interpolate_cuda_kernel.cuh
277
+ mmcv/ops/csrc/common/cuda/three_nn_cuda_kernel.cuh
278
+ mmcv/ops/csrc/common/cuda/tin_shift_cuda_kernel.cuh
279
+ mmcv/ops/csrc/common/cuda/voxelization_cuda_kernel.cuh
280
+ mmcv/ops/csrc/common/cuda/spconv/indice.cuh
281
+ mmcv/ops/csrc/common/cuda/spconv/reordering.cuh
282
+ mmcv/ops/csrc/common/utils/spconv/paramsgrid.h
283
+ mmcv/ops/csrc/common/utils/spconv/prettyprint.h
284
+ mmcv/ops/csrc/common/utils/spconv/pybind11_utils.h
285
+ mmcv/ops/csrc/common/utils/spconv/spconv/geometry.h
286
+ mmcv/ops/csrc/common/utils/spconv/spconv/indice.h
287
+ mmcv/ops/csrc/common/utils/spconv/spconv/maxpool.h
288
+ mmcv/ops/csrc/common/utils/spconv/spconv/mp_helper.h
289
+ mmcv/ops/csrc/common/utils/spconv/spconv/point2voxel.h
290
+ mmcv/ops/csrc/common/utils/spconv/spconv/reordering.h
291
+ mmcv/ops/csrc/common/utils/spconv/tensorview/helper_kernel.cuh
292
+ mmcv/ops/csrc/common/utils/spconv/tensorview/helper_launch.h
293
+ mmcv/ops/csrc/common/utils/spconv/tensorview/tensorview.h
294
+ mmcv/ops/csrc/onnxruntime/corner_pool.h
295
+ mmcv/ops/csrc/onnxruntime/deform_conv.h
296
+ mmcv/ops/csrc/onnxruntime/grid_sample.h
297
+ mmcv/ops/csrc/onnxruntime/modulated_deform_conv.h
298
+ mmcv/ops/csrc/onnxruntime/nms.h
299
+ mmcv/ops/csrc/onnxruntime/onnxruntime_register.h
300
+ mmcv/ops/csrc/onnxruntime/onnxruntime_session_options_config_keys.h
301
+ mmcv/ops/csrc/onnxruntime/ort_mmcv_utils.h
302
+ mmcv/ops/csrc/onnxruntime/reduce_ops.h
303
+ mmcv/ops/csrc/onnxruntime/roi_align.h
304
+ mmcv/ops/csrc/onnxruntime/roi_align_rotated.h
305
+ mmcv/ops/csrc/onnxruntime/soft_nms.h
306
+ mmcv/ops/csrc/onnxruntime/cpu/corner_pool.cpp
307
+ mmcv/ops/csrc/onnxruntime/cpu/deform_conv.cpp
308
+ mmcv/ops/csrc/onnxruntime/cpu/gridSample.cpp
309
+ mmcv/ops/csrc/onnxruntime/cpu/modulated_deform_conv.cpp
310
+ mmcv/ops/csrc/onnxruntime/cpu/nms.cpp
311
+ mmcv/ops/csrc/onnxruntime/cpu/onnxruntime_register.cpp
312
+ mmcv/ops/csrc/onnxruntime/cpu/reduce_ops.cpp
313
+ mmcv/ops/csrc/onnxruntime/cpu/roi_align.cpp
314
+ mmcv/ops/csrc/onnxruntime/cpu/roi_align_rotated.cpp
315
+ mmcv/ops/csrc/onnxruntime/cpu/soft_nms.cpp
316
+ mmcv/ops/csrc/parrots/active_rotated_filter.cpp
317
+ mmcv/ops/csrc/parrots/active_rotated_filter_parrots.cpp
318
+ mmcv/ops/csrc/parrots/active_rotated_filter_pytorch.h
319
+ mmcv/ops/csrc/parrots/assign_score_withk.cpp
320
+ mmcv/ops/csrc/parrots/assign_score_withk_parrots.cpp
321
+ mmcv/ops/csrc/parrots/assign_score_withk_pytorch.h
322
+ mmcv/ops/csrc/parrots/ball_query._parrots.cpp
323
+ mmcv/ops/csrc/parrots/ball_query.cpp
324
+ mmcv/ops/csrc/parrots/ball_query_pytorch.h
325
+ mmcv/ops/csrc/parrots/bbox_overlaps.cpp
326
+ mmcv/ops/csrc/parrots/bbox_overlaps_parrots.cpp
327
+ mmcv/ops/csrc/parrots/bbox_overlaps_pytorch.h
328
+ mmcv/ops/csrc/parrots/border_align.cpp
329
+ mmcv/ops/csrc/parrots/border_align_parrots.cpp
330
+ mmcv/ops/csrc/parrots/border_align_pytorch.h
331
+ mmcv/ops/csrc/parrots/box_iou_rotated.cpp
332
+ mmcv/ops/csrc/parrots/box_iou_rotated_parrots.cpp
333
+ mmcv/ops/csrc/parrots/box_iou_rotated_pytorch.h
334
+ mmcv/ops/csrc/parrots/carafe.cpp
335
+ mmcv/ops/csrc/parrots/carafe_naive.cpp
336
+ mmcv/ops/csrc/parrots/carafe_naive_parrots.cpp
337
+ mmcv/ops/csrc/parrots/carafe_naive_pytorch.h
338
+ mmcv/ops/csrc/parrots/carafe_parrots.cpp
339
+ mmcv/ops/csrc/parrots/carafe_pytorch.h
340
+ mmcv/ops/csrc/parrots/contour_expand.cpp
341
+ mmcv/ops/csrc/parrots/contour_expand_parrots.cpp
342
+ mmcv/ops/csrc/parrots/contour_expand_pytorch.h
343
+ mmcv/ops/csrc/parrots/convex_iou.cpp
344
+ mmcv/ops/csrc/parrots/convex_iou_parrots.cpp
345
+ mmcv/ops/csrc/parrots/convex_iou_pytorch.h
346
+ mmcv/ops/csrc/parrots/corner_pool.cpp
347
+ mmcv/ops/csrc/parrots/corner_pool_parrots.cpp
348
+ mmcv/ops/csrc/parrots/corner_pool_pytorch.h
349
+ mmcv/ops/csrc/parrots/correlation.cpp
350
+ mmcv/ops/csrc/parrots/correlation_parrots.cpp
351
+ mmcv/ops/csrc/parrots/correlation_pytorch.h
352
+ mmcv/ops/csrc/parrots/cudabind.cpp
353
+ mmcv/ops/csrc/parrots/deform_conv.cpp
354
+ mmcv/ops/csrc/parrots/deform_conv_parrots.cpp
355
+ mmcv/ops/csrc/parrots/deform_conv_pytorch.h
356
+ mmcv/ops/csrc/parrots/deform_roi_pool.cpp
357
+ mmcv/ops/csrc/parrots/deform_roi_pool_parrots.cpp
358
+ mmcv/ops/csrc/parrots/deform_roi_pool_pytorch.h
359
+ mmcv/ops/csrc/parrots/focal_loss.cpp
360
+ mmcv/ops/csrc/parrots/focal_loss_parrots.cpp
361
+ mmcv/ops/csrc/parrots/focal_loss_pytorch.h
362
+ mmcv/ops/csrc/parrots/furthest_point_sample.cpp
363
+ mmcv/ops/csrc/parrots/furthest_point_sample_parrots.cpp
364
+ mmcv/ops/csrc/parrots/furthest_point_sample_pytorch.h
365
+ mmcv/ops/csrc/parrots/fused_bias_leakyrelu.cpp
366
+ mmcv/ops/csrc/parrots/fused_bias_parrots.cpp
367
+ mmcv/ops/csrc/parrots/gather_points.cpp
368
+ mmcv/ops/csrc/parrots/gather_points_parrots.cpp
369
+ mmcv/ops/csrc/parrots/gather_points_pytorch.h
370
+ mmcv/ops/csrc/parrots/group_points.cpp
371
+ mmcv/ops/csrc/parrots/group_points_parrots.cpp
372
+ mmcv/ops/csrc/parrots/group_points_pytorch.h
373
+ mmcv/ops/csrc/parrots/info.cpp
374
+ mmcv/ops/csrc/parrots/iou3d.cpp
375
+ mmcv/ops/csrc/parrots/iou3d_parrots.cpp
376
+ mmcv/ops/csrc/parrots/iou3d_pytorch.h
377
+ mmcv/ops/csrc/parrots/knn.cpp
378
+ mmcv/ops/csrc/parrots/knn_parrots.cpp
379
+ mmcv/ops/csrc/parrots/knn_pytorch.h
380
+ mmcv/ops/csrc/parrots/masked_conv2d.cpp
381
+ mmcv/ops/csrc/parrots/masked_conv2d_parrots.cpp
382
+ mmcv/ops/csrc/parrots/masked_conv2d_pytorch.h
383
+ mmcv/ops/csrc/parrots/min_area_polygons.cpp
384
+ mmcv/ops/csrc/parrots/min_area_polygons_parrots.cpp
385
+ mmcv/ops/csrc/parrots/min_area_polygons_pytorch.h
386
+ mmcv/ops/csrc/parrots/modulated_deform_conv.cpp
387
+ mmcv/ops/csrc/parrots/modulated_deform_conv_parrots.cpp
388
+ mmcv/ops/csrc/parrots/modulated_deform_conv_pytorch.h
389
+ mmcv/ops/csrc/parrots/ms_deform_attn.cpp
390
+ mmcv/ops/csrc/parrots/ms_deform_attn_parrots.cpp
391
+ mmcv/ops/csrc/parrots/nms.cpp
392
+ mmcv/ops/csrc/parrots/nms_parrots.cpp
393
+ mmcv/ops/csrc/parrots/nms_pytorch.h
394
+ mmcv/ops/csrc/parrots/nms_rotated.cpp
395
+ mmcv/ops/csrc/parrots/pixel_group.cpp
396
+ mmcv/ops/csrc/parrots/pixel_group_parrots.cpp
397
+ mmcv/ops/csrc/parrots/pixel_group_pytorch.h
398
+ mmcv/ops/csrc/parrots/points_in_boxes.cpp
399
+ mmcv/ops/csrc/parrots/points_in_boxes_parrots.cpp
400
+ mmcv/ops/csrc/parrots/points_in_boxes_pytorch.h
401
+ mmcv/ops/csrc/parrots/points_in_polygons.cpp
402
+ mmcv/ops/csrc/parrots/points_in_polygons_parrots.cpp
403
+ mmcv/ops/csrc/parrots/points_in_polygons_pytorch.h
404
+ mmcv/ops/csrc/parrots/psamask.cpp
405
+ mmcv/ops/csrc/parrots/psamask_parrots.cpp
406
+ mmcv/ops/csrc/parrots/psamask_pytorch.h
407
+ mmcv/ops/csrc/parrots/riroi_align_rotated.cpp
408
+ mmcv/ops/csrc/parrots/riroi_align_rotated_parrots.cpp
409
+ mmcv/ops/csrc/parrots/riroi_align_rotated_pytorch.h
410
+ mmcv/ops/csrc/parrots/roi_align.cpp
411
+ mmcv/ops/csrc/parrots/roi_align_parrots.cpp
412
+ mmcv/ops/csrc/parrots/roi_align_pytorch.h
413
+ mmcv/ops/csrc/parrots/roi_align_rotated.cpp
414
+ mmcv/ops/csrc/parrots/roi_align_rotated_parrots.cpp
415
+ mmcv/ops/csrc/parrots/roi_align_rotated_pytorch.h
416
+ mmcv/ops/csrc/parrots/roi_pool.cpp
417
+ mmcv/ops/csrc/parrots/roi_pool_parrots.cpp
418
+ mmcv/ops/csrc/parrots/roi_pool_pytorch.h
419
+ mmcv/ops/csrc/parrots/roiaware_pool3d.cpp
420
+ mmcv/ops/csrc/parrots/roiaware_pool3d_parrots.cpp
421
+ mmcv/ops/csrc/parrots/roiaware_pool3d_pytorch.h
422
+ mmcv/ops/csrc/parrots/roipoint_pool3d.cpp
423
+ mmcv/ops/csrc/parrots/roipoint_pool3d_parrots.cpp
424
+ mmcv/ops/csrc/parrots/roipoint_pool3d_pytorch.h
425
+ mmcv/ops/csrc/parrots/rotated_feature_align.cpp
426
+ mmcv/ops/csrc/parrots/rotated_feature_align_parrots.cpp
427
+ mmcv/ops/csrc/parrots/rotated_feature_align_pytorch.h
428
+ mmcv/ops/csrc/parrots/sync_bn.cpp
429
+ mmcv/ops/csrc/parrots/sync_bn_parrots.cpp
430
+ mmcv/ops/csrc/parrots/sync_bn_pytorch.h
431
+ mmcv/ops/csrc/parrots/three_interpolate.cpp
432
+ mmcv/ops/csrc/parrots/three_interpolate_parrots.cpp
433
+ mmcv/ops/csrc/parrots/three_interpolate_pytorch.h
434
+ mmcv/ops/csrc/parrots/three_nn.cpp
435
+ mmcv/ops/csrc/parrots/three_nn_parrots.cpp
436
+ mmcv/ops/csrc/parrots/three_nn_pytorch.h
437
+ mmcv/ops/csrc/parrots/tin_shift.cpp
438
+ mmcv/ops/csrc/parrots/tin_shift_parrots.cpp
439
+ mmcv/ops/csrc/parrots/tin_shift_pytorch.h
440
+ mmcv/ops/csrc/parrots/upfirdn2d.cpp
441
+ mmcv/ops/csrc/parrots/upfirdn2d_parrots.cpp
442
+ mmcv/ops/csrc/parrots/voxelization.cpp
443
+ mmcv/ops/csrc/parrots/voxelization_parrots.cpp
444
+ mmcv/ops/csrc/parrots/voxelization_pytorch.h
445
+ mmcv/ops/csrc/pytorch/active_rotated_filter.cpp
446
+ mmcv/ops/csrc/pytorch/assign_score_withk.cpp
447
+ mmcv/ops/csrc/pytorch/ball_query.cpp
448
+ mmcv/ops/csrc/pytorch/bbox_overlaps.cpp
449
+ mmcv/ops/csrc/pytorch/border_align.cpp
450
+ mmcv/ops/csrc/pytorch/box_iou_rotated.cpp
451
+ mmcv/ops/csrc/pytorch/carafe.cpp
452
+ mmcv/ops/csrc/pytorch/carafe_naive.cpp
453
+ mmcv/ops/csrc/pytorch/contour_expand.cpp
454
+ mmcv/ops/csrc/pytorch/convex_iou.cpp
455
+ mmcv/ops/csrc/pytorch/corner_pool.cpp
456
+ mmcv/ops/csrc/pytorch/correlation.cpp
457
+ mmcv/ops/csrc/pytorch/deform_conv.cpp
458
+ mmcv/ops/csrc/pytorch/deform_roi_pool.cpp
459
+ mmcv/ops/csrc/pytorch/focal_loss.cpp
460
+ mmcv/ops/csrc/pytorch/furthest_point_sample.cpp
461
+ mmcv/ops/csrc/pytorch/fused_bias_leakyrelu.cpp
462
+ mmcv/ops/csrc/pytorch/fused_spconv_ops.cpp
463
+ mmcv/ops/csrc/pytorch/gather_points.cpp
464
+ mmcv/ops/csrc/pytorch/group_points.cpp
465
+ mmcv/ops/csrc/pytorch/info.cpp
466
+ mmcv/ops/csrc/pytorch/iou3d.cpp
467
+ mmcv/ops/csrc/pytorch/knn.cpp
468
+ mmcv/ops/csrc/pytorch/masked_conv2d.cpp
469
+ mmcv/ops/csrc/pytorch/min_area_polygons.cpp
470
+ mmcv/ops/csrc/pytorch/modulated_deform_conv.cpp
471
+ mmcv/ops/csrc/pytorch/ms_deform_attn.cpp
472
+ mmcv/ops/csrc/pytorch/nms.cpp
473
+ mmcv/ops/csrc/pytorch/nms_rotated.cpp
474
+ mmcv/ops/csrc/pytorch/pixel_group.cpp
475
+ mmcv/ops/csrc/pytorch/points_in_boxes.cpp
476
+ mmcv/ops/csrc/pytorch/points_in_polygons.cpp
477
+ mmcv/ops/csrc/pytorch/psamask.cpp
478
+ mmcv/ops/csrc/pytorch/pybind.cpp
479
+ mmcv/ops/csrc/pytorch/riroi_align_rotated.cpp
480
+ mmcv/ops/csrc/pytorch/roi_align.cpp
481
+ mmcv/ops/csrc/pytorch/roi_align_rotated.cpp
482
+ mmcv/ops/csrc/pytorch/roi_pool.cpp
483
+ mmcv/ops/csrc/pytorch/roiaware_pool3d.cpp
484
+ mmcv/ops/csrc/pytorch/roipoint_pool3d.cpp
485
+ mmcv/ops/csrc/pytorch/rotated_feature_align.cpp
486
+ mmcv/ops/csrc/pytorch/scatter_points.cpp
487
+ mmcv/ops/csrc/pytorch/sparse_pool_ops.cpp
488
+ mmcv/ops/csrc/pytorch/spconv_ops.cpp
489
+ mmcv/ops/csrc/pytorch/spconv_utils.h
490
+ mmcv/ops/csrc/pytorch/sync_bn.cpp
491
+ mmcv/ops/csrc/pytorch/three_interpolate.cpp
492
+ mmcv/ops/csrc/pytorch/three_nn.cpp
493
+ mmcv/ops/csrc/pytorch/tin_shift.cpp
494
+ mmcv/ops/csrc/pytorch/upfirdn2d.cpp
495
+ mmcv/ops/csrc/pytorch/voxelization.cpp
496
+ mmcv/ops/csrc/pytorch/cpu/active_rotated_filter.cpp
497
+ mmcv/ops/csrc/pytorch/cpu/box_iou_rotated.cpp
498
+ mmcv/ops/csrc/pytorch/cpu/deform_conv.cpp
499
+ mmcv/ops/csrc/pytorch/cpu/modulated_deform_conv.cpp
500
+ mmcv/ops/csrc/pytorch/cpu/nms.cpp
501
+ mmcv/ops/csrc/pytorch/cpu/nms_rotated.cpp
502
+ mmcv/ops/csrc/pytorch/cpu/pixel_group.cpp
503
+ mmcv/ops/csrc/pytorch/cpu/points_in_boxes.cpp
504
+ mmcv/ops/csrc/pytorch/cpu/psamask.cpp
505
+ mmcv/ops/csrc/pytorch/cpu/roi_align.cpp
506
+ mmcv/ops/csrc/pytorch/cpu/roi_align_rotated.cpp
507
+ mmcv/ops/csrc/pytorch/cpu/sparse_indice.cpp
508
+ mmcv/ops/csrc/pytorch/cpu/sparse_maxpool.cpp
509
+ mmcv/ops/csrc/pytorch/cpu/sparse_reordering.cpp
510
+ mmcv/ops/csrc/pytorch/cpu/voxelization.cpp
511
+ mmcv/ops/csrc/pytorch/cuda/active_rotated_filter_cuda.cu
512
+ mmcv/ops/csrc/pytorch/cuda/assign_score_withk_cuda.cu
513
+ mmcv/ops/csrc/pytorch/cuda/ball_query_cuda.cu
514
+ mmcv/ops/csrc/pytorch/cuda/bbox_overlaps_cuda.cu
515
+ mmcv/ops/csrc/pytorch/cuda/border_align_cuda.cu
516
+ mmcv/ops/csrc/pytorch/cuda/box_iou_rotated_cuda.cu
517
+ mmcv/ops/csrc/pytorch/cuda/carafe_cuda.cu
518
+ mmcv/ops/csrc/pytorch/cuda/carafe_naive_cuda.cu
519
+ mmcv/ops/csrc/pytorch/cuda/convex_iou.cu
520
+ mmcv/ops/csrc/pytorch/cuda/correlation_cuda.cu
521
+ mmcv/ops/csrc/pytorch/cuda/cudabind.cpp
522
+ mmcv/ops/csrc/pytorch/cuda/deform_conv_cuda.cu
523
+ mmcv/ops/csrc/pytorch/cuda/deform_roi_pool_cuda.cu
524
+ mmcv/ops/csrc/pytorch/cuda/focal_loss_cuda.cu
525
+ mmcv/ops/csrc/pytorch/cuda/furthest_point_sample_cuda.cu
526
+ mmcv/ops/csrc/pytorch/cuda/fused_bias_leakyrelu_cuda.cu
527
+ mmcv/ops/csrc/pytorch/cuda/fused_spconv_ops_cuda.cu
528
+ mmcv/ops/csrc/pytorch/cuda/gather_points_cuda.cu
529
+ mmcv/ops/csrc/pytorch/cuda/group_points_cuda.cu
530
+ mmcv/ops/csrc/pytorch/cuda/iou3d_cuda.cu
531
+ mmcv/ops/csrc/pytorch/cuda/knn_cuda.cu
532
+ mmcv/ops/csrc/pytorch/cuda/masked_conv2d_cuda.cu
533
+ mmcv/ops/csrc/pytorch/cuda/min_area_polygons.cu
534
+ mmcv/ops/csrc/pytorch/cuda/modulated_deform_conv_cuda.cu
535
+ mmcv/ops/csrc/pytorch/cuda/ms_deform_attn_cuda.cu
536
+ mmcv/ops/csrc/pytorch/cuda/nms_cuda.cu
537
+ mmcv/ops/csrc/pytorch/cuda/nms_rotated_cuda.cu
538
+ mmcv/ops/csrc/pytorch/cuda/points_in_boxes_cuda.cu
539
+ mmcv/ops/csrc/pytorch/cuda/points_in_polygons_cuda.cu
540
+ mmcv/ops/csrc/pytorch/cuda/psamask_cuda.cu
541
+ mmcv/ops/csrc/pytorch/cuda/riroi_align_rotated_cuda.cu
542
+ mmcv/ops/csrc/pytorch/cuda/roi_align_cuda.cu
543
+ mmcv/ops/csrc/pytorch/cuda/roi_align_rotated_cuda.cu
544
+ mmcv/ops/csrc/pytorch/cuda/roi_pool_cuda.cu
545
+ mmcv/ops/csrc/pytorch/cuda/roiaware_pool3d_cuda.cu
546
+ mmcv/ops/csrc/pytorch/cuda/roipoint_pool3d_cuda.cu
547
+ mmcv/ops/csrc/pytorch/cuda/rotated_feature_align_cuda.cu
548
+ mmcv/ops/csrc/pytorch/cuda/scatter_points_cuda.cu
549
+ mmcv/ops/csrc/pytorch/cuda/sparse_indice.cu
550
+ mmcv/ops/csrc/pytorch/cuda/sparse_maxpool.cu
551
+ mmcv/ops/csrc/pytorch/cuda/sparse_pool_ops_cuda.cu
552
+ mmcv/ops/csrc/pytorch/cuda/sparse_reordering.cu
553
+ mmcv/ops/csrc/pytorch/cuda/spconv_ops_cuda.cu
554
+ mmcv/ops/csrc/pytorch/cuda/sync_bn_cuda.cu
555
+ mmcv/ops/csrc/pytorch/cuda/three_interpolate_cuda.cu
556
+ mmcv/ops/csrc/pytorch/cuda/three_nn_cuda.cu
557
+ mmcv/ops/csrc/pytorch/cuda/tin_shift_cuda.cu
558
+ mmcv/ops/csrc/pytorch/cuda/upfirdn2d_kernel.cu
559
+ mmcv/ops/csrc/pytorch/cuda/voxelization_cuda.cu
560
+ mmcv/ops/csrc/tensorrt/trt_corner_pool.hpp
561
+ mmcv/ops/csrc/tensorrt/trt_cuda_helper.cuh
562
+ mmcv/ops/csrc/tensorrt/trt_cummaxmin.hpp
563
+ mmcv/ops/csrc/tensorrt/trt_deform_conv.hpp
564
+ mmcv/ops/csrc/tensorrt/trt_grid_sampler.hpp
565
+ mmcv/ops/csrc/tensorrt/trt_instance_norm.hpp
566
+ mmcv/ops/csrc/tensorrt/trt_modulated_deform_conv.hpp
567
+ mmcv/ops/csrc/tensorrt/trt_nms.hpp
568
+ mmcv/ops/csrc/tensorrt/trt_plugin.hpp
569
+ mmcv/ops/csrc/tensorrt/trt_plugin_helper.hpp
570
+ mmcv/ops/csrc/tensorrt/trt_roi_align.hpp
571
+ mmcv/ops/csrc/tensorrt/trt_scatternd.hpp
572
+ mmcv/ops/csrc/tensorrt/trt_serialize.hpp
573
+ mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool.cpp
574
+ mmcv/ops/csrc/tensorrt/plugins/trt_corner_pool_kernel.cu
575
+ mmcv/ops/csrc/tensorrt/plugins/trt_cuda_helper.cu
576
+ mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin.cpp
577
+ mmcv/ops/csrc/tensorrt/plugins/trt_cummaxmin_kernel.cu
578
+ mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv.cpp
579
+ mmcv/ops/csrc/tensorrt/plugins/trt_deform_conv_kernel.cu
580
+ mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler.cpp
581
+ mmcv/ops/csrc/tensorrt/plugins/trt_grid_sampler_kernel.cu
582
+ mmcv/ops/csrc/tensorrt/plugins/trt_instance_norm.cpp
583
+ mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv.cpp
584
+ mmcv/ops/csrc/tensorrt/plugins/trt_modulated_deform_conv_kernel.cu
585
+ mmcv/ops/csrc/tensorrt/plugins/trt_nms.cpp
586
+ mmcv/ops/csrc/tensorrt/plugins/trt_nms_kernel.cu
587
+ mmcv/ops/csrc/tensorrt/plugins/trt_plugin.cpp
588
+ mmcv/ops/csrc/tensorrt/plugins/trt_roi_align.cpp
589
+ mmcv/ops/csrc/tensorrt/plugins/trt_roi_align_kernel.cu
590
+ mmcv/ops/csrc/tensorrt/plugins/trt_scatternd.cpp
591
+ mmcv/ops/csrc/tensorrt/plugins/trt_scatternd_kernel.cu
592
+ mmcv/parallel/__init__.py
593
+ mmcv/parallel/_functions.py
594
+ mmcv/parallel/collate.py
595
+ mmcv/parallel/data_container.py
596
+ mmcv/parallel/data_parallel.py
597
+ mmcv/parallel/distributed.py
598
+ mmcv/parallel/distributed_deprecated.py
599
+ mmcv/parallel/registry.py
600
+ mmcv/parallel/scatter_gather.py
601
+ mmcv/parallel/utils.py
602
+ mmcv/runner/__init__.py
603
+ mmcv/runner/base_module.py
604
+ mmcv/runner/base_runner.py
605
+ mmcv/runner/builder.py
606
+ mmcv/runner/checkpoint.py
607
+ mmcv/runner/default_constructor.py
608
+ mmcv/runner/dist_utils.py
609
+ mmcv/runner/epoch_based_runner.py
610
+ mmcv/runner/fp16_utils.py
611
+ mmcv/runner/iter_based_runner.py
612
+ mmcv/runner/log_buffer.py
613
+ mmcv/runner/priority.py
614
+ mmcv/runner/utils.py
615
+ mmcv/runner/hooks/__init__.py
616
+ mmcv/runner/hooks/checkpoint.py
617
+ mmcv/runner/hooks/closure.py
618
+ mmcv/runner/hooks/ema.py
619
+ mmcv/runner/hooks/evaluation.py
620
+ mmcv/runner/hooks/hook.py
621
+ mmcv/runner/hooks/iter_timer.py
622
+ mmcv/runner/hooks/lr_updater.py
623
+ mmcv/runner/hooks/memory.py
624
+ mmcv/runner/hooks/momentum_updater.py
625
+ mmcv/runner/hooks/optimizer.py
626
+ mmcv/runner/hooks/profiler.py
627
+ mmcv/runner/hooks/sampler_seed.py
628
+ mmcv/runner/hooks/sync_buffer.py
629
+ mmcv/runner/hooks/logger/__init__.py
630
+ mmcv/runner/hooks/logger/base.py
631
+ mmcv/runner/hooks/logger/dvclive.py
632
+ mmcv/runner/hooks/logger/mlflow.py
633
+ mmcv/runner/hooks/logger/neptune.py
634
+ mmcv/runner/hooks/logger/pavi.py
635
+ mmcv/runner/hooks/logger/segmind.py
636
+ mmcv/runner/hooks/logger/tensorboard.py
637
+ mmcv/runner/hooks/logger/text.py
638
+ mmcv/runner/hooks/logger/wandb.py
639
+ mmcv/runner/optimizer/__init__.py
640
+ mmcv/runner/optimizer/builder.py
641
+ mmcv/runner/optimizer/default_constructor.py
642
+ mmcv/tensorrt/__init__.py
643
+ mmcv/tensorrt/init_plugins.py
644
+ mmcv/tensorrt/preprocess.py
645
+ mmcv/tensorrt/tensorrt_utils.py
646
+ mmcv/utils/__init__.py
647
+ mmcv/utils/config.py
648
+ mmcv/utils/env.py
649
+ mmcv/utils/ext_loader.py
650
+ mmcv/utils/hub.py
651
+ mmcv/utils/logging.py
652
+ mmcv/utils/misc.py
653
+ mmcv/utils/parrots_jit.py
654
+ mmcv/utils/parrots_wrapper.py
655
+ mmcv/utils/path.py
656
+ mmcv/utils/progressbar.py
657
+ mmcv/utils/registry.py
658
+ mmcv/utils/seed.py
659
+ mmcv/utils/testing.py
660
+ mmcv/utils/timer.py
661
+ mmcv/utils/trace.py
662
+ mmcv/utils/version_utils.py
663
+ mmcv/video/__init__.py
664
+ mmcv/video/io.py
665
+ mmcv/video/optflow.py
666
+ mmcv/video/processing.py
667
+ mmcv/visualization/__init__.py
668
+ mmcv/visualization/color.py
669
+ mmcv/visualization/image.py
670
+ mmcv/visualization/optflow.py
671
+ requirements/build.txt
672
+ requirements/docs.txt
673
+ requirements/optional.txt
674
+ requirements/runtime.txt
675
+ requirements/test.txt
676
+ tests/test_arraymisc.py
677
+ tests/test_fileclient.py
678
+ tests/test_fileio.py
679
+ tests/test_load_model_zoo.py
680
+ tests/test_parallel.py
681
+ tests/test_visualization.py
682
+ tests/data/batched_nms_data.pkl
683
+ tests/data/color.jpg
684
+ tests/data/color_exif.jpg
685
+ tests/data/filelist.txt
686
+ tests/data/gray_alpha.png
687
+ tests/data/grayscale.jpg
688
+ tests/data/grayscale_dim3.jpg
689
+ tests/data/mapping.txt
690
+ tests/data/optflow.flo
691
+ tests/data/optflow_concat0.jpg
692
+ tests/data/optflow_concat1.jpg
693
+ tests/data/palette.gif
694
+ tests/data/sparse_flow.png
695
+ tests/data/test.mp4
696
+ tests/data/uint16-5channel.tif
697
+ tests/data/config/a.b.py
698
+ tests/data/config/a.py
699
+ tests/data/config/b.json
700
+ tests/data/config/base.py
701
+ tests/data/config/c.yaml
702
+ tests/data/config/code.py
703
+ tests/data/config/d.py
704
+ tests/data/config/delete.py
705
+ tests/data/config/deprecated.py
706
+ tests/data/config/deprecated_as_base.py
707
+ tests/data/config/e.py
708
+ tests/data/config/expected.py
709
+ tests/data/config/f.py
710
+ tests/data/config/g.py
711
+ tests/data/config/h.py
712
+ tests/data/config/i_base.py
713
+ tests/data/config/i_child.py
714
+ tests/data/config/l.py
715
+ tests/data/config/l1.py
716
+ tests/data/config/l2.yaml
717
+ tests/data/config/l3.json
718
+ tests/data/config/l4.py
719
+ tests/data/config/m.py
720
+ tests/data/config/n.py
721
+ tests/data/config/o.json
722
+ tests/data/config/p.yaml
723
+ tests/data/config/q.py
724
+ tests/data/config/r.py
725
+ tests/data/config/s.py
726
+ tests/data/config/t.json
727
+ tests/data/config/t.py
728
+ tests/data/config/t.yaml
729
+ tests/data/config/u.json
730
+ tests/data/config/u.py
731
+ tests/data/config/u.yaml
732
+ tests/data/config/v.py
733
+ tests/data/demo.lmdb/data.mdb
734
+ tests/data/demo.lmdb/lock.mdb
735
+ tests/data/for_3d_ops/features_for_fps_distance.npy
736
+ tests/data/for_3d_ops/fps_idx.npy
737
+ tests/data/for_3d_ops/test_voxel.npy
738
+ tests/data/for_ccattention/ccattention_input.bin
739
+ tests/data/for_ccattention/ccattention_output.bin
740
+ tests/data/for_psa_mask/psa_input.bin
741
+ tests/data/for_psa_mask/psa_output_collect.bin
742
+ tests/data/for_psa_mask/psa_output_distribute.bin
743
+ tests/data/for_scan/.file
744
+ tests/data/for_scan/1.json
745
+ tests/data/for_scan/1.txt
746
+ tests/data/for_scan/2.json
747
+ tests/data/for_scan/2.txt
748
+ tests/data/for_scan/3.TXT
749
+ tests/data/for_scan/a.bin
750
+ tests/data/for_scan/sub/1.json
751
+ tests/data/for_scan/sub/1.txt
752
+ tests/data/model_zoo/deprecated.json
753
+ tests/data/model_zoo/open_mmlab.json
754
+ tests/data/model_zoo/mmcv_home/open_mmlab.json
755
+ tests/data/model_zoo/mmcv_home/test.pth
756
+ tests/data/model_zoo/mmcv_home/val.pth
757
+ tests/data/patches/0.npy
758
+ tests/data/patches/1.npy
759
+ tests/data/patches/2.npy
760
+ tests/data/patches/3.npy
761
+ tests/data/patches/4.npy
762
+ tests/data/patches/pad0_0.npy
763
+ tests/data/patches/pad0_1.npy
764
+ tests/data/patches/pad0_2.npy
765
+ tests/data/patches/pad0_3.npy
766
+ tests/data/patches/pad0_4.npy
767
+ tests/data/patches/pad_0.npy
768
+ tests/data/patches/pad_1.npy
769
+ tests/data/patches/pad_2.npy
770
+ tests/data/patches/pad_3.npy
771
+ tests/data/patches/pad_4.npy
772
+ tests/data/patches/scale_0.npy
773
+ tests/data/patches/scale_1.npy
774
+ tests/data/patches/scale_2.npy
775
+ tests/data/patches/scale_3.npy
776
+ tests/data/patches/scale_4.npy
777
+ tests/data/scripts/hello.py
778
+ tests/test_cnn/test_build_layers.py
779
+ tests/test_cnn/test_context_block.py
780
+ tests/test_cnn/test_conv2d_adaptive_padding.py
781
+ tests/test_cnn/test_conv_module.py
782
+ tests/test_cnn/test_depthwise_seperable_conv_module.py
783
+ tests/test_cnn/test_flops_counter.py
784
+ tests/test_cnn/test_fuse_conv_bn.py
785
+ tests/test_cnn/test_generalized_attention.py
786
+ tests/test_cnn/test_hsigmoid.py
787
+ tests/test_cnn/test_hswish.py
788
+ tests/test_cnn/test_model_registry.py
789
+ tests/test_cnn/test_non_local.py
790
+ tests/test_cnn/test_revert_syncbn.py
791
+ tests/test_cnn/test_scale.py
792
+ tests/test_cnn/test_swish.py
793
+ tests/test_cnn/test_transformer.py
794
+ tests/test_cnn/test_weight_init.py
795
+ tests/test_cnn/test_wrappers.py
796
+ tests/test_image/test_colorspace.py
797
+ tests/test_image/test_geometric.py
798
+ tests/test_image/test_image_misc.py
799
+ tests/test_image/test_io.py
800
+ tests/test_image/test_photometric.py
801
+ tests/test_ops/test_active_rotated_filter.py
802
+ tests/test_ops/test_assign_score_withk.py
803
+ tests/test_ops/test_ball_query.py
804
+ tests/test_ops/test_bbox.py
805
+ tests/test_ops/test_bilinear_grid_sample.py
806
+ tests/test_ops/test_border_align.py
807
+ tests/test_ops/test_box_iou_rotated.py
808
+ tests/test_ops/test_carafe.py
809
+ tests/test_ops/test_cc_attention.py
810
+ tests/test_ops/test_contour_expand.py
811
+ tests/test_ops/test_convex_iou.py
812
+ tests/test_ops/test_corner_pool.py
813
+ tests/test_ops/test_correlation.py
814
+ tests/test_ops/test_deform_conv.py
815
+ tests/test_ops/test_deform_roi_pool.py
816
+ tests/test_ops/test_focal_loss.py
817
+ tests/test_ops/test_furthest_point_sample.py
818
+ tests/test_ops/test_fused_bias_leakyrelu.py
819
+ tests/test_ops/test_gather_points.py
820
+ tests/test_ops/test_group_points.py
821
+ tests/test_ops/test_info.py
822
+ tests/test_ops/test_iou3d.py
823
+ tests/test_ops/test_knn.py
824
+ tests/test_ops/test_masked_conv2d.py
825
+ tests/test_ops/test_merge_cells.py
826
+ tests/test_ops/test_min_area_polygons.py
827
+ tests/test_ops/test_modulated_deform_conv.py
828
+ tests/test_ops/test_ms_deformable_attn.py
829
+ tests/test_ops/test_nms.py
830
+ tests/test_ops/test_nms_rotated.py
831
+ tests/test_ops/test_onnx.py
832
+ tests/test_ops/test_pixel_group.py
833
+ tests/test_ops/test_points_in_polygons.py
834
+ tests/test_ops/test_psa_mask.py
835
+ tests/test_ops/test_riroi_align_rotated.py
836
+ tests/test_ops/test_roi_align.py
837
+ tests/test_ops/test_roi_align_rotated.py
838
+ tests/test_ops/test_roi_pool.py
839
+ tests/test_ops/test_roiaware_pool3d.py
840
+ tests/test_ops/test_roipoint_pool3d.py
841
+ tests/test_ops/test_rotated_feature_align.py
842
+ tests/test_ops/test_saconv.py
843
+ tests/test_ops/test_scatter_points.py
844
+ tests/test_ops/test_spconv.py
845
+ tests/test_ops/test_syncbn.py
846
+ tests/test_ops/test_tensorrt.py
847
+ tests/test_ops/test_tensorrt_preprocess.py
848
+ tests/test_ops/test_three_interpolate.py
849
+ tests/test_ops/test_three_nn.py
850
+ tests/test_ops/test_tin_shift.py
851
+ tests/test_ops/test_upfirdn2d.py
852
+ tests/test_ops/test_voxelization.py
853
+ tests/test_runner/test_basemodule.py
854
+ tests/test_runner/test_checkpoint.py
855
+ tests/test_runner/test_dist_utils.py
856
+ tests/test_runner/test_eval_hook.py
857
+ tests/test_runner/test_fp16.py
858
+ tests/test_runner/test_hooks.py
859
+ tests/test_runner/test_optimizer.py
860
+ tests/test_runner/test_runner.py
861
+ tests/test_runner/test_utils.py
862
+ tests/test_utils/test_config.py
863
+ tests/test_utils/test_env.py
864
+ tests/test_utils/test_hub.py
865
+ tests/test_utils/test_logging.py
866
+ tests/test_utils/test_misc.py
867
+ tests/test_utils/test_parrots_jit.py
868
+ tests/test_utils/test_path.py
869
+ tests/test_utils/test_progressbar.py
870
+ tests/test_utils/test_registry.py
871
+ tests/test_utils/test_testing.py
872
+ tests/test_utils/test_timer.py
873
+ tests/test_utils/test_trace.py
874
+ tests/test_utils/test_version_utils.py
875
+ tests/test_video/test_optflow.py
876
+ tests/test_video/test_processing.py
877
+ tests/test_video/test_reader.py
groundingLMM/mmcv/mmcv.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
groundingLMM/mmcv/mmcv.egg-info/not-zip-safe ADDED
@@ -0,0 +1 @@
 
 
1
+
groundingLMM/mmcv/mmcv.egg-info/requires.txt ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ addict
2
+ numpy
3
+ packaging
4
+ Pillow
5
+ pyyaml
6
+ yapf
7
+
8
+ [:sys_platform == "win32"]
9
+ regex
10
+
11
+ [all]
12
+ pytest-runner
13
+ ninja
14
+ addict
15
+ numpy
16
+ packaging
17
+ Pillow
18
+ pyyaml
19
+ yapf
20
+ coverage
21
+ lmdb
22
+ onnx==1.7.0
23
+ onnxoptimizer
24
+ onnxruntime>=1.8.0
25
+ pytest
26
+ PyTurboJPEG
27
+ scipy
28
+ tifffile
29
+
30
+ [all:sys_platform == "win32"]
31
+ regex
32
+
33
+ [build]
34
+ pytest-runner
35
+
36
+ [optional]
37
+ ninja
38
+
39
+ [tests]
40
+ coverage
41
+ lmdb
42
+ onnx==1.7.0
43
+ onnxoptimizer
44
+ onnxruntime>=1.8.0
45
+ pytest
46
+ PyTurboJPEG
47
+ scipy
48
+ tifffile
groundingLMM/mmcv/mmcv.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ mmcv
groundingLMM/mmcv/mmcv/__init__.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ # flake8: noqa
3
+ from .arraymisc import *
4
+ from .fileio import *
5
+ from .image import *
6
+ from .utils import *
7
+ from .version import *
8
+ from .video import *
9
+ from .visualization import *
10
+
11
+ # The following modules are not imported to this level, so mmcv may be used
12
+ # without PyTorch.
13
+ # - runner
14
+ # - parallel
15
+ # - op
groundingLMM/mmcv/mmcv/arraymisc/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .quantization import dequantize, quantize
3
+
4
+ __all__ = ['quantize', 'dequantize']
groundingLMM/mmcv/mmcv/arraymisc/quantization.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import numpy as np
3
+
4
+
5
def quantize(arr, min_val, max_val, levels, dtype=np.int64):
    """Quantize an array of (-inf, inf) to integer bin indices in [0, levels-1].

    Values are clipped to ``[min_val, max_val]`` before being binned, so
    out-of-range inputs land in the first or last bin.

    Args:
        arr (ndarray): Input array.
        min_val (scalar): Minimum value to be clipped.
        max_val (scalar): Maximum value to be clipped.
        levels (int): Number of quantization levels; must be an integer
            greater than 1.
        dtype (np.dtype): The type of the quantized array.
            Default: np.int64.

    Returns:
        ndarray: Quantized array of bin indices with dtype ``dtype``.

    Raises:
        ValueError: If ``levels`` is not an integer greater than 1, or if
            ``min_val >= max_val``.
    """
    if not (isinstance(levels, int) and levels > 1):
        raise ValueError(
            f'levels must be an integer greater than 1, but got {levels}')
    if min_val >= max_val:
        raise ValueError(
            f'min_val ({min_val}) must be smaller than max_val ({max_val})')

    # Shift clipped values so they start at 0, then scale into bin indices.
    arr = np.clip(arr, min_val, max_val) - min_val
    # A value exactly at max_val would map to bin `levels`; cap at levels-1.
    quantized_arr = np.minimum(
        np.floor(levels * arr / (max_val - min_val)).astype(dtype), levels - 1)

    return quantized_arr
30
+
31
+
32
def dequantize(arr, min_val, max_val, levels, dtype=np.float64):
    """Dequantize an array of bin indices back to approximate values.

    Each bin index is mapped to the *center* of its bin in
    ``[min_val, max_val]``, which is the inverse of :func:`quantize` up to
    half a bin width.

    Args:
        arr (ndarray): Input array of bin indices.
        min_val (scalar): Minimum value to be clipped.
        max_val (scalar): Maximum value to be clipped.
        levels (int): Number of quantization levels; must be an integer
            greater than 1.
        dtype (np.dtype): The type of the dequantized array.
            Default: np.float64.

    Returns:
        ndarray: Dequantized array with dtype ``dtype``.

    Raises:
        ValueError: If ``levels`` is not an integer greater than 1, or if
            ``min_val >= max_val``.
    """
    if not (isinstance(levels, int) and levels > 1):
        raise ValueError(
            f'levels must be an integer greater than 1, but got {levels}')
    if min_val >= max_val:
        raise ValueError(
            f'min_val ({min_val}) must be smaller than max_val ({max_val})')

    # `+ 0.5` moves the index to the bin center before rescaling back to
    # the original value range.
    dequantized_arr = (arr + 0.5).astype(dtype) * (max_val -
                                                   min_val) / levels + min_val

    return dequantized_arr
groundingLMM/mmcv/mmcv/fileio/__init__.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .file_client import BaseStorageBackend, FileClient
3
+ from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
4
+ from .io import dump, load, register_handler
5
+ from .parse import dict_from_file, list_from_file
6
+
7
+ __all__ = [
8
+ 'BaseStorageBackend', 'FileClient', 'load', 'dump', 'register_handler',
9
+ 'BaseFileHandler', 'JsonHandler', 'PickleHandler', 'YamlHandler',
10
+ 'list_from_file', 'dict_from_file'
11
+ ]
groundingLMM/mmcv/mmcv/fileio/file_client.py ADDED
@@ -0,0 +1,1149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import inspect
3
+ import os
4
+ import os.path as osp
5
+ import re
6
+ import tempfile
7
+ import warnings
8
+ from abc import ABCMeta, abstractmethod
9
+ from contextlib import contextmanager
10
+ from pathlib import Path
11
+ from typing import Iterable, Iterator, Optional, Tuple, Union
12
+ from urllib.request import urlopen
13
+
14
+ import mmcv
15
+ from mmcv.utils.misc import has_method
16
+ from mmcv.utils.path import is_filepath
17
+
18
+
19
class BaseStorageBackend(metaclass=ABCMeta):
    """Base class that every storage backend must inherit from.

    A concrete backend implements two APIs: :meth:`get`, which reads a file
    as a byte stream, and :meth:`get_text`, which reads a file as text.
    """

    # Whether this backend is able to create a symlink for a file.
    _allow_symlink = False

    @property
    def allow_symlink(self):
        """bool: Whether the backend can create a symlink for a file."""
        return self._allow_symlink

    @property
    def name(self):
        """str: Name of the backend, i.e. its class name."""
        return self.__class__.__name__

    @abstractmethod
    def get(self, filepath):
        pass

    @abstractmethod
    def get_text(self, filepath):
        pass
45
+
46
+
47
class CephBackend(BaseStorageBackend):
    """Ceph storage backend (for internal use).

    Args:
        path_mapping (dict|None): path mapping dict from local path to Petrel
            path. When ``path_mapping={'src': 'dst'}``, ``src`` in ``filepath``
            will be replaced by ``dst``. Default: None.

    .. warning::
        :class:`mmcv.fileio.file_client.CephBackend` will be deprecated,
        please use :class:`mmcv.fileio.file_client.PetrelBackend` instead.
    """

    def __init__(self, path_mapping=None):
        # Fail early with a clear message when the ceph SDK is missing.
        try:
            import ceph
        except ImportError:
            raise ImportError('Please install ceph to enable CephBackend.')

        warnings.warn(
            'CephBackend will be deprecated, please use PetrelBackend instead',
            DeprecationWarning)
        self._client = ceph.S3Client()
        assert isinstance(path_mapping, dict) or path_mapping is None
        self.path_mapping = path_mapping

    def get(self, filepath):
        """Read ``filepath`` as bytes, applying the path mapping first.

        Returns a memoryview of the fetched value.
        """
        path = str(filepath)
        if self.path_mapping is not None:
            for src, dst in self.path_mapping.items():
                path = path.replace(src, dst)
        return memoryview(self._client.Get(path))

    def get_text(self, filepath, encoding=None):
        raise NotImplementedError
84
+
85
+
86
class PetrelBackend(BaseStorageBackend):
    """Petrel storage backend (for internal use).

    PetrelBackend supports reading and writing data to multiple clusters.
    If the file path contains the cluster name, PetrelBackend will read data
    from the specified cluster or write data to it. Otherwise, PetrelBackend
    will access the default cluster.

    Args:
        path_mapping (dict, optional): Path mapping dict from local path to
            Petrel path. When ``path_mapping={'src': 'dst'}``, ``src`` in
            ``filepath`` will be replaced by ``dst``. Default: None.
        enable_mc (bool, optional): Whether to enable memcached support.
            Default: True.

    Examples:
        >>> filepath1 = 's3://path/of/file'
        >>> filepath2 = 'cluster-name:s3://path/of/file'
        >>> client = PetrelBackend()
        >>> client.get(filepath1)  # get data from default cluster
        >>> client.get(filepath2)  # get data from 'cluster-name' cluster
    """

    def __init__(self,
                 path_mapping: Optional[dict] = None,
                 enable_mc: bool = True):
        try:
            from petrel_client import client
        except ImportError:
            raise ImportError('Please install petrel_client to enable '
                              'PetrelBackend.')

        self._client = client.Client(enable_mc=enable_mc)
        assert isinstance(path_mapping, dict) or path_mapping is None
        self.path_mapping = path_mapping

    def _map_path(self, filepath: Union[str, Path]) -> str:
        """Map ``filepath`` to a string path whose prefix will be replaced by
        :attr:`self.path_mapping`.

        Args:
            filepath (str): Path to be mapped.
        """
        filepath = str(filepath)
        if self.path_mapping is not None:
            for k, v in self.path_mapping.items():
                filepath = filepath.replace(k, v)
        return filepath

    def _format_path(self, filepath: str) -> str:
        """Convert a ``filepath`` to standard format of petrel oss.

        If the ``filepath`` is concatenated by ``os.path.join``, in a Windows
        environment, the ``filepath`` will be the format of
        's3://bucket_name\\image.jpg'. By invoking :meth:`_format_path`, the
        above ``filepath`` will be converted to 's3://bucket_name/image.jpg'.

        Args:
            filepath (str): Path to be formatted.
        """
        return re.sub(r'\\+', '/', filepath)

    def get(self, filepath: Union[str, Path]) -> memoryview:
        """Read data from a given ``filepath`` with 'rb' mode.

        Args:
            filepath (str or Path): Path to read data.

        Returns:
            memoryview: A memory view of expected bytes object to avoid
            copying. The memoryview object can be converted to bytes by
            ``value_buf.tobytes()``.
        """
        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        value = self._client.Get(filepath)
        value_buf = memoryview(value)
        return value_buf

    def get_text(self,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> str:
        """Read data from a given ``filepath`` with 'r' mode.

        Args:
            filepath (str or Path): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.
        """
        return str(self.get(filepath), encoding=encoding)

    def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
        """Save data to a given ``filepath``.

        Args:
            obj (bytes): Data to be saved.
            filepath (str or Path): Path to write data.
        """
        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        self._client.put(filepath, obj)

    def put_text(self,
                 obj: str,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> None:
        """Save data to a given ``filepath``.

        Args:
            obj (str): Data to be written.
            filepath (str or Path): Path to write data.
            encoding (str): The encoding format used to encode the ``obj``.
                Default: 'utf-8'.
        """
        self.put(bytes(obj, encoding=encoding), filepath)

    def remove(self, filepath: Union[str, Path]) -> None:
        """Remove a file.

        Args:
            filepath (str or Path): Path to be removed.
        """
        # Older Petrel SDKs ship without `delete`; fail loudly rather than
        # silently skipping the removal.
        if not has_method(self._client, 'delete'):
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `delete` method, please use a higher version or dev'
                 ' branch instead.'))

        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        self._client.delete(filepath)

    def exists(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path exists.

        Args:
            filepath (str or Path): Path to be checked whether exists.

        Returns:
            bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.
        """
        if not (has_method(self._client, 'contains')
                and has_method(self._client, 'isdir')):
            # Fixed: the original message was missing a space between the
            # string literals, rendering "higherversion or dev branch".
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `contains` and `isdir` methods, please use a higher'
                 ' version or dev branch instead.'))

        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        return self._client.contains(filepath) or self._client.isdir(filepath)

    def isdir(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a directory.

        Args:
            filepath (str or Path): Path to be checked whether it is a
                directory.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a directory,
            ``False`` otherwise.
        """
        if not has_method(self._client, 'isdir'):
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `isdir` method, please use a higher version or dev'
                 ' branch instead.'))

        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        return self._client.isdir(filepath)

    def isfile(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a file.

        Args:
            filepath (str or Path): Path to be checked whether it is a file.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a file, ``False``
            otherwise.
        """
        if not has_method(self._client, 'contains'):
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `contains` method, please use a higher version or '
                 'dev branch instead.'))

        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        return self._client.contains(filepath)

    def join_path(self, filepath: Union[str, Path],
                  *filepaths: Union[str, Path]) -> str:
        """Concatenate all file paths.

        Args:
            filepath (str or Path): Path to be concatenated.

        Returns:
            str: The result after concatenation.
        """
        filepath = self._format_path(self._map_path(filepath))
        if filepath.endswith('/'):
            filepath = filepath[:-1]
        formatted_paths = [filepath]
        for path in filepaths:
            formatted_paths.append(self._format_path(self._map_path(path)))
        return '/'.join(formatted_paths)

    @contextmanager
    def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]:
        """Download a file from ``filepath`` and return a temporary path.

        ``get_local_path`` is decorated by :meth:`contextlib.contextmanager`.
        It can be called with a ``with`` statement, and when exiting the
        ``with`` statement, the temporary path will be released.

        Args:
            filepath (str | Path): Download a file from ``filepath``.

        Examples:
            >>> client = PetrelBackend()
            >>> # After exiting the ``with`` clause,
            >>> # the path will be removed
            >>> with client.get_local_path('s3://path/of/your/file') as path:
            ...     # do something here

        Yields:
            Iterable[str]: Only yield one temporary path.
        """
        filepath = self._map_path(filepath)
        filepath = self._format_path(filepath)
        assert self.isfile(filepath)
        # Create the temporary file before entering ``try`` so that ``f`` is
        # always bound when ``finally`` runs; the original code could raise
        # ``NameError`` in ``finally`` (masking the real error) if the
        # temporary file creation itself failed.
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            f.write(self.get(filepath))
            f.close()
            yield f.name
        finally:
            # ``close`` is idempotent; closing before removal keeps the
            # cleanup safe on platforms that forbid deleting open files.
            f.close()
            os.remove(f.name)

    def list_dir_or_file(self,
                         dir_path: Union[str, Path],
                         list_dir: bool = True,
                         list_file: bool = True,
                         suffix: Optional[Union[str, Tuple[str]]] = None,
                         recursive: bool = False) -> Iterator[str]:
        """Scan a directory to find the interested directories or files in
        arbitrary order.

        Note:
            Petrel has no concept of directories but it simulates the
            directory hierarchy in the filesystem through public prefixes. In
            addition, if the returned path ends with '/', it means the path
            is a public prefix which is a logical directory.

        Note:
            :meth:`list_dir_or_file` returns the path relative to
            ``dir_path``. In addition, the returned path of directory will
            not contains the suffix '/' which is consistent with other
            backends.

        Args:
            dir_path (str | Path): Path of the directory.
            list_dir (bool): List the directories. Default: True.
            list_file (bool): List the path of files. Default: True.
            suffix (str or tuple[str], optional): File suffix
                that we are interested in. Default: None.
            recursive (bool): If set to True, recursively scan the
                directory. Default: False.

        Yields:
            Iterable[str]: A relative path to ``dir_path``.
        """
        if not has_method(self._client, 'list'):
            raise NotImplementedError(
                ('Current version of Petrel Python SDK has not supported '
                 'the `list` method, please use a higher version or dev'
                 ' branch instead.'))

        dir_path = self._map_path(dir_path)
        dir_path = self._format_path(dir_path)
        if list_dir and suffix is not None:
            raise TypeError(
                '`list_dir` should be False when `suffix` is not None')

        if (suffix is not None) and not isinstance(suffix, (str, tuple)):
            raise TypeError('`suffix` must be a string or tuple of strings')

        # Petrel's simulated directory hierarchy assumes that directory paths
        # should end with `/`
        if not dir_path.endswith('/'):
            dir_path += '/'

        root = dir_path

        def _list_dir_or_file(dir_path, list_dir, list_file, suffix,
                              recursive):
            for path in self._client.list(dir_path):
                # the `self.isdir` is not used here to determine whether path
                # is a directory, because `self.isdir` relies on
                # `self._client.list`
                if path.endswith('/'):  # a directory path
                    next_dir_path = self.join_path(dir_path, path)
                    if list_dir:
                        # get the relative path and exclude the last
                        # character '/'
                        rel_dir = next_dir_path[len(root):-1]
                        yield rel_dir
                    if recursive:
                        yield from _list_dir_or_file(next_dir_path, list_dir,
                                                     list_file, suffix,
                                                     recursive)
                else:  # a file path
                    absolute_path = self.join_path(dir_path, path)
                    rel_path = absolute_path[len(root):]
                    if (suffix is None
                            or rel_path.endswith(suffix)) and list_file:
                        yield rel_path

        return _list_dir_or_file(dir_path, list_dir, list_file, suffix,
                                 recursive)
412
+
413
+
414
class MemcachedBackend(BaseStorageBackend):
    """Storage backend that fetches values from a memcached service.

    Attributes:
        server_list_cfg (str): Config file for memcached server list.
        client_cfg (str): Config file for memcached client.
        sys_path (str | None): Additional path to be appended to `sys.path`.
            Default: None.
    """

    def __init__(self, server_list_cfg, client_cfg, sys_path=None):
        # The ``mc`` package may live outside the default search path, so
        # extend ``sys.path`` before attempting the import.
        if sys_path is not None:
            import sys
            sys.path.append(sys_path)
        try:
            import mc
        except ImportError:
            raise ImportError(
                'Please install memcached to enable MemcachedBackend.')

        self.server_list_cfg = server_list_cfg
        self.client_cfg = client_cfg
        self._client = mc.MemcachedClient.GetInstance(
            self.server_list_cfg, self.client_cfg)
        # ``mc.pyvector`` is a handle that points at a memory-cache slot;
        # it is reused across ``get`` calls as the receive buffer.
        self._mc_buffer = mc.pyvector()

    def get(self, filepath):
        """Fetch the raw bytes stored under the key ``filepath``."""
        import mc
        key = str(filepath)
        self._client.Get(key, self._mc_buffer)
        return mc.ConvertBuffer(self._mc_buffer)

    def get_text(self, filepath, encoding=None):
        """Text reads are not supported by the memcached backend."""
        raise NotImplementedError
450
+
451
+
452
class LmdbBackend(BaseStorageBackend):
    """Lmdb storage backend.

    Args:
        db_path (str): Lmdb database path.
        readonly (bool, optional): Lmdb environment parameter. If True,
            disallow any write operations. Default: True.
        lock (bool, optional): Lmdb environment parameter. If False, when
            concurrent access occurs, do not lock the database.
            Default: False.
        readahead (bool, optional): Lmdb environment parameter. If False,
            disable the OS filesystem readahead mechanism, which may improve
            random read performance when a database is larger than RAM.
            Default: False.

    Attributes:
        db_path (str): Lmdb database path.
    """

    def __init__(self,
                 db_path,
                 readonly=True,
                 lock=False,
                 readahead=False,
                 **kwargs):
        try:
            import lmdb
        except ImportError:
            raise ImportError('Please install lmdb to enable LmdbBackend.')

        self.db_path = str(db_path)
        # One long-lived environment is kept open; a fresh read-only
        # transaction is opened per lookup in :meth:`get`.
        self._client = lmdb.open(
            self.db_path,
            readonly=readonly,
            lock=lock,
            readahead=readahead,
            **kwargs)

    def get(self, filepath):
        """Return the value stored under the lmdb key ``filepath``.

        Args:
            filepath (str | obj:`Path`): Here, filepath is the lmdb key.
        """
        key = str(filepath).encode('ascii')
        with self._client.begin(write=False) as txn:
            return txn.get(key)

    def get_text(self, filepath, encoding=None):
        """Text reads are not supported by the lmdb backend."""
        raise NotImplementedError
502
+
503
+
504
class HardDiskBackend(BaseStorageBackend):
    """Raw hard disks storage backend."""

    _allow_symlink = True

    def get(self, filepath: Union[str, Path]) -> bytes:
        """Read data from a given ``filepath`` with 'rb' mode.

        Args:
            filepath (str or Path): Path to read data.

        Returns:
            bytes: Expected bytes object.
        """
        with open(filepath, 'rb') as f:
            return f.read()

    def get_text(self,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> str:
        """Read data from a given ``filepath`` with 'r' mode.

        Args:
            filepath (str or Path): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.
        """
        with open(filepath, 'r', encoding=encoding) as f:
            return f.read()

    def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
        """Write data to a given ``filepath`` with 'wb' mode.

        Note:
            ``put`` will create a directory if the directory of ``filepath``
            does not exist.

        Args:
            obj (bytes): Data to be written.
            filepath (str or Path): Path to write data.
        """
        mmcv.mkdir_or_exist(osp.dirname(filepath))
        with open(filepath, 'wb') as f:
            f.write(obj)

    def put_text(self,
                 obj: str,
                 filepath: Union[str, Path],
                 encoding: str = 'utf-8') -> None:
        """Write data to a given ``filepath`` with 'w' mode.

        Note:
            ``put_text`` will create a directory if the directory of
            ``filepath`` does not exist.

        Args:
            obj (str): Data to be written.
            filepath (str or Path): Path to write data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.
        """
        mmcv.mkdir_or_exist(osp.dirname(filepath))
        with open(filepath, 'w', encoding=encoding) as f:
            f.write(obj)

    def remove(self, filepath: Union[str, Path]) -> None:
        """Remove a file.

        Args:
            filepath (str or Path): Path to be removed.
        """
        os.remove(filepath)

    def exists(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path exists.

        Args:
            filepath (str or Path): Path to be checked whether exists.

        Returns:
            bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.
        """
        return osp.exists(filepath)

    def isdir(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a directory.

        Args:
            filepath (str or Path): Path to be checked whether it is a
                directory.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a directory,
            ``False`` otherwise.
        """
        return osp.isdir(filepath)

    def isfile(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a file.

        Args:
            filepath (str or Path): Path to be checked whether it is a file.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a file, ``False``
            otherwise.
        """
        return osp.isfile(filepath)

    def join_path(self, filepath: Union[str, Path],
                  *filepaths: Union[str, Path]) -> str:
        """Concatenate all file paths.

        Join one or more filepath components intelligently. The return value
        is the concatenation of filepath and any members of *filepaths.

        Args:
            filepath (str or Path): Path to be concatenated.

        Returns:
            str: The result of concatenation.
        """
        return osp.join(filepath, *filepaths)

    @contextmanager
    def get_local_path(
            self, filepath: Union[str, Path]) -> Iterable[Union[str, Path]]:
        """Only for unified API and do nothing."""
        yield filepath

    def list_dir_or_file(self,
                         dir_path: Union[str, Path],
                         list_dir: bool = True,
                         list_file: bool = True,
                         suffix: Optional[Union[str, Tuple[str]]] = None,
                         recursive: bool = False) -> Iterator[str]:
        """Scan a directory to find the interested directories or files in
        arbitrary order.

        Note:
            :meth:`list_dir_or_file` returns the path relative to
            ``dir_path``.

        Args:
            dir_path (str | Path): Path of the directory.
            list_dir (bool): List the directories. Default: True.
            list_file (bool): List the path of files. Default: True.
            suffix (str or tuple[str], optional): File suffix
                that we are interested in. Default: None.
            recursive (bool): If set to True, recursively scan the
                directory. Default: False.

        Yields:
            Iterable[str]: A relative path to ``dir_path``.
        """
        if list_dir and suffix is not None:
            raise TypeError('`suffix` should be None when `list_dir` is True')

        if (suffix is not None) and not isinstance(suffix, (str, tuple)):
            raise TypeError('`suffix` must be a string or tuple of strings')

        root = dir_path

        def _scan(current):
            # Hidden files (dot-prefixed) are skipped; hidden directories
            # are still descended into, matching the original behavior.
            for entry in os.scandir(current):
                if not entry.name.startswith('.') and entry.is_file():
                    rel_path = osp.relpath(entry.path, root)
                    matched = suffix is None or rel_path.endswith(suffix)
                    if matched and list_file:
                        yield rel_path
                elif osp.isdir(entry.path):
                    if list_dir:
                        yield osp.relpath(entry.path, root)
                    if recursive:
                        yield from _scan(entry.path)

        return _scan(dir_path)
690
+
691
+
692
class HTTPBackend(BaseStorageBackend):
    """HTTP and HTTPS storage backend.

    Data is fetched over the network with :func:`urllib.request.urlopen`,
    so every read performs a remote request.
    """

    def get(self, filepath):
        """Read bytes from a given URL ``filepath``.

        Args:
            filepath (str): URL to read data from.

        Returns:
            bytes: Expected bytes object.
        """
        value_buf = urlopen(filepath).read()
        return value_buf

    def get_text(self, filepath, encoding='utf-8'):
        """Read text from a given URL ``filepath``.

        Args:
            filepath (str): URL to read data from.
            encoding (str): The encoding format used to decode the response.
                Default: 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.
        """
        value_buf = urlopen(filepath).read()
        return value_buf.decode(encoding)

    @contextmanager
    def get_local_path(self, filepath: str) -> Iterable[str]:
        """Download a file from ``filepath``.

        ``get_local_path`` is decorated by :meth:`contextlib.contextmanager`.
        It can be called with a ``with`` statement, and when exiting the
        ``with`` statement, the temporary path will be released.

        Args:
            filepath (str): Download a file from ``filepath``.

        Examples:
            >>> client = HTTPBackend()
            >>> # After exiting the ``with`` clause,
            >>> # the path will be removed
            >>> with client.get_local_path('http://path/of/your/file') as path:
            ...     # do something here
        """
        # Create the temporary file before entering ``try`` so that ``f`` is
        # always bound when ``finally`` runs; the original code could raise
        # ``NameError`` in ``finally`` (masking the real error) if the
        # temporary file creation itself failed.
        f = tempfile.NamedTemporaryFile(delete=False)
        try:
            f.write(self.get(filepath))
            f.close()
            yield f.name
        finally:
            # ``close`` is idempotent; closing before removal keeps the
            # cleanup safe on platforms that forbid deleting open files.
            f.close()
            os.remove(f.name)
728
+
729
+
730
class FileClient:
    """A general file client to access files in different backends.

    The client loads a file or text in a specified backend from its path
    and returns it as a binary or text file. There are two ways to choose a
    backend, the name of backend and the prefix of path. Although both of
    them can be used to choose a storage backend, ``backend`` has a higher
    priority: if they are all set, the storage backend will be chosen by the
    backend argument. If they are all `None`, the disk backend will be
    chosen. Note that it can also register other backend accessors with a
    given name, prefixes, and backend class. In addition, the singleton
    pattern is used to avoid repeated object creation. If the arguments are
    the same, the same object will be returned.

    Args:
        backend (str, optional): The storage backend type. Options are "disk",
            "ceph", "memcached", "lmdb", "http" and "petrel". Default: None.
        prefix (str, optional): The prefix of the registered storage backend.
            Options are "s3", "http", "https". Default: None.

    Examples:
        >>> # only set backend
        >>> file_client = FileClient(backend='petrel')
        >>> # only set prefix
        >>> file_client = FileClient(prefix='s3')
        >>> # set both backend and prefix but use backend to choose client
        >>> file_client = FileClient(backend='petrel', prefix='s3')
        >>> # if the arguments are the same, the same object is returned
        >>> file_client1 = FileClient(backend='petrel')
        >>> file_client1 is file_client
        True

    Attributes:
        client (:obj:`BaseStorageBackend`): The backend object.
    """

    # Registry mapping a backend name to its backend class.
    _backends = {
        'disk': HardDiskBackend,
        'ceph': CephBackend,
        'memcached': MemcachedBackend,
        'lmdb': LmdbBackend,
        'petrel': PetrelBackend,
        'http': HTTPBackend,
    }
    # This collection is used to record the overridden backends, and when a
    # backend appears in the collection, the singleton pattern is disabled for
    # that backend, because if the singleton pattern is used, then the object
    # returned will be the backend before overwriting
    _overridden_backends = set()
    # Registry mapping a URI prefix (e.g. 's3' in 's3://...') to the backend
    # class that should handle it.
    _prefix_to_backends = {
        's3': PetrelBackend,
        'http': HTTPBackend,
        'https': HTTPBackend,
    }
    _overridden_prefixes = set()

    # Singleton cache: maps an argument key built in ``__new__`` to the
    # FileClient instance created for those arguments.
    _instances = {}

    def __new__(cls, backend=None, prefix=None, **kwargs):
        # Neither backend nor prefix given: fall back to the disk backend.
        if backend is None and prefix is None:
            backend = 'disk'
        if backend is not None and backend not in cls._backends:
            raise ValueError(
                f'Backend {backend} is not supported. Currently supported ones'
                f' are {list(cls._backends.keys())}')
        if prefix is not None and prefix not in cls._prefix_to_backends:
            raise ValueError(
                f'prefix {prefix} is not supported. Currently supported ones '
                f'are {list(cls._prefix_to_backends.keys())}')

        # concatenate the arguments to a unique key for determining whether
        # objects with the same arguments were created
        arg_key = f'{backend}:{prefix}'
        for key, value in kwargs.items():
            arg_key += f':{key}:{value}'

        # if a backend was overridden, it will create a new object
        if (arg_key in cls._instances
                and backend not in cls._overridden_backends
                and prefix not in cls._overridden_prefixes):
            _instance = cls._instances[arg_key]
        else:
            # create a new object and put it to _instance
            _instance = super().__new__(cls)
            # ``backend`` takes priority over ``prefix`` when both are given.
            if backend is not None:
                _instance.client = cls._backends[backend](**kwargs)
            else:
                _instance.client = cls._prefix_to_backends[prefix](**kwargs)

            cls._instances[arg_key] = _instance

        return _instance

    @property
    def name(self):
        # Delegates to the underlying backend.
        return self.client.name

    @property
    def allow_symlink(self):
        # Delegates to the underlying backend.
        return self.client.allow_symlink

    @staticmethod
    def parse_uri_prefix(uri: Union[str, Path]) -> Optional[str]:
        """Parse the prefix of a uri.

        Args:
            uri (str | Path): Uri to be parsed that contains the file prefix.

        Examples:
            >>> FileClient.parse_uri_prefix('s3://path/of/your/file')
            's3'

        Returns:
            str | None: Return the prefix of uri if the uri contains '://'
            else ``None``.
        """
        assert is_filepath(uri)
        uri = str(uri)
        if '://' not in uri:
            return None
        else:
            prefix, _ = uri.split('://')
            # In the case of PetrelBackend, the prefix may contains the
            # cluster name like clusterName:s3
            if ':' in prefix:
                _, prefix = prefix.split(':')
            return prefix

    @classmethod
    def infer_client(cls,
                     file_client_args: Optional[dict] = None,
                     uri: Optional[Union[str, Path]] = None) -> 'FileClient':
        """Infer a suitable file client based on the URI and arguments.

        Args:
            file_client_args (dict, optional): Arguments to instantiate a
                FileClient. Default: None.
            uri (str | Path, optional): Uri to be parsed that contains the
                file prefix. Default: None.

        Examples:
            >>> uri = 's3://path/of/your/file'
            >>> file_client = FileClient.infer_client(uri=uri)
            >>> file_client_args = {'backend': 'petrel'}
            >>> file_client = FileClient.infer_client(file_client_args)

        Returns:
            FileClient: Instantiated FileClient object.
        """
        assert file_client_args is not None or uri is not None
        # Explicit arguments win; otherwise infer the backend from the uri
        # prefix.
        if file_client_args is None:
            file_prefix = cls.parse_uri_prefix(uri)  # type: ignore
            return cls(prefix=file_prefix)
        else:
            return cls(**file_client_args)

    @classmethod
    def _register_backend(cls, name, backend, force=False, prefixes=None):
        # Validate the registration request before mutating any registry.
        if not isinstance(name, str):
            raise TypeError('the backend name should be a string, '
                            f'but got {type(name)}')
        if not inspect.isclass(backend):
            raise TypeError(
                f'backend should be a class but got {type(backend)}')
        if not issubclass(backend, BaseStorageBackend):
            raise TypeError(
                f'backend {backend} is not a subclass of BaseStorageBackend')
        if not force and name in cls._backends:
            raise KeyError(
                f'{name} is already registered as a storage backend, '
                'add "force=True" if you want to override it')

        # Record forced overrides so the singleton cache is bypassed for
        # this backend name (see ``_overridden_backends``).
        if name in cls._backends and force:
            cls._overridden_backends.add(name)
        cls._backends[name] = backend

        if prefixes is not None:
            if isinstance(prefixes, str):
                prefixes = [prefixes]
            else:
                assert isinstance(prefixes, (list, tuple))
            for prefix in prefixes:
                if prefix not in cls._prefix_to_backends:
                    cls._prefix_to_backends[prefix] = backend
                elif (prefix in cls._prefix_to_backends) and force:
                    cls._overridden_prefixes.add(prefix)
                    cls._prefix_to_backends[prefix] = backend
                else:
                    raise KeyError(
                        f'{prefix} is already registered as a storage backend,'
                        ' add "force=True" if you want to override it')

    @classmethod
    def register_backend(cls, name, backend=None, force=False, prefixes=None):
        """Register a backend to FileClient.

        This method can be used as a normal class method or a decorator.

        .. code-block:: python

            class NewBackend(BaseStorageBackend):

                def get(self, filepath):
                    return filepath

                def get_text(self, filepath):
                    return filepath

            FileClient.register_backend('new', NewBackend)

        or

        .. code-block:: python

            @FileClient.register_backend('new')
            class NewBackend(BaseStorageBackend):

                def get(self, filepath):
                    return filepath

                def get_text(self, filepath):
                    return filepath

        Args:
            name (str): The name of the registered backend.
            backend (class, optional): The backend class to be registered,
                which must be a subclass of :class:`BaseStorageBackend`.
                When this method is used as a decorator, backend is None.
                Defaults to None.
            force (bool, optional): Whether to override the backend if the
                name has already been registered. Defaults to False.
            prefixes (str or list[str] or tuple[str], optional): The prefixes
                of the registered storage backend. Default: None.
                `New in version 1.3.15.`
        """
        # Normal class-method usage: register immediately.
        if backend is not None:
            cls._register_backend(
                name, backend, force=force, prefixes=prefixes)
            return

        # Decorator usage: return a wrapper that registers the decorated
        # class and passes it through unchanged.
        def _register(backend_cls):
            cls._register_backend(
                name, backend_cls, force=force, prefixes=prefixes)
            return backend_cls

        return _register

    def get(self, filepath: Union[str, Path]) -> Union[bytes, memoryview]:
        """Read data from a given ``filepath`` with 'rb' mode.

        Note:
            There are two types of return values for ``get``, one is ``bytes``
            and the other is ``memoryview``. The advantage of using memoryview
            is that you can avoid copying, and if you want to convert it to
            ``bytes``, you can use ``.tobytes()``.

        Args:
            filepath (str or Path): Path to read data.

        Returns:
            bytes | memoryview: Expected bytes object or a memory view of the
            bytes object.
        """
        return self.client.get(filepath)

    def get_text(self, filepath: Union[str, Path], encoding='utf-8') -> str:
        """Read data from a given ``filepath`` with 'r' mode.

        Args:
            filepath (str or Path): Path to read data.
            encoding (str): The encoding format used to open the ``filepath``.
                Default: 'utf-8'.

        Returns:
            str: Expected text reading from ``filepath``.
        """
        return self.client.get_text(filepath, encoding)

    def put(self, obj: bytes, filepath: Union[str, Path]) -> None:
        """Write data to a given ``filepath`` with 'wb' mode.

        Note:
            ``put`` should create a directory if the directory of ``filepath``
            does not exist.

        Args:
            obj (bytes): Data to be written.
            filepath (str or Path): Path to write data.
        """
        self.client.put(obj, filepath)

    def put_text(self, obj: str, filepath: Union[str, Path]) -> None:
        """Write data to a given ``filepath`` with 'w' mode.

        Note:
            ``put_text`` should create a directory if the directory of
            ``filepath`` does not exist. The backend's default encoding
            ('utf-8') is used; this wrapper does not expose an ``encoding``
            parameter.

        Args:
            obj (str): Data to be written.
            filepath (str or Path): Path to write data.
        """
        self.client.put_text(obj, filepath)

    def remove(self, filepath: Union[str, Path]) -> None:
        """Remove a file.

        Args:
            filepath (str, Path): Path to be removed.
        """
        self.client.remove(filepath)

    def exists(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path exists.

        Args:
            filepath (str or Path): Path to be checked whether exists.

        Returns:
            bool: Return ``True`` if ``filepath`` exists, ``False`` otherwise.
        """
        return self.client.exists(filepath)

    def isdir(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a directory.

        Args:
            filepath (str or Path): Path to be checked whether it is a
                directory.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a directory,
            ``False`` otherwise.
        """
        return self.client.isdir(filepath)

    def isfile(self, filepath: Union[str, Path]) -> bool:
        """Check whether a file path is a file.

        Args:
            filepath (str or Path): Path to be checked whether it is a file.

        Returns:
            bool: Return ``True`` if ``filepath`` points to a file, ``False``
            otherwise.
        """
        return self.client.isfile(filepath)

    def join_path(self, filepath: Union[str, Path],
                  *filepaths: Union[str, Path]) -> str:
        """Concatenate all file paths.

        Join one or more filepath components intelligently. The return value
        is the concatenation of filepath and any members of *filepaths.

        Args:
            filepath (str or Path): Path to be concatenated.

        Returns:
            str: The result of concatenation.
        """
        return self.client.join_path(filepath, *filepaths)

    @contextmanager
    def get_local_path(self, filepath: Union[str, Path]) -> Iterable[str]:
        """Download data from ``filepath`` and write the data to local path.

        ``get_local_path`` is decorated by :meth:`contextlib.contextmanager`.
        It can be called with a ``with`` statement, and when exiting the
        ``with`` statement, the temporary path will be released.

        Note:
            If the ``filepath`` is a local path, just return itself.

        .. warning::
            ``get_local_path`` is an experimental interface that may change in
            the future.

        Args:
            filepath (str or Path): Path to be read data.

        Examples:
            >>> file_client = FileClient(prefix='s3')
            >>> with file_client.get_local_path('s3://bucket/abc.jpg') as path:
            ...     # do something here

        Yields:
            Iterable[str]: Only yield one path.
        """
        with self.client.get_local_path(str(filepath)) as local_path:
            yield local_path

    def list_dir_or_file(self,
                         dir_path: Union[str, Path],
                         list_dir: bool = True,
                         list_file: bool = True,
                         suffix: Optional[Union[str, Tuple[str]]] = None,
                         recursive: bool = False) -> Iterator[str]:
        """Scan a directory to find the interested directories or files in
        arbitrary order.

        Note:
            :meth:`list_dir_or_file` returns the path relative to
            ``dir_path``.

        Args:
            dir_path (str | Path): Path of the directory.
            list_dir (bool): List the directories. Default: True.
            list_file (bool): List the path of files. Default: True.
            suffix (str or tuple[str], optional): File suffix
                that we are interested in. Default: None.
            recursive (bool): If set to True, recursively scan the
                directory. Default: False.

        Yields:
            Iterable[str]: A relative path to ``dir_path``.
        """
        yield from self.client.list_dir_or_file(dir_path, list_dir, list_file,
                                                suffix, recursive)
groundingLMM/mmcv/mmcv/fileio/io.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from io import BytesIO, StringIO
3
+ from pathlib import Path
4
+
5
+ from ..utils import is_list_of, is_str
6
+ from .file_client import FileClient
7
+ from .handlers import BaseFileHandler, JsonHandler, PickleHandler, YamlHandler
8
+
9
+ file_handlers = {
10
+ 'json': JsonHandler(),
11
+ 'yaml': YamlHandler(),
12
+ 'yml': YamlHandler(),
13
+ 'pickle': PickleHandler(),
14
+ 'pkl': PickleHandler()
15
+ }
16
+
17
+
18
+ def load(file, file_format=None, file_client_args=None, **kwargs):
19
+ """Load data from json/yaml/pickle files.
20
+
21
+ This method provides a unified api for loading data from serialized files.
22
+
23
+ Note:
24
+ In v1.3.16 and later, ``load`` supports loading data from serialized
25
+ files that can be stored in different backends.
26
+
27
+ Args:
28
+ file (str or :obj:`Path` or file-like object): Filename or a file-like
29
+ object.
30
+ file_format (str, optional): If not specified, the file format will be
31
+ inferred from the file extension, otherwise use the specified one.
32
+ Currently supported formats include "json", "yaml/yml" and
33
+ "pickle/pkl".
34
+ file_client_args (dict, optional): Arguments to instantiate a
35
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
36
+ Default: None.
37
+
38
+ Examples:
39
+ >>> load('/path/of/your/file') # file is stored on disk
40
+ >>> load('https://path/of/your/file') # file is stored on the Internet
41
+ >>> load('s3://path/of/your/file') # file is stored in petrel
42
+
43
+ Returns:
44
+ The content from the file.
45
+ """
46
+ if isinstance(file, Path):
47
+ file = str(file)
48
+ if file_format is None and is_str(file):
49
+ file_format = file.split('.')[-1]
50
+ if file_format not in file_handlers:
51
+ raise TypeError(f'Unsupported format: {file_format}')
52
+
53
+ handler = file_handlers[file_format]
54
+ if is_str(file):
55
+ file_client = FileClient.infer_client(file_client_args, file)
56
+ if handler.str_like:
57
+ with StringIO(file_client.get_text(file)) as f:
58
+ obj = handler.load_from_fileobj(f, **kwargs)
59
+ else:
60
+ with BytesIO(file_client.get(file)) as f:
61
+ obj = handler.load_from_fileobj(f, **kwargs)
62
+ elif hasattr(file, 'read'):
63
+ obj = handler.load_from_fileobj(file, **kwargs)
64
+ else:
65
+ raise TypeError('"file" must be a filepath str or a file-object')
66
+ return obj
67
+
68
+
69
+ def dump(obj, file=None, file_format=None, file_client_args=None, **kwargs):
70
+ """Dump data to json/yaml/pickle strings or files.
71
+
72
+ This method provides a unified api for dumping data as strings or to files,
73
+ and also supports custom arguments for each file format.
74
+
75
+ Note:
76
+ In v1.3.16 and later, ``dump`` supports dumping data as strings or to
77
+ files which are saved to different backends.
78
+
79
+ Args:
80
+ obj (any): The python object to be dumped.
81
+ file (str or :obj:`Path` or file-like object, optional): If not
82
+ specified, then the object is dumped to a str, otherwise to a file
83
+ specified by the filename or file-like object.
84
+ file_format (str, optional): Same as :func:`load`.
85
+ file_client_args (dict, optional): Arguments to instantiate a
86
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
87
+ Default: None.
88
+
89
+ Examples:
90
+ >>> dump('hello world', '/path/of/your/file') # disk
91
+ >>> dump('hello world', 's3://path/of/your/file') # ceph or petrel
92
+
93
+ Returns:
94
+ bool: True for success, False otherwise.
95
+ """
96
+ if isinstance(file, Path):
97
+ file = str(file)
98
+ if file_format is None:
99
+ if is_str(file):
100
+ file_format = file.split('.')[-1]
101
+ elif file is None:
102
+ raise ValueError(
103
+ 'file_format must be specified since file is None')
104
+ if file_format not in file_handlers:
105
+ raise TypeError(f'Unsupported format: {file_format}')
106
+
107
+ handler = file_handlers[file_format]
108
+ if file is None:
109
+ return handler.dump_to_str(obj, **kwargs)
110
+ elif is_str(file):
111
+ file_client = FileClient.infer_client(file_client_args, file)
112
+ if handler.str_like:
113
+ with StringIO() as f:
114
+ handler.dump_to_fileobj(obj, f, **kwargs)
115
+ file_client.put_text(f.getvalue(), file)
116
+ else:
117
+ with BytesIO() as f:
118
+ handler.dump_to_fileobj(obj, f, **kwargs)
119
+ file_client.put(f.getvalue(), file)
120
+ elif hasattr(file, 'write'):
121
+ handler.dump_to_fileobj(obj, file, **kwargs)
122
+ else:
123
+ raise TypeError('"file" must be a filename str or a file-object')
124
+
125
+
126
+ def _register_handler(handler, file_formats):
127
+ """Register a handler for some file extensions.
128
+
129
+ Args:
130
+ handler (:obj:`BaseFileHandler`): Handler to be registered.
131
+ file_formats (str or list[str]): File formats to be handled by this
132
+ handler.
133
+ """
134
+ if not isinstance(handler, BaseFileHandler):
135
+ raise TypeError(
136
+ f'handler must be a child of BaseFileHandler, not {type(handler)}')
137
+ if isinstance(file_formats, str):
138
+ file_formats = [file_formats]
139
+ if not is_list_of(file_formats, str):
140
+ raise TypeError('file_formats must be a str or a list of str')
141
+ for ext in file_formats:
142
+ file_handlers[ext] = handler
143
+
144
+
145
+ def register_handler(file_formats, **kwargs):
146
+
147
+ def wrap(cls):
148
+ _register_handler(cls(**kwargs), file_formats)
149
+ return cls
150
+
151
+ return wrap
groundingLMM/mmcv/mmcv/fileio/parse.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+
3
+ from io import StringIO
4
+
5
+ from .file_client import FileClient
6
+
7
+
8
+ def list_from_file(filename,
9
+ prefix='',
10
+ offset=0,
11
+ max_num=0,
12
+ encoding='utf-8',
13
+ file_client_args=None):
14
+ """Load a text file and parse the content as a list of strings.
15
+
16
+ Note:
17
+ In v1.3.16 and later, ``list_from_file`` supports loading a text file
18
+ which can be stored in different backends and parsing the content as
19
+ a list of strings.
20
+
21
+ Args:
22
+ filename (str): Filename.
23
+ prefix (str): The prefix to be inserted to the beginning of each item.
24
+ offset (int): The offset of lines.
25
+ max_num (int): The maximum number of lines to be read,
26
+ zeros and negatives mean no limitation.
27
+ encoding (str): Encoding used to open the file. Default utf-8.
28
+ file_client_args (dict, optional): Arguments to instantiate a
29
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
30
+ Default: None.
31
+
32
+ Examples:
33
+ >>> list_from_file('/path/of/your/file') # disk
34
+ ['hello', 'world']
35
+ >>> list_from_file('s3://path/of/your/file') # ceph or petrel
36
+ ['hello', 'world']
37
+
38
+ Returns:
39
+ list[str]: A list of strings.
40
+ """
41
+ cnt = 0
42
+ item_list = []
43
+ file_client = FileClient.infer_client(file_client_args, filename)
44
+ with StringIO(file_client.get_text(filename, encoding)) as f:
45
+ for _ in range(offset):
46
+ f.readline()
47
+ for line in f:
48
+ if 0 < max_num <= cnt:
49
+ break
50
+ item_list.append(prefix + line.rstrip('\n\r'))
51
+ cnt += 1
52
+ return item_list
53
+
54
+
55
+ def dict_from_file(filename,
56
+ key_type=str,
57
+ encoding='utf-8',
58
+ file_client_args=None):
59
+ """Load a text file and parse the content as a dict.
60
+
61
+ Each line of the text file will be two or more columns split by
62
+ whitespaces or tabs. The first column will be parsed as dict keys, and
63
+ the following columns will be parsed as dict values.
64
+
65
+ Note:
66
+ In v1.3.16 and later, ``dict_from_file`` supports loading a text file
67
+ which can be stored in different backends and parsing the content as
68
+ a dict.
69
+
70
+ Args:
71
+ filename(str): Filename.
72
+ key_type(type): Type of the dict keys. str is used by default and
73
+ type conversion will be performed if specified.
74
+ encoding (str): Encoding used to open the file. Default utf-8.
75
+ file_client_args (dict, optional): Arguments to instantiate a
76
+ FileClient. See :class:`mmcv.fileio.FileClient` for details.
77
+ Default: None.
78
+
79
+ Examples:
80
+ >>> dict_from_file('/path/of/your/file') # disk
81
+ {'key1': 'value1', 'key2': 'value2'}
82
+ >>> dict_from_file('s3://path/of/your/file') # ceph or petrel
83
+ {'key1': 'value1', 'key2': 'value2'}
84
+
85
+ Returns:
86
+ dict: The parsed contents.
87
+ """
88
+ mapping = {}
89
+ file_client = FileClient.infer_client(file_client_args, filename)
90
+ with StringIO(file_client.get_text(filename, encoding)) as f:
91
+ for line in f:
92
+ items = line.rstrip('\n').split()
93
+ assert len(items) >= 2
94
+ key = key_type(items[0])
95
+ val = items[1:] if len(items) > 2 else items[1]
96
+ mapping[key] = val
97
+ return mapping
groundingLMM/mmcv/mmcv/image/__init__.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .colorspace import (bgr2gray, bgr2hls, bgr2hsv, bgr2rgb, bgr2ycbcr,
3
+ gray2bgr, gray2rgb, hls2bgr, hsv2bgr, imconvert,
4
+ rgb2bgr, rgb2gray, rgb2ycbcr, ycbcr2bgr, ycbcr2rgb)
5
+ from .geometric import (cutout, imcrop, imflip, imflip_, impad,
6
+ impad_to_multiple, imrescale, imresize, imresize_like,
7
+ imresize_to_multiple, imrotate, imshear, imtranslate,
8
+ rescale_size)
9
+ from .io import imfrombytes, imread, imwrite, supported_backends, use_backend
10
+ from .misc import tensor2imgs
11
+ from .photometric import (adjust_brightness, adjust_color, adjust_contrast,
12
+ adjust_lighting, adjust_sharpness, auto_contrast,
13
+ clahe, imdenormalize, imequalize, iminvert,
14
+ imnormalize, imnormalize_, lut_transform, posterize,
15
+ solarize)
16
+
17
+ __all__ = [
18
+ 'bgr2gray', 'bgr2hls', 'bgr2hsv', 'bgr2rgb', 'gray2bgr', 'gray2rgb',
19
+ 'hls2bgr', 'hsv2bgr', 'imconvert', 'rgb2bgr', 'rgb2gray', 'imrescale',
20
+ 'imresize', 'imresize_like', 'imresize_to_multiple', 'rescale_size',
21
+ 'imcrop', 'imflip', 'imflip_', 'impad', 'impad_to_multiple', 'imrotate',
22
+ 'imfrombytes', 'imread', 'imwrite', 'supported_backends', 'use_backend',
23
+ 'imdenormalize', 'imnormalize', 'imnormalize_', 'iminvert', 'posterize',
24
+ 'solarize', 'rgb2ycbcr', 'bgr2ycbcr', 'ycbcr2rgb', 'ycbcr2bgr',
25
+ 'tensor2imgs', 'imshear', 'imtranslate', 'adjust_color', 'imequalize',
26
+ 'adjust_brightness', 'adjust_contrast', 'lut_transform', 'clahe',
27
+ 'adjust_sharpness', 'auto_contrast', 'cutout', 'adjust_lighting'
28
+ ]
groundingLMM/mmcv/mmcv/image/colorspace.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import cv2
3
+ import numpy as np
4
+
5
+
6
+ def imconvert(img, src, dst):
7
+ """Convert an image from the src colorspace to dst colorspace.
8
+
9
+ Args:
10
+ img (ndarray): The input image.
11
+ src (str): The source colorspace, e.g., 'rgb', 'hsv'.
12
+ dst (str): The destination colorspace, e.g., 'rgb', 'hsv'.
13
+
14
+ Returns:
15
+ ndarray: The converted image.
16
+ """
17
+ code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
18
+ out_img = cv2.cvtColor(img, code)
19
+ return out_img
20
+
21
+
22
+ def bgr2gray(img, keepdim=False):
23
+ """Convert a BGR image to grayscale image.
24
+
25
+ Args:
26
+ img (ndarray): The input image.
27
+ keepdim (bool): If False (by default), then return the grayscale image
28
+ with 2 dims, otherwise 3 dims.
29
+
30
+ Returns:
31
+ ndarray: The converted grayscale image.
32
+ """
33
+ out_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
34
+ if keepdim:
35
+ out_img = out_img[..., None]
36
+ return out_img
37
+
38
+
39
+ def rgb2gray(img, keepdim=False):
40
+ """Convert a RGB image to grayscale image.
41
+
42
+ Args:
43
+ img (ndarray): The input image.
44
+ keepdim (bool): If False (by default), then return the grayscale image
45
+ with 2 dims, otherwise 3 dims.
46
+
47
+ Returns:
48
+ ndarray: The converted grayscale image.
49
+ """
50
+ out_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
51
+ if keepdim:
52
+ out_img = out_img[..., None]
53
+ return out_img
54
+
55
+
56
+ def gray2bgr(img):
57
+ """Convert a grayscale image to BGR image.
58
+
59
+ Args:
60
+ img (ndarray): The input image.
61
+
62
+ Returns:
63
+ ndarray: The converted BGR image.
64
+ """
65
+ img = img[..., None] if img.ndim == 2 else img
66
+ out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
67
+ return out_img
68
+
69
+
70
+ def gray2rgb(img):
71
+ """Convert a grayscale image to RGB image.
72
+
73
+ Args:
74
+ img (ndarray): The input image.
75
+
76
+ Returns:
77
+ ndarray: The converted RGB image.
78
+ """
79
+ img = img[..., None] if img.ndim == 2 else img
80
+ out_img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
81
+ return out_img
82
+
83
+
84
+ def _convert_input_type_range(img):
85
+ """Convert the type and range of the input image.
86
+
87
+ It converts the input image to np.float32 type and range of [0, 1].
88
+ It is mainly used for pre-processing the input image in colorspace
89
+ conversion functions such as rgb2ycbcr and ycbcr2rgb.
90
+
91
+ Args:
92
+ img (ndarray): The input image. It accepts:
93
+ 1. np.uint8 type with range [0, 255];
94
+ 2. np.float32 type with range [0, 1].
95
+
96
+ Returns:
97
+ (ndarray): The converted image with type of np.float32 and range of
98
+ [0, 1].
99
+ """
100
+ img_type = img.dtype
101
+ img = img.astype(np.float32)
102
+ if img_type == np.float32:
103
+ pass
104
+ elif img_type == np.uint8:
105
+ img /= 255.
106
+ else:
107
+ raise TypeError('The img type should be np.float32 or np.uint8, '
108
+ f'but got {img_type}')
109
+ return img
110
+
111
+
112
+ def _convert_output_type_range(img, dst_type):
113
+ """Convert the type and range of the image according to dst_type.
114
+
115
+ It converts the image to desired type and range. If `dst_type` is np.uint8,
116
+ images will be converted to np.uint8 type with range [0, 255]. If
117
+ `dst_type` is np.float32, it converts the image to np.float32 type with
118
+ range [0, 1].
119
+ It is mainly used for post-processing images in colorspace conversion
120
+ functions such as rgb2ycbcr and ycbcr2rgb.
121
+
122
+ Args:
123
+ img (ndarray): The image to be converted with np.float32 type and
124
+ range [0, 255].
125
+ dst_type (np.uint8 | np.float32): If dst_type is np.uint8, it
126
+ converts the image to np.uint8 type with range [0, 255]. If
127
+ dst_type is np.float32, it converts the image to np.float32 type
128
+ with range [0, 1].
129
+
130
+ Returns:
131
+ (ndarray): The converted image with desired type and range.
132
+ """
133
+ if dst_type not in (np.uint8, np.float32):
134
+ raise TypeError('The dst_type should be np.float32 or np.uint8, '
135
+ f'but got {dst_type}')
136
+ if dst_type == np.uint8:
137
+ img = img.round()
138
+ else:
139
+ img /= 255.
140
+ return img.astype(dst_type)
141
+
142
+
143
+ def rgb2ycbcr(img, y_only=False):
144
+ """Convert a RGB image to YCbCr image.
145
+
146
+ This function produces the same results as Matlab's `rgb2ycbcr` function.
147
+ It implements the ITU-R BT.601 conversion for standard-definition
148
+ television. See more details in
149
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
150
+
151
+ It differs from a similar function in cv2.cvtColor: `RGB <-> YCrCb`.
152
+ In OpenCV, it implements a JPEG conversion. See more details in
153
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
154
+
155
+ Args:
156
+ img (ndarray): The input image. It accepts:
157
+ 1. np.uint8 type with range [0, 255];
158
+ 2. np.float32 type with range [0, 1].
159
+ y_only (bool): Whether to only return Y channel. Default: False.
160
+
161
+ Returns:
162
+ ndarray: The converted YCbCr image. The output image has the same type
163
+ and range as input image.
164
+ """
165
+ img_type = img.dtype
166
+ img = _convert_input_type_range(img)
167
+ if y_only:
168
+ out_img = np.dot(img, [65.481, 128.553, 24.966]) + 16.0
169
+ else:
170
+ out_img = np.matmul(
171
+ img, [[65.481, -37.797, 112.0], [128.553, -74.203, -93.786],
172
+ [24.966, 112.0, -18.214]]) + [16, 128, 128]
173
+ out_img = _convert_output_type_range(out_img, img_type)
174
+ return out_img
175
+
176
+
177
+ def bgr2ycbcr(img, y_only=False):
178
+ """Convert a BGR image to YCbCr image.
179
+
180
+ The bgr version of rgb2ycbcr.
181
+ It implements the ITU-R BT.601 conversion for standard-definition
182
+ television. See more details in
183
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
184
+
185
+ It differs from a similar function in cv2.cvtColor: `BGR <-> YCrCb`.
186
+ In OpenCV, it implements a JPEG conversion. See more details in
187
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
188
+
189
+ Args:
190
+ img (ndarray): The input image. It accepts:
191
+ 1. np.uint8 type with range [0, 255];
192
+ 2. np.float32 type with range [0, 1].
193
+ y_only (bool): Whether to only return Y channel. Default: False.
194
+
195
+ Returns:
196
+ ndarray: The converted YCbCr image. The output image has the same type
197
+ and range as input image.
198
+ """
199
+ img_type = img.dtype
200
+ img = _convert_input_type_range(img)
201
+ if y_only:
202
+ out_img = np.dot(img, [24.966, 128.553, 65.481]) + 16.0
203
+ else:
204
+ out_img = np.matmul(
205
+ img, [[24.966, 112.0, -18.214], [128.553, -74.203, -93.786],
206
+ [65.481, -37.797, 112.0]]) + [16, 128, 128]
207
+ out_img = _convert_output_type_range(out_img, img_type)
208
+ return out_img
209
+
210
+
211
+ def ycbcr2rgb(img):
212
+ """Convert a YCbCr image to RGB image.
213
+
214
+ This function produces the same results as Matlab's ycbcr2rgb function.
215
+ It implements the ITU-R BT.601 conversion for standard-definition
216
+ television. See more details in
217
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
218
+
219
+ It differs from a similar function in cv2.cvtColor: `YCrCb <-> RGB`.
220
+ In OpenCV, it implements a JPEG conversion. See more details in
221
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
222
+
223
+ Args:
224
+ img (ndarray): The input image. It accepts:
225
+ 1. np.uint8 type with range [0, 255];
226
+ 2. np.float32 type with range [0, 1].
227
+
228
+ Returns:
229
+ ndarray: The converted RGB image. The output image has the same type
230
+ and range as input image.
231
+ """
232
+ img_type = img.dtype
233
+ img = _convert_input_type_range(img) * 255
234
+ out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
235
+ [0, -0.00153632, 0.00791071],
236
+ [0.00625893, -0.00318811, 0]]) * 255.0 + [
237
+ -222.921, 135.576, -276.836
238
+ ]
239
+ out_img = _convert_output_type_range(out_img, img_type)
240
+ return out_img
241
+
242
+
243
+ def ycbcr2bgr(img):
244
+ """Convert a YCbCr image to BGR image.
245
+
246
+ The bgr version of ycbcr2rgb.
247
+ It implements the ITU-R BT.601 conversion for standard-definition
248
+ television. See more details in
249
+ https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion.
250
+
251
+ It differs from a similar function in cv2.cvtColor: `YCrCb <-> BGR`.
252
+ In OpenCV, it implements a JPEG conversion. See more details in
253
+ https://en.wikipedia.org/wiki/YCbCr#JPEG_conversion.
254
+
255
+ Args:
256
+ img (ndarray): The input image. It accepts:
257
+ 1. np.uint8 type with range [0, 255];
258
+ 2. np.float32 type with range [0, 1].
259
+
260
+ Returns:
261
+ ndarray: The converted BGR image. The output image has the same type
262
+ and range as input image.
263
+ """
264
+ img_type = img.dtype
265
+ img = _convert_input_type_range(img) * 255
266
+ out_img = np.matmul(img, [[0.00456621, 0.00456621, 0.00456621],
267
+ [0.00791071, -0.00153632, 0],
268
+ [0, -0.00318811, 0.00625893]]) * 255.0 + [
269
+ -276.836, 135.576, -222.921
270
+ ]
271
+ out_img = _convert_output_type_range(out_img, img_type)
272
+ return out_img
273
+
274
+
275
+ def convert_color_factory(src, dst):
276
+
277
+ code = getattr(cv2, f'COLOR_{src.upper()}2{dst.upper()}')
278
+
279
+ def convert_color(img):
280
+ out_img = cv2.cvtColor(img, code)
281
+ return out_img
282
+
283
+ convert_color.__doc__ = f"""Convert a {src.upper()} image to {dst.upper()}
284
+ image.
285
+
286
+ Args:
287
+ img (ndarray or str): The input image.
288
+
289
+ Returns:
290
+ ndarray: The converted {dst.upper()} image.
291
+ """
292
+
293
+ return convert_color
294
+
295
+
296
+ bgr2rgb = convert_color_factory('bgr', 'rgb')
297
+
298
+ rgb2bgr = convert_color_factory('rgb', 'bgr')
299
+
300
+ bgr2hsv = convert_color_factory('bgr', 'hsv')
301
+
302
+ hsv2bgr = convert_color_factory('hsv', 'bgr')
303
+
304
+ bgr2hls = convert_color_factory('bgr', 'hls')
305
+
306
+ hls2bgr = convert_color_factory('hls', 'bgr')
groundingLMM/mmcv/mmcv/image/geometric.py ADDED
@@ -0,0 +1,728 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import numbers
3
+
4
+ import cv2
5
+ import numpy as np
6
+
7
+ from ..utils import to_2tuple
8
+ from .io import imread_backend
9
+
10
+ try:
11
+ from PIL import Image
12
+ except ImportError:
13
+ Image = None
14
+
15
+
16
+ def _scale_size(size, scale):
17
+ """Rescale a size by a ratio.
18
+
19
+ Args:
20
+ size (tuple[int]): (w, h).
21
+ scale (float | tuple(float)): Scaling factor.
22
+
23
+ Returns:
24
+ tuple[int]: scaled size.
25
+ """
26
+ if isinstance(scale, (float, int)):
27
+ scale = (scale, scale)
28
+ w, h = size
29
+ return int(w * float(scale[0]) + 0.5), int(h * float(scale[1]) + 0.5)
30
+
31
+
32
+ cv2_interp_codes = {
33
+ 'nearest': cv2.INTER_NEAREST,
34
+ 'bilinear': cv2.INTER_LINEAR,
35
+ 'bicubic': cv2.INTER_CUBIC,
36
+ 'area': cv2.INTER_AREA,
37
+ 'lanczos': cv2.INTER_LANCZOS4
38
+ }
39
+
40
+ if Image is not None:
41
+ pillow_interp_codes = {
42
+ 'nearest': Image.NEAREST,
43
+ 'bilinear': Image.BILINEAR,
44
+ 'bicubic': Image.BICUBIC,
45
+ 'box': Image.BOX,
46
+ 'lanczos': Image.LANCZOS,
47
+ 'hamming': Image.HAMMING
48
+ }
49
+
50
+
51
+ def imresize(img,
52
+ size,
53
+ return_scale=False,
54
+ interpolation='bilinear',
55
+ out=None,
56
+ backend=None):
57
+ """Resize image to a given size.
58
+
59
+ Args:
60
+ img (ndarray): The input image.
61
+ size (tuple[int]): Target size (w, h).
62
+ return_scale (bool): Whether to return `w_scale` and `h_scale`.
63
+ interpolation (str): Interpolation method, accepted values are
64
+ "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
65
+ backend, "nearest", "bilinear" for 'pillow' backend.
66
+ out (ndarray): The output destination.
67
+ backend (str | None): The image resize backend type. Options are `cv2`,
68
+ `pillow`, `None`. If backend is None, the global imread_backend
69
+ specified by ``mmcv.use_backend()`` will be used. Default: None.
70
+
71
+ Returns:
72
+ tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
73
+ `resized_img`.
74
+ """
75
+ h, w = img.shape[:2]
76
+ if backend is None:
77
+ backend = imread_backend
78
+ if backend not in ['cv2', 'pillow']:
79
+ raise ValueError(f'backend: {backend} is not supported for resize.'
80
+ f"Supported backends are 'cv2', 'pillow'")
81
+
82
+ if backend == 'pillow':
83
+ assert img.dtype == np.uint8, 'Pillow backend only support uint8 type'
84
+ pil_image = Image.fromarray(img)
85
+ pil_image = pil_image.resize(size, pillow_interp_codes[interpolation])
86
+ resized_img = np.array(pil_image)
87
+ else:
88
+ resized_img = cv2.resize(
89
+ img, size, dst=out, interpolation=cv2_interp_codes[interpolation])
90
+ if not return_scale:
91
+ return resized_img
92
+ else:
93
+ w_scale = size[0] / w
94
+ h_scale = size[1] / h
95
+ return resized_img, w_scale, h_scale
96
+
97
+
98
+ def imresize_to_multiple(img,
99
+ divisor,
100
+ size=None,
101
+ scale_factor=None,
102
+ keep_ratio=False,
103
+ return_scale=False,
104
+ interpolation='bilinear',
105
+ out=None,
106
+ backend=None):
107
+ """Resize image according to a given size or scale factor and then rounds
108
+ up the resized or rescaled image size to the nearest value that can be
109
+ divided by the divisor.
110
+
111
+ Args:
112
+ img (ndarray): The input image.
113
+ divisor (int | tuple): Resized image size will be a multiple of
114
+ divisor. If divisor is a tuple, divisor should be
115
+ (w_divisor, h_divisor).
116
+ size (None | int | tuple[int]): Target size (w, h). Default: None.
117
+ scale_factor (None | float | tuple[float]): Multiplier for spatial
118
+ size. Should match input size if it is a tuple and the 2D style is
119
+ (w_scale_factor, h_scale_factor). Default: None.
120
+ keep_ratio (bool): Whether to keep the aspect ratio when resizing the
121
+ image. Default: False.
122
+ return_scale (bool): Whether to return `w_scale` and `h_scale`.
123
+ interpolation (str): Interpolation method, accepted values are
124
+ "nearest", "bilinear", "bicubic", "area", "lanczos" for 'cv2'
125
+ backend, "nearest", "bilinear" for 'pillow' backend.
126
+ out (ndarray): The output destination.
127
+ backend (str | None): The image resize backend type. Options are `cv2`,
128
+ `pillow`, `None`. If backend is None, the global imread_backend
129
+ specified by ``mmcv.use_backend()`` will be used. Default: None.
130
+
131
+ Returns:
132
+ tuple | ndarray: (`resized_img`, `w_scale`, `h_scale`) or
133
+ `resized_img`.
134
+ """
135
+ h, w = img.shape[:2]
136
+ if size is not None and scale_factor is not None:
137
+ raise ValueError('only one of size or scale_factor should be defined')
138
+ elif size is None and scale_factor is None:
139
+ raise ValueError('one of size or scale_factor should be defined')
140
+ elif size is not None:
141
+ size = to_2tuple(size)
142
+ if keep_ratio:
143
+ size = rescale_size((w, h), size, return_scale=False)
144
+ else:
145
+ size = _scale_size((w, h), scale_factor)
146
+
147
+ divisor = to_2tuple(divisor)
148
+ size = tuple([int(np.ceil(s / d)) * d for s, d in zip(size, divisor)])
149
+ resized_img, w_scale, h_scale = imresize(
150
+ img,
151
+ size,
152
+ return_scale=True,
153
+ interpolation=interpolation,
154
+ out=out,
155
+ backend=backend)
156
+ if return_scale:
157
+ return resized_img, w_scale, h_scale
158
+ else:
159
+ return resized_img
160
+
161
+
162
+ def imresize_like(img,
163
+ dst_img,
164
+ return_scale=False,
165
+ interpolation='bilinear',
166
+ backend=None):
167
+ """Resize image to the same size of a given image.
168
+
169
+ Args:
170
+ img (ndarray): The input image.
171
+ dst_img (ndarray): The target image.
172
+ return_scale (bool): Whether to return `w_scale` and `h_scale`.
173
+ interpolation (str): Same as :func:`resize`.
174
+ backend (str | None): Same as :func:`resize`.
175
+
176
+ Returns:
177
+ tuple or ndarray: (`resized_img`, `w_scale`, `h_scale`) or
178
+ `resized_img`.
179
+ """
180
+ h, w = dst_img.shape[:2]
181
+ return imresize(img, (w, h), return_scale, interpolation, backend=backend)
182
+
183
+
184
+ def rescale_size(old_size, scale, return_scale=False):
185
+ """Calculate the new size to be rescaled to.
186
+
187
+ Args:
188
+ old_size (tuple[int]): The old size (w, h) of image.
189
+ scale (float | tuple[int]): The scaling factor or maximum size.
190
+ If it is a float number, then the image will be rescaled by this
191
+ factor, else if it is a tuple of 2 integers, then the image will
192
+ be rescaled as large as possible within the scale.
193
+ return_scale (bool): Whether to return the scaling factor besides the
194
+ rescaled image size.
195
+
196
+ Returns:
197
+ tuple[int]: The new rescaled image size.
198
+ """
199
+ w, h = old_size
200
+ if isinstance(scale, (float, int)):
201
+ if scale <= 0:
202
+ raise ValueError(f'Invalid scale {scale}, must be positive.')
203
+ scale_factor = scale
204
+ elif isinstance(scale, tuple):
205
+ max_long_edge = max(scale)
206
+ max_short_edge = min(scale)
207
+ scale_factor = min(max_long_edge / max(h, w),
208
+ max_short_edge / min(h, w))
209
+ else:
210
+ raise TypeError(
211
+ f'Scale must be a number or tuple of int, but got {type(scale)}')
212
+
213
+ new_size = _scale_size((w, h), scale_factor)
214
+
215
+ if return_scale:
216
+ return new_size, scale_factor
217
+ else:
218
+ return new_size
219
+
220
+
221
+ def imrescale(img,
222
+ scale,
223
+ return_scale=False,
224
+ interpolation='bilinear',
225
+ backend=None):
226
+ """Resize image while keeping the aspect ratio.
227
+
228
+ Args:
229
+ img (ndarray): The input image.
230
+ scale (float | tuple[int]): The scaling factor or maximum size.
231
+ If it is a float number, then the image will be rescaled by this
232
+ factor, else if it is a tuple of 2 integers, then the image will
233
+ be rescaled as large as possible within the scale.
234
+ return_scale (bool): Whether to return the scaling factor besides the
235
+ rescaled image.
236
+ interpolation (str): Same as :func:`resize`.
237
+ backend (str | None): Same as :func:`resize`.
238
+
239
+ Returns:
240
+ ndarray: The rescaled image.
241
+ """
242
+ h, w = img.shape[:2]
243
+ new_size, scale_factor = rescale_size((w, h), scale, return_scale=True)
244
+ rescaled_img = imresize(
245
+ img, new_size, interpolation=interpolation, backend=backend)
246
+ if return_scale:
247
+ return rescaled_img, scale_factor
248
+ else:
249
+ return rescaled_img
250
+
251
+
252
def imflip(img, direction='horizontal'):
    """Return a flipped copy of an image.

    Args:
        img (ndarray): Image to be flipped.
        direction (str): The flip direction, one of "horizontal",
            "vertical" or "diagonal".

    Returns:
        ndarray: The flipped image.
    """
    # Map each supported direction onto the numpy axis (or axes) to flip.
    flip_axes = {'horizontal': 1, 'vertical': 0, 'diagonal': (0, 1)}
    assert direction in flip_axes
    return np.flip(img, axis=flip_axes[direction])
270
+
271
+
272
def imflip_(img, direction='horizontal'):
    """Flip an image in place, horizontally, vertically or both.

    Args:
        img (ndarray): Image to be flipped.
        direction (str): The flip direction, one of "horizontal",
            "vertical" or "diagonal".

    Returns:
        ndarray: The flipped image (same buffer as the input).
    """
    # cv2.flip codes: 1 = around y-axis, 0 = around x-axis, -1 = both.
    flip_codes = {'horizontal': 1, 'vertical': 0, 'diagonal': -1}
    assert direction in flip_codes
    # Passing `img` as the dst argument makes cv2 flip in place.
    return cv2.flip(img, flip_codes[direction], img)
290
+
291
+
292
def imrotate(img,
             angle,
             center=None,
             scale=1.0,
             border_value=0,
             interpolation='bilinear',
             auto_bound=False):
    """Rotate an image.

    Args:
        img (ndarray): Image to be rotated.
        angle (float): Rotation angle in degrees, positive values mean
            clockwise rotation.
        center (tuple[float], optional): Center point (w, h) of the rotation in
            the source image. If not specified, the center of the image will be
            used.
        scale (float): Isotropic scale factor.
        border_value (int): Border value.
        interpolation (str): Same as :func:`resize`.
        auto_bound (bool): Whether to adjust the image size to cover the whole
            rotated image.

    Returns:
        ndarray: The rotated image.
    """
    if center is not None and auto_bound:
        raise ValueError('`auto_bound` conflicts with `center`')
    h, w = img.shape[:2]
    if center is None:
        # Rotate about the center of the pixel grid ((w-1)/2, (h-1)/2).
        center = ((w - 1) * 0.5, (h - 1) * 0.5)
    assert isinstance(center, tuple)

    # cv2.getRotationMatrix2D treats positive angles as counter-clockwise,
    # while this API promises clockwise rotation, hence the negation.
    matrix = cv2.getRotationMatrix2D(center, -angle, scale)
    if auto_bound:
        # Enlarge the output canvas so the entire rotated image fits, and
        # shift the transform so the result stays centered in the new canvas.
        cos = np.abs(matrix[0, 0])
        sin = np.abs(matrix[0, 1])
        new_w = h * sin + w * cos
        new_h = h * cos + w * sin
        matrix[0, 2] += (new_w - w) * 0.5
        matrix[1, 2] += (new_h - h) * 0.5
        w = int(np.round(new_w))
        h = int(np.round(new_h))
    rotated = cv2.warpAffine(
        img,
        matrix, (w, h),
        flags=cv2_interp_codes[interpolation],
        borderValue=border_value)
    return rotated
340
+
341
+
342
def bbox_clip(bboxes, img_shape):
    """Clip bboxes so that they lie inside the image.

    Args:
        bboxes (ndarray): Shape (..., 4*k)
        img_shape (tuple[int]): (height, width) of the image.

    Returns:
        ndarray: Clipped bboxes.
    """
    assert bboxes.shape[-1] % 4 == 0
    # Per-coordinate upper bounds: x coords capped at w-1, y coords at h-1.
    upper = np.empty(bboxes.shape[-1], dtype=bboxes.dtype)
    upper[0::2] = img_shape[1] - 1
    upper[1::2] = img_shape[0] - 1
    # Clamp into [0, upper] in a single pass.
    return np.clip(bboxes, 0, upper)
358
+
359
+
360
def bbox_scaling(bboxes, scale, clip_shape=None):
    """Scale bboxes about their own centers.

    Args:
        bboxes (ndarray): Shape(..., 4).
        scale (float): Scaling factor.
        clip_shape (tuple[int], optional): If specified, bboxes that exceed the
            boundary will be clipped according to the given shape (h, w).

    Returns:
        ndarray: Scaled bboxes.
    """
    if float(scale) == 1.0:
        # Nothing to do; still return a copy so callers may mutate freely.
        result = bboxes.copy()
    else:
        # Box extents use the inclusive-pixel convention (+1).
        widths = bboxes[..., 2] - bboxes[..., 0] + 1
        heights = bboxes[..., 3] - bboxes[..., 1] + 1
        half_dw = widths * (scale - 1) * 0.5
        half_dh = heights * (scale - 1) * 0.5
        deltas = np.stack((-half_dw, -half_dh, half_dw, half_dh), axis=-1)
        result = bboxes + deltas
    if clip_shape is None:
        return result
    return bbox_clip(result, clip_shape)
384
+
385
+
386
def imcrop(img, bboxes, scale=1.0, pad_fill=None):
    """Crop image patches.

    3 steps: scale the bboxes -> clip bboxes -> crop and pad.

    Args:
        img (ndarray): Image to be cropped.
        bboxes (ndarray): Shape (k, 4) or (4, ), location of cropped bboxes.
        scale (float, optional): Scale ratio of bboxes, the default value
            1.0 means no padding.
        pad_fill (Number | list[Number]): Value to be filled for padding.
            Default: None, which means no padding.

    Returns:
        list[ndarray] | ndarray: The cropped image patches.
    """
    chn = 1 if img.ndim == 2 else img.shape[2]
    if pad_fill is not None:
        if isinstance(pad_fill, (int, float)):
            # Broadcast a scalar fill value across every channel.
            pad_fill = [pad_fill for _ in range(chn)]
        assert len(pad_fill) == chn

    # Normalize a single (4, ) bbox to shape (1, 4) for uniform processing.
    _bboxes = bboxes[None, ...] if bboxes.ndim == 1 else bboxes
    scaled_bboxes = bbox_scaling(_bboxes, scale).astype(np.int32)
    clipped_bbox = bbox_clip(scaled_bboxes, img.shape)

    patches = []
    for i in range(clipped_bbox.shape[0]):
        x1, y1, x2, y2 = tuple(clipped_bbox[i, :])
        if pad_fill is None:
            # No padding requested: crop only the in-image region.
            patch = img[y1:y2 + 1, x1:x2 + 1, ...]
        else:
            # Padding requested: allocate a patch sized to the full
            # (unclipped) scaled bbox, pre-filled with pad_fill, then paste
            # the visible image region at the correct offset inside it.
            _x1, _y1, _x2, _y2 = tuple(scaled_bboxes[i, :])
            if chn == 1:
                patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1)
            else:
                patch_shape = (_y2 - _y1 + 1, _x2 - _x1 + 1, chn)
            patch = np.array(
                pad_fill, dtype=img.dtype) * np.ones(
                    patch_shape, dtype=img.dtype)
            # Offsets are nonzero only when the scaled bbox extends past the
            # top/left image border.
            x_start = 0 if _x1 >= 0 else -_x1
            y_start = 0 if _y1 >= 0 else -_y1
            w = x2 - x1 + 1
            h = y2 - y1 + 1
            patch[y_start:y_start + h, x_start:x_start + w,
                  ...] = img[y1:y1 + h, x1:x1 + w, ...]
        patches.append(patch)

    # Mirror the input arity: single bbox in -> single patch out.
    if bboxes.ndim == 1:
        return patches[0]
    else:
        return patches
438
+
439
+
440
def impad(img,
          *,
          shape=None,
          padding=None,
          pad_val=0,
          padding_mode='constant'):
    """Pad the given image to a certain shape or pad on all sides with
    specified padding mode and padding value.

    Args:
        img (ndarray): Image to be padded.
        shape (tuple[int]): Expected padding shape (h, w). Default: None.
        padding (int or tuple[int]): Padding on each border. If a single int is
            provided this is used to pad all borders. If tuple of length 2 is
            provided this is the padding on left/right and top/bottom
            respectively. If a tuple of length 4 is provided this is the
            padding for the left, top, right and bottom borders respectively.
            Default: None. Note that `shape` and `padding` can not be both
            set.
        pad_val (Number | Sequence[Number]): Values to be filled in padding
            areas when padding_mode is 'constant'. Default: 0.
        padding_mode (str): Type of padding. Should be: constant, edge,
            reflect or symmetric. Default: constant.

            - constant: pads with a constant value, this value is specified
              with pad_val.
            - edge: pads with the last value at the edge of the image.
            - reflect: pads with reflection of image without repeating the last
              value on the edge. For example, padding [1, 2, 3, 4] with 2
              elements on both sides in reflect mode will result in
              [3, 2, 1, 2, 3, 4, 3, 2].
            - symmetric: pads with reflection of image repeating the last value
              on the edge. For example, padding [1, 2, 3, 4] with 2 elements on
              both sides in symmetric mode will result in
              [2, 1, 1, 2, 3, 4, 4, 3]

    Returns:
        ndarray: The padded image.
    """

    # Exactly one of `shape` and `padding` must be provided.
    assert (shape is not None) ^ (padding is not None)
    if shape is not None:
        # Grow only on the right/bottom so the original image keeps its
        # top-left position: (left, top, right, bottom).
        padding = (0, 0, shape[1] - img.shape[1], shape[0] - img.shape[0])

    # check pad_val
    if isinstance(pad_val, tuple):
        assert len(pad_val) == img.shape[-1]
    elif not isinstance(pad_val, numbers.Number):
        raise TypeError('pad_val must be a int or a tuple. '
                        f'But received {type(pad_val)}')

    # check padding
    if isinstance(padding, tuple) and len(padding) in [2, 4]:
        if len(padding) == 2:
            # (left/right, top/bottom) -> (left, top, right, bottom).
            padding = (padding[0], padding[1], padding[0], padding[1])
    elif isinstance(padding, numbers.Number):
        padding = (padding, padding, padding, padding)
    else:
        raise ValueError('Padding must be a int or a 2, or 4 element tuple.'
                         f'But received {padding}')

    # check padding mode
    assert padding_mode in ['constant', 'edge', 'reflect', 'symmetric']

    border_type = {
        'constant': cv2.BORDER_CONSTANT,
        'edge': cv2.BORDER_REPLICATE,
        'reflect': cv2.BORDER_REFLECT_101,
        'symmetric': cv2.BORDER_REFLECT
    }
    # cv2.copyMakeBorder expects (top, bottom, left, right) while `padding`
    # is (left, top, right, bottom), hence the index order below.
    img = cv2.copyMakeBorder(
        img,
        padding[1],
        padding[3],
        padding[0],
        padding[2],
        border_type[padding_mode],
        value=pad_val)

    return img
520
+
521
+
522
def impad_to_multiple(img, divisor, pad_val=0):
    """Pad an image so that both spatial edges are multiples of ``divisor``.

    Args:
        img (ndarray): Image to be padded.
        divisor (int): Padded image edges will be multiple to divisor.
        pad_val (Number | Sequence[Number]): Same as :func:`impad`.

    Returns:
        ndarray: The padded image.
    """
    h, w = img.shape[:2]
    # Round each edge up to the next multiple of `divisor` with int math.
    target_h = ((h + divisor - 1) // divisor) * divisor
    target_w = ((w + divisor - 1) // divisor) * divisor
    return impad(img, shape=(target_h, target_w), pad_val=pad_val)
536
+
537
+
538
def cutout(img, shape, pad_val=0):
    """Cut out a random rectangle of the image and fill it with ``pad_val``.

    Args:
        img (ndarray): Image to be cutout.
        shape (int | tuple[int]): Expected cutout shape (h, w). If given as a
            int, the value will be used for both h and w.
        pad_val (int | float | tuple[int | float]): Values to be filled in the
            cut area. Defaults to 0.

    Returns:
        ndarray: The cutout image.
    """

    n_channels = 1 if img.ndim == 2 else img.shape[2]
    if isinstance(shape, int):
        cut_h = cut_w = shape
    else:
        assert isinstance(shape, tuple) and len(shape) == 2, \
            f'shape must be a int or a tuple with length 2, but got type ' \
            f'{type(shape)} instead.'
        cut_h, cut_w = shape
    if isinstance(pad_val, (int, float)):
        # Broadcast a scalar fill value across channels.
        pad_val = (pad_val, ) * n_channels
    elif isinstance(pad_val, tuple):
        assert len(pad_val) == n_channels, \
            'Expected the num of elements in tuple equals the channels' \
            'of input image. Found {} vs {}'.format(
                len(pad_val), n_channels)
    else:
        raise TypeError(f'Invalid type {type(pad_val)} for `pad_val`')

    img_h, img_w = img.shape[:2]
    # Two uniform draws (y first, then x) pick the cutout center; the draw
    # order is kept so seeded runs match the previous implementation.
    center_y = np.random.uniform(img_h)
    center_x = np.random.uniform(img_w)

    top = int(max(0, center_y - cut_h / 2.))
    left = int(max(0, center_x - cut_w / 2.))
    bottom = min(img_h, top + cut_h)
    right = min(img_w, left + cut_w)

    if img.ndim == 2:
        hole_shape = (bottom - top, right - left)
    else:
        hole_shape = (bottom - top, right - left, n_channels)

    result = img.copy()
    filler = np.array(
        pad_val, dtype=img.dtype) * np.ones(
            hole_shape, dtype=img.dtype)
    result[top:bottom, left:right, ...] = filler

    return result
591
+
592
+
593
+ def _get_shear_matrix(magnitude, direction='horizontal'):
594
+ """Generate the shear matrix for transformation.
595
+
596
+ Args:
597
+ magnitude (int | float): The magnitude used for shear.
598
+ direction (str): The flip direction, either "horizontal"
599
+ or "vertical".
600
+
601
+ Returns:
602
+ ndarray: The shear matrix with dtype float32.
603
+ """
604
+ if direction == 'horizontal':
605
+ shear_matrix = np.float32([[1, magnitude, 0], [0, 1, 0]])
606
+ elif direction == 'vertical':
607
+ shear_matrix = np.float32([[1, 0, 0], [magnitude, 1, 0]])
608
+ return shear_matrix
609
+
610
+
611
def imshear(img,
            magnitude,
            direction='horizontal',
            border_value=0,
            interpolation='bilinear'):
    """Shear an image along one axis.

    Args:
        img (ndarray): Image to be sheared with format (h, w)
            or (h, w, c).
        magnitude (int | float): The magnitude used for shear.
        direction (str): The flip direction, either "horizontal"
            or "vertical".
        border_value (int | tuple[int]): Value used in case of a
            constant border.
        interpolation (str): Same as :func:`resize`.

    Returns:
        ndarray: The sheared image.
    """
    assert direction in ['horizontal',
                         'vertical'], f'Invalid direction: {direction}'
    height, width = img.shape[:2]
    if img.ndim == 2:
        channels = 1
    elif img.ndim == 3:
        channels = img.shape[-1]
    if isinstance(border_value, int):
        # Broadcast a scalar border value to one entry per channel.
        border_value = (border_value, ) * channels
    elif isinstance(border_value, tuple):
        assert len(border_value) == channels, \
            'Expected the num of elements in tuple equals the channels' \
            'of input image. Found {} vs {}'.format(
                len(border_value), channels)
    else:
        raise ValueError(
            f'Invalid type {type(border_value)} for `border_value`')
    # cv2.warpAffine raises TypeError when `borderValue` has more than 3
    # elements (e.g. shearing masks with many channels), so only the first
    # 3 values are passed through, as in the original implementation.
    return cv2.warpAffine(
        img,
        _get_shear_matrix(magnitude, direction), (width, height),
        borderValue=border_value[:3],
        flags=cv2_interp_codes[interpolation])
660
+
661
+
662
+ def _get_translate_matrix(offset, direction='horizontal'):
663
+ """Generate the translate matrix.
664
+
665
+ Args:
666
+ offset (int | float): The offset used for translate.
667
+ direction (str): The translate direction, either
668
+ "horizontal" or "vertical".
669
+
670
+ Returns:
671
+ ndarray: The translate matrix with dtype float32.
672
+ """
673
+ if direction == 'horizontal':
674
+ translate_matrix = np.float32([[1, 0, offset], [0, 1, 0]])
675
+ elif direction == 'vertical':
676
+ translate_matrix = np.float32([[1, 0, 0], [0, 1, offset]])
677
+ return translate_matrix
678
+
679
+
680
def imtranslate(img,
                offset,
                direction='horizontal',
                border_value=0,
                interpolation='bilinear'):
    """Translate an image along one axis.

    Args:
        img (ndarray): Image to be translated with format
            (h, w) or (h, w, c).
        offset (int | float): The offset used for translate.
        direction (str): The translate direction, either "horizontal"
            or "vertical".
        border_value (int | tuple[int]): Value used in case of a
            constant border.
        interpolation (str): Same as :func:`resize`.

    Returns:
        ndarray: The translated image.
    """
    assert direction in ['horizontal',
                         'vertical'], f'Invalid direction: {direction}'
    height, width = img.shape[:2]
    if img.ndim == 2:
        channels = 1
    elif img.ndim == 3:
        channels = img.shape[-1]
    if isinstance(border_value, int):
        # Broadcast a scalar border value to one entry per channel.
        border_value = (border_value, ) * channels
    elif isinstance(border_value, tuple):
        assert len(border_value) == channels, \
            'Expected the num of elements in tuple equals the channels' \
            'of input image. Found {} vs {}'.format(
                len(border_value), channels)
    else:
        raise ValueError(
            f'Invalid type {type(border_value)} for `border_value`.')
    # cv2.warpAffine raises TypeError when `borderValue` has more than 3
    # elements (e.g. translating masks with many channels), so only the
    # first 3 values are passed through, as in the original implementation.
    return cv2.warpAffine(
        img,
        _get_translate_matrix(offset, direction), (width, height),
        borderValue=border_value[:3],
        flags=cv2_interp_codes[interpolation])
groundingLMM/mmcv/mmcv/image/io.py ADDED
@@ -0,0 +1,314 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import io
3
+ import os.path as osp
4
+ import warnings
5
+ from pathlib import Path
6
+
7
+ import cv2
8
+ import numpy as np
9
+ from cv2 import (IMREAD_COLOR, IMREAD_GRAYSCALE, IMREAD_IGNORE_ORIENTATION,
10
+ IMREAD_UNCHANGED)
11
+
12
+ from mmcv.fileio import FileClient
13
+ from mmcv.utils import is_filepath, is_str
14
+
15
+ try:
16
+ from turbojpeg import TJCS_RGB, TJPF_BGR, TJPF_GRAY, TurboJPEG
17
+ except ImportError:
18
+ TJCS_RGB = TJPF_GRAY = TJPF_BGR = TurboJPEG = None
19
+
20
+ try:
21
+ from PIL import Image, ImageOps
22
+ except ImportError:
23
+ Image = None
24
+
25
+ try:
26
+ import tifffile
27
+ except ImportError:
28
+ tifffile = None
29
+
30
# Lazily-created TurboJPEG handle; instantiated on first selection of the
# 'turbojpeg' backend (see `use_backend`).
jpeg = None
supported_backends = ['cv2', 'turbojpeg', 'pillow', 'tifffile']

# Map the string flags accepted by `imread`/`imfrombytes` to the
# corresponding OpenCV imread flag constants.
imread_flags = {
    'color': IMREAD_COLOR,
    'grayscale': IMREAD_GRAYSCALE,
    'unchanged': IMREAD_UNCHANGED,
    'color_ignore_orientation': IMREAD_IGNORE_ORIENTATION | IMREAD_COLOR,
    'grayscale_ignore_orientation':
    IMREAD_IGNORE_ORIENTATION | IMREAD_GRAYSCALE
}

# Globally selected image decoding backend; change it via `use_backend`.
imread_backend = 'cv2'
43
+
44
+
45
def use_backend(backend):
    """Select a backend for image decoding.

    Args:
        backend (str): The image decoding backend type. Options are `cv2`,
            `pillow`, `turbojpeg` (see https://github.com/lilohuang/PyTurboJPEG)
            and `tifffile`. `turbojpeg` is faster but it only supports `.jpeg`
            file format.
    """
    assert backend in supported_backends
    global imread_backend
    imread_backend = backend
    if backend == 'turbojpeg':
        if TurboJPEG is None:
            raise ImportError('`PyTurboJPEG` is not installed')
        global jpeg
        if jpeg is None:
            # Create the TurboJPEG decoder handle lazily, exactly once.
            jpeg = TurboJPEG()
    elif backend == 'pillow':
        if Image is None:
            raise ImportError('`Pillow` is not installed')
    elif backend == 'tifffile':
        if tifffile is None:
            raise ImportError('`tifffile` is not installed')
69
+
70
+
71
def _jpegflag(flag='color', channel_order='bgr'):
    """Translate ``flag``/``channel_order`` into a TurboJPEG pixel format.

    Args:
        flag (str): Either 'color' or 'grayscale'.
        channel_order (str): Either 'bgr' or 'rgb' (case-insensitive).

    Returns:
        int: A TurboJPEG format constant.
    """
    channel_order = channel_order.lower()
    if channel_order not in ('rgb', 'bgr'):
        raise ValueError('channel order must be either "rgb" or "bgr"')

    if flag == 'grayscale':
        return TJPF_GRAY
    if flag == 'color':
        # NOTE(review): the rgb branch returns TJCS_RGB (a colorspace
        # constant) rather than TJPF_RGB — preserved from the original;
        # verify against PyTurboJPEG usage.
        return TJPF_BGR if channel_order == 'bgr' else TJCS_RGB
    raise ValueError('flag must be "color" or "grayscale"')
85
+
86
+
87
def _pillow2array(img, flag='color', channel_order='bgr'):
    """Convert a pillow image to numpy array.

    Args:
        img (:obj:`PIL.Image.Image`): The image loaded using PIL
        flag (str): Flags specifying the color type of a loaded image,
            candidates are 'color', 'grayscale' and 'unchanged'.
            Default to 'color'.
        channel_order (str): The channel order of the output image array,
            candidates are 'bgr' and 'rgb'. Default to 'bgr'.

    Returns:
        np.ndarray: The converted numpy array
    """
    channel_order = channel_order.lower()
    if channel_order not in ['rgb', 'bgr']:
        raise ValueError('channel order must be either "rgb" or "bgr"')

    if flag == 'unchanged':
        array = np.array(img)
        if array.ndim >= 3 and array.shape[2] >= 3:  # color image
            array[:, :, :3] = array[:, :, (2, 1, 0)]  # RGB to BGR
    else:
        # Handle exif orientation tag
        if flag in ['color', 'grayscale']:
            img = ImageOps.exif_transpose(img)
        # If the image mode is not 'RGB', convert it to 'RGB' first.
        if img.mode != 'RGB':
            if img.mode != 'LA':
                # Most formats except 'LA' can be directly converted to RGB
                img = img.convert('RGB')
            else:
                # When the mode is 'LA', the default conversion will fill in
                #  the canvas with black, which sometimes shadows black objects
                #  in the foreground.
                #
                # Therefore, a random color (124, 117, 104) is used for canvas
                img_rgba = img.convert('RGBA')
                img = Image.new('RGB', img_rgba.size, (124, 117, 104))
                img.paste(img_rgba, mask=img_rgba.split()[3])  # 3 is alpha
        if flag in ['color', 'color_ignore_orientation']:
            array = np.array(img)
            if channel_order != 'rgb':
                array = array[:, :, ::-1]  # RGB to BGR
        elif flag in ['grayscale', 'grayscale_ignore_orientation']:
            # Collapse to a single luminance channel.
            img = img.convert('L')
            array = np.array(img)
        else:
            raise ValueError(
                'flag must be "color", "grayscale", "unchanged", '
                f'"color_ignore_orientation" or "grayscale_ignore_orientation"'
                f' but got {flag}')
    return array
140
+
141
+
142
def imread(img_or_path,
           flag='color',
           channel_order='bgr',
           backend=None,
           file_client_args=None):
    """Read an image.

    Note:
        In v1.4.1 and later, add `file_client_args` parameters.

    Args:
        img_or_path (ndarray or str or Path): Either a numpy array or str or
            pathlib.Path. If it is a numpy array (loaded image), then
            it will be returned as is.
        flag (str): Flags specifying the color type of a loaded image,
            candidates are `color`, `grayscale`, `unchanged`,
            `color_ignore_orientation` and `grayscale_ignore_orientation`.
            By default, `cv2` and `pillow` backend would rotate the image
            according to its EXIF info unless called with `unchanged` or
            `*_ignore_orientation` flags. `turbojpeg` and `tifffile` backend
            always ignore image's EXIF info regardless of the flag.
            The `turbojpeg` backend only supports `color` and `grayscale`.
        channel_order (str): Order of channel, candidates are `bgr` and `rgb`.
        backend (str | None): The image decoding backend type. Options are
            `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`.
            If backend is None, the global imread_backend specified by
            ``mmcv.use_backend()`` will be used. Default: None.
        file_client_args (dict | None): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Returns:
        ndarray: Loaded image array.

    Examples:
        >>> import mmcv
        >>> img_path = '/path/to/img.jpg'
        >>> img = mmcv.imread(img_path)
        >>> img = mmcv.imread(img_path, flag='color', channel_order='rgb',
        ...     backend='cv2')
        >>> img = mmcv.imread(img_path, flag='color', channel_order='bgr',
        ...     backend='pillow')
        >>> s3_img_path = 's3://bucket/img.jpg'
        >>> # infer the file backend by the prefix s3
        >>> img = mmcv.imread(s3_img_path)
        >>> # manually set the file backend petrel
        >>> img = mmcv.imread(s3_img_path, file_client_args={
        ...     'backend': 'petrel'})
        >>> http_img_path = 'http://path/to/img.jpg'
        >>> img = mmcv.imread(http_img_path)
        >>> img = mmcv.imread(http_img_path, file_client_args={
        ...     'backend': 'http'})
    """

    if isinstance(img_or_path, Path):
        img_or_path = str(img_or_path)

    if isinstance(img_or_path, np.ndarray):
        # Already-decoded image: return it unchanged.
        return img_or_path
    elif is_str(img_or_path):
        # Fetch raw bytes through the (possibly remote) file client, then
        # decode them with the selected backend.
        file_client = FileClient.infer_client(file_client_args, img_or_path)
        img_bytes = file_client.get(img_or_path)
        return imfrombytes(img_bytes, flag, channel_order, backend)
    else:
        raise TypeError('"img" must be a numpy array or a str or '
                        'a pathlib.Path object')
208
+
209
+
210
def imfrombytes(content, flag='color', channel_order='bgr', backend=None):
    """Read an image from bytes.

    Args:
        content (bytes): Image bytes got from files or other streams.
        flag (str): Same as :func:`imread`.
        channel_order (str): The channel order of the output, candidates
            are 'bgr' and 'rgb'. Default to 'bgr'.
        backend (str | None): The image decoding backend type. Options are
            `cv2`, `pillow`, `turbojpeg`, `tifffile`, `None`. If backend is
            None, the global imread_backend specified by ``mmcv.use_backend()``
            will be used. Default: None.

    Returns:
        ndarray: Loaded image array.

    Examples:
        >>> img_path = '/path/to/img.jpg'
        >>> with open(img_path, 'rb') as f:
        >>>     img_buff = f.read()
        >>> img = mmcv.imfrombytes(img_buff)
        >>> img = mmcv.imfrombytes(img_buff, flag='color', channel_order='rgb')
        >>> img = mmcv.imfrombytes(img_buff, backend='pillow')
        >>> img = mmcv.imfrombytes(img_buff, backend='cv2')
    """

    if backend is None:
        backend = imread_backend
    if backend not in supported_backends:
        raise ValueError(
            f'backend: {backend} is not supported. Supported '
            "backends are 'cv2', 'turbojpeg', 'pillow', 'tifffile'")
    if backend == 'turbojpeg':
        # `jpeg` is the module-level TurboJPEG handle set up by use_backend.
        img = jpeg.decode(content, _jpegflag(flag, channel_order))
        if img.shape[-1] == 1:
            # Squeeze the trailing singleton channel for grayscale output.
            img = img[:, :, 0]
        return img
    elif backend == 'pillow':
        with io.BytesIO(content) as buff:
            img = Image.open(buff)
            img = _pillow2array(img, flag, channel_order)
        return img
    elif backend == 'tifffile':
        with io.BytesIO(content) as buff:
            img = tifffile.imread(buff)
        return img
    else:
        # Default cv2 path: decode from a uint8 buffer.
        img_np = np.frombuffer(content, np.uint8)
        # Allow `flag` to be either a string key or a raw cv2 flag value.
        flag = imread_flags[flag] if is_str(flag) else flag
        img = cv2.imdecode(img_np, flag)
        if flag == IMREAD_COLOR and channel_order == 'rgb':
            cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)
        return img
263
+
264
+
265
def imwrite(img,
            file_path,
            params=None,
            auto_mkdir=None,
            file_client_args=None):
    """Write image to file.

    Note:
        In v1.4.1 and later, add `file_client_args` parameters.

    Warning:
        The parameter `auto_mkdir` will be deprecated in the future and every
        file clients will make directory automatically.

    Args:
        img (ndarray): Image array to be written.
        file_path (str): Image file path.
        params (None or list): Same as opencv :func:`imwrite` interface.
        auto_mkdir (bool): If the parent folder of `file_path` does not exist,
            whether to create it automatically. It will be deprecated.
        file_client_args (dict | None): Arguments to instantiate a
            FileClient. See :class:`mmcv.fileio.FileClient` for details.
            Default: None.

    Returns:
        bool: Successful or not.

    Examples:
        >>> # write to hard disk client
        >>> ret = mmcv.imwrite(img, '/path/to/img.jpg')
        >>> # infer the file backend by the prefix s3
        >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg')
        >>> # manually set the file backend petrel
        >>> ret = mmcv.imwrite(img, 's3://bucket/img.jpg', file_client_args={
        ...     'backend': 'petrel'})
    """
    assert is_filepath(file_path)
    file_path = str(file_path)
    if auto_mkdir is not None:
        # The flag is ignored; it is kept only for backward compatibility.
        warnings.warn(
            'The parameter `auto_mkdir` will be deprecated in the future and '
            'every file clients will make directory automatically.')
    file_client = FileClient.infer_client(file_client_args, file_path)
    img_ext = osp.splitext(file_path)[-1]
    # Encode image according to image suffix.
    # For example, if image path is '/path/your/img.jpg', the encode
    # format is '.jpg'.
    flag, img_buff = cv2.imencode(img_ext, img, params)
    file_client.put(img_buff.tobytes(), file_path)
    return flag
groundingLMM/mmcv/mmcv/image/misc.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import numpy as np
3
+
4
+ import mmcv
5
+
6
+ try:
7
+ import torch
8
+ except ImportError:
9
+ torch = None
10
+
11
+
12
def tensor2imgs(tensor, mean=None, std=None, to_rgb=True):
    """Convert tensor to 3-channel images or 1-channel gray images.

    Args:
        tensor (torch.Tensor): Tensor that contains multiple images, shape (
            N, C, H, W). :math:`C` can be either 3 or 1.
        mean (tuple[float], optional): Mean of images. If None,
            (0, 0, 0) will be used for tensor with 3-channel,
            while (0, ) for tensor with 1-channel. Defaults to None.
        std (tuple[float], optional): Standard deviation of images. If None,
            (1, 1, 1) will be used for tensor with 3-channel,
            while (1, ) for tensor with 1-channel. Defaults to None.
        to_rgb (bool, optional): Whether the tensor was converted to RGB
            format in the first place. If so, convert it back to BGR.
            For the tensor with 1 channel, it must be False. Defaults to True.

    Returns:
        list[np.ndarray]: A list that contains multiple images.
    """

    if torch is None:
        raise RuntimeError('pytorch is not installed')
    assert torch.is_tensor(tensor) and tensor.ndim == 4
    channels = tensor.size(1)
    assert channels in [1, 3]
    if mean is None:
        mean = (0, ) * channels
    if std is None:
        std = (1, ) * channels
    # A 1-channel tensor cannot have been RGB-converted, hence `not to_rgb`.
    assert (channels == len(mean) == len(std) == 3) or \
        (channels == len(mean) == len(std) == 1 and not to_rgb)

    mean_arr = np.array(mean, dtype=np.float32)
    std_arr = np.array(std, dtype=np.float32)
    images = []
    # Iterating the tensor walks the batch (first) dimension.
    for single in tensor:
        # CHW -> HWC for numpy/cv2-style processing.
        hwc = single.cpu().numpy().transpose(1, 2, 0)
        restored = mmcv.imdenormalize(
            hwc, mean_arr, std_arr, to_bgr=to_rgb).astype(np.uint8)
        images.append(np.ascontiguousarray(restored))
    return images
+ return imgs
groundingLMM/mmcv/mmcv/image/photometric.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import cv2
3
+ import numpy as np
4
+
5
+ from ..utils import is_tuple_of
6
+ from .colorspace import bgr2gray, gray2bgr
7
+
8
+
9
def imnormalize(img, mean, std, to_rgb=True):
    """Normalize an image with mean and std.

    A float32 copy of the input is made first, so the caller's array is left
    untouched; the actual work is delegated to :func:`imnormalize_`.

    Args:
        img (ndarray): Image to be normalized.
        mean (ndarray): The mean to be used for normalize.
        std (ndarray): The std to be used for normalize.
        to_rgb (bool): Whether to convert to rgb.

    Returns:
        ndarray: The normalized image.
    """
    working_copy = img.astype(np.float32, copy=True)
    return imnormalize_(working_copy, mean, std, to_rgb)
23
+
24
+
25
def imnormalize_(img, mean, std, to_rgb=True):
    """Inplace normalize an image with mean and std.

    Args:
        img (ndarray): Image to be normalized.
        mean (ndarray): The mean to be used for normalize.
        std (ndarray): The std to be used for normalize.
        to_rgb (bool): Whether to convert to rgb.

    Returns:
        ndarray: The normalized image.
    """
    # cv2 inplace normalization does not accept uint8
    assert img.dtype != np.uint8
    mean = np.float64(mean.reshape(1, -1))
    # Multiply by the reciprocal of std instead of dividing, so the whole
    # normalization can be expressed with in-place cv2 ops below.
    stdinv = 1 / np.float64(std.reshape(1, -1))
    if to_rgb:
        cv2.cvtColor(img, cv2.COLOR_BGR2RGB, img)  # inplace
    cv2.subtract(img, mean, img)  # inplace
    cv2.multiply(img, stdinv, img)  # inplace
    return img
46
+
47
+
48
def imdenormalize(img, mean, std, to_bgr=True):
    """Reverse :func:`imnormalize`: scale by std and add the mean back.

    Args:
        img (ndarray): Normalized image; must not be uint8.
        mean (ndarray): Per-channel mean that was subtracted.
        std (ndarray): Per-channel std that was divided by.
        to_bgr (bool): Whether to convert the result from RGB to BGR.

    Returns:
        ndarray: The denormalized image.
    """
    assert img.dtype != np.uint8
    channel_mean = mean.reshape(1, -1).astype(np.float64)
    channel_std = std.reshape(1, -1).astype(np.float64)
    denorm = cv2.multiply(img, channel_std)  # makes a copy
    cv2.add(denorm, channel_mean, denorm)  # inplace
    if to_bgr:
        cv2.cvtColor(denorm, cv2.COLOR_RGB2BGR, denorm)  # inplace
    return denorm
57
+
58
+
59
def iminvert(img):
    """Invert (negate) an image.

    Args:
        img (ndarray): Image to be inverted.

    Returns:
        ndarray: The inverted image, i.e. ``255 - img`` elementwise.
    """
    full_white = np.full_like(img, 255)
    return np.subtract(full_white, img)
69
+
70
+
71
def solarize(img, thr=128):
    """Solarize an image (invert all pixel values above a threshold).

    Args:
        img (ndarray): Image to be solarized.
        thr (int): Threshold for solarizing (0 - 255); pixels at or
            above it are inverted.

    Returns:
        ndarray: The solarized image.
    """
    keep_mask = img < thr
    return np.where(keep_mask, img, 255 - img)
83
+
84
+
85
def posterize(img, bits):
    """Posterize an image (reduce the number of bits per color channel).

    Args:
        img (ndarray): Image to be posterized.
        bits (int): Number of bits (1 to 8) to keep for each channel.

    Returns:
        ndarray: The posterized image.
    """
    shift = 8 - bits
    # Zero out the `shift` least-significant bits of every pixel.
    return (img >> shift) << shift
98
+
99
+
100
def adjust_color(img, alpha=1, beta=None, gamma=0):
    r"""Blend the source image with its grayscale version:

    .. math::
        output = img * alpha + gray\_img * beta + gamma

    Args:
        img (ndarray): The input source image.
        alpha (int | float): Weight for the source image. Default 1.
        beta (int | float): Weight for the converted gray image.
            If None, it's assigned the value (1 - `alpha`).
        gamma (int | float): Scalar added to each sum.
            Same as :func:`cv2.addWeighted`. Default 0.

    Returns:
        ndarray: Colored image which has the same size and dtype as input.
    """
    if beta is None:
        beta = 1 - alpha
    # Replicate the single gray channel three times to match the image.
    gray = bgr2gray(img)
    gray = np.tile(gray[..., None], [1, 1, 3])
    blended = cv2.addWeighted(img, alpha, gray, beta, gamma)
    if blended.dtype != np.uint8:
        # For non-uint8 inputs (e.g. np.float32) cv2 does not saturate,
        # so the blended values may fall outside [0, 255]; clip them.
        blended = np.clip(blended, 0, 255)
    return blended
129
+
130
+
131
def imequalize(img):
    """Equalize the image histogram.

    Applies a non-linear mapping to the input image so that the
    grayscale values of the output are uniformly distributed.

    Args:
        img (ndarray): Image to be equalized.

    Returns:
        ndarray: The equalized image, same dtype as the input.
    """

    def _equalize_channel(image, channel):
        """Build and apply the equalization LUT for one channel."""
        chan = image[:, :, channel]
        # Histogram of the channel over the full 8-bit range.
        histo = np.histogram(chan, 256, (0, 255))[0]
        # Only the populated bins matter when computing the step.
        nonzero = histo[histo > 0]
        step = (np.sum(nonzero) - nonzero[-1]) // 255
        if not step:
            lut = np.array(range(256))
        else:
            # Cumulative sum shifted by half a step, normalized by step.
            lut = (np.cumsum(histo) + (step // 2)) // step
            # Shift the lut right by one, prepending 0.
            lut = np.concatenate([[0], lut[:-1]], 0)
            # Guard against integer overflow past the 8-bit range.
            lut[lut > 255] = 255
        # A zero step means the channel is returned untouched;
        # otherwise pixels are remapped through the lut.
        return np.where(np.equal(step, 0), chan, lut[chan])

    # Equalize each channel independently, then stack the results.
    channels = [_equalize_channel(img, c) for c in range(3)]
    return np.stack(channels, axis=-1).astype(img.dtype)
174
+
175
+
176
def adjust_brightness(img, factor=1.):
    """Adjust image brightness.

    Blends the source image with a black image of the same shape:

    .. math::
        output = img * factor + degenerated * (1 - factor)

    A factor of 0.0 yields a black image, 1.0 the original image, and
    values above 1.0 a brighter one.

    Args:
        img (ndarray): Image to be brightened.
        factor (float): A value controls the enhancement.
            Factor 1.0 returns the original image, lower
            factors mean less color (brightness, contrast,
            etc), and higher values more. Default 1.

    Returns:
        ndarray: The brightened image, same dtype as the input.
    """
    black = np.zeros_like(img)
    # Cast to float32 manually so the result matches
    # PIL.ImageEnhance.Brightness as closely as possible
    # (beta = 1 - factor, gamma = 0).
    blended = cv2.addWeighted(
        img.astype(np.float32), factor, black.astype(np.float32),
        1 - factor, 0)
    return np.clip(blended, 0, 255).astype(img.dtype)
206
+
207
+
208
def adjust_contrast(img, factor=1.):
    """Adjust image contrast.

    Blends the source image with a solid-gray image whose value is the
    mean of the grayscale source:

    .. math::
        output = img * factor + degenerated * (1 - factor)

    A factor of 0.0 yields a solid gray image and 1.0 the original.

    Args:
        img (ndarray): Image to be contrasted. BGR order.
        factor (float): Same as :func:`mmcv.adjust_brightness`.

    Returns:
        ndarray: The contrasted image, same dtype as the input.
    """
    gray = bgr2gray(img)
    hist = np.histogram(gray, 256, (0, 255))[0]
    # Mean gray level, rounded to the nearest integer.
    mean_val = round(np.sum(gray) / np.sum(hist))
    # Solid image filled with the mean gray value, expanded to 3 channels.
    degenerated = (np.ones_like(img[..., 0]) * mean_val).astype(img.dtype)
    degenerated = gray2bgr(degenerated)
    blended = cv2.addWeighted(
        img.astype(np.float32), factor, degenerated.astype(np.float32),
        1 - factor, 0)
    return np.clip(blended, 0, 255).astype(img.dtype)
236
+
237
+
238
def auto_contrast(img, cutoff=0):
    """Auto adjust image contrast.

    Maximizes (normalizes) image contrast by removing ``cutoff`` percent
    of the lightest and darkest pixels from the histogram and remapping
    the image so that the darkest remaining pixel becomes black (0) and
    the lightest becomes white (255).

    Args:
        img (ndarray): Image to be contrasted. BGR order.
        cutoff (int | float | tuple): The cutoff percent of the lightest
            and darkest pixels to be removed. If given as tuple, it
            shall be (low, high). Otherwise, the single value will be
            used for both. Defaults to 0.

    Returns:
        ndarray: The contrasted image, same dtype as the input.
    """

    def _stretch_channel(image, channel, bounds):
        """Linearly stretch one channel's histogram onto [0, 255]."""
        chan = image[:, :, channel]
        # Histogram of the channel over the full 8-bit range.
        histo = np.histogram(chan, 256, (0, 255))[0]
        # Drop the requested percentage from both ends of the histogram.
        cum = np.cumsum(histo)
        cut_low = cum[-1] * bounds[0] // 100
        cut_high = cum[-1] - cum[-1] * bounds[1] // 100
        cum = np.clip(cum, cut_low, cut_high) - cut_low
        histo = np.concatenate([[cum[0]], np.diff(cum)], 0)

        # Remap the surviving [low, high] range linearly onto [0, 255].
        nonzero = np.nonzero(histo)[0]
        low, high = nonzero[0], nonzero[-1]
        # If all the values have been cut off, return the origin channel.
        if low >= high:
            return chan
        scale = 255.0 / (high - low)
        lut = np.clip(np.arange(256) * scale - low * scale, 0, 255)
        return lut[chan]

    if isinstance(cutoff, (int, float)):
        cutoff = (cutoff, cutoff)
    else:
        assert isinstance(cutoff, tuple), 'cutoff must be of type int, ' \
            f'float or tuple, but got {type(cutoff)} instead.'
    # Auto adjust contrast for each channel independently, then stack.
    channels = [_stretch_channel(img, c, cutoff) for c in range(3)]
    return np.stack(channels, axis=-1).astype(img.dtype)
292
+
293
+
294
def adjust_sharpness(img, factor=1., kernel=None):
    """Adjust image sharpness.

    Blends the source image with a smoothed (degenerated) version:

    .. math::
        output = img * factor + degenerated * (1 - factor)

    A factor of 0.0 gives the blurred image, 1.0 the original and 2.0 a
    sharpened image.

    Args:
        img (ndarray): Image to be sharpened. BGR order.
        factor (float): Same as :func:`mmcv.adjust_brightness`.
        kernel (np.ndarray, optional): Filter kernel to be applied on the
            img to obtain the degenerated img. Defaults to None, which
            uses a smoothing kernel adopted from PIL.ImageFilter.SMOOTH.

    Note:
        No value sanity check is enforced on the kernel set by users. So
        with an inappropriate kernel, ``adjust_sharpness`` may fail to
        perform the function its name indicates but end up performing
        whatever transform is determined by the kernel.

    Returns:
        ndarray: The sharpened image, same dtype as the input.
    """
    if kernel is None:
        # adopted from PIL.ImageFilter.SMOOTH
        kernel = np.array([[1., 1., 1.], [1., 5., 1.], [1., 1., 1.]]) / 13
    assert isinstance(kernel, np.ndarray), \
        f'kernel must be of type np.ndarray, but got {type(kernel)} instead.'
    assert kernel.ndim == 2, \
        f'kernel must have a dimension of 2, but got {kernel.ndim} instead.'

    blurred = cv2.filter2D(img, -1, kernel)
    sharpened = cv2.addWeighted(
        img.astype(np.float32), factor, blurred.astype(np.float32),
        1 - factor, 0)
    return np.clip(sharpened, 0, 255).astype(img.dtype)
336
+
337
+
338
def adjust_lighting(img, eigval, eigvec, alphastd=0.1, to_rgb=True):
    """AlexNet-style PCA jitter.

    This data augmentation is proposed in `ImageNet Classification with
    Deep Convolutional Neural Networks
    <https://dl.acm.org/doi/pdf/10.1145/3065386>`_.

    Args:
        img (ndarray): Image to be adjusted lighting. BGR order.
        eigval (ndarray): the eigenvalue of the covariance matrix of
            pixel values, respectively.
        eigvec (ndarray): the eigenvector of the covariance matrix of
            pixel values, respectively.
        alphastd (float): The standard deviation for distribution of
            alpha. Defaults to 0.1.
        to_rgb (bool): Whether to convert img to rgb.

    Returns:
        ndarray: The adjusted image.
    """
    assert isinstance(eigval, np.ndarray) and isinstance(eigvec, np.ndarray), \
        f'eigval and eigvec should both be of type np.ndarray, got ' \
        f'{type(eigval)} and {type(eigvec)} instead.'

    assert eigval.ndim == 1 and eigvec.ndim == 2
    assert eigvec.shape == (3, eigval.shape[0])
    n_eigval = eigval.shape[0]
    assert isinstance(alphastd, float), 'alphastd should be of type float, ' \
        f'got {type(alphastd)} instead.'

    out = img.copy().astype(np.float32)
    if to_rgb:
        cv2.cvtColor(out, cv2.COLOR_BGR2RGB, out)  # inplace

    # Sample one alpha per eigenvalue and build the per-channel shift
    # sum_i(eigvec[:, i] * alpha_i * eigval_i), broadcast over the image.
    alpha = np.random.normal(0, alphastd, n_eigval)
    shift = eigvec \
        * np.broadcast_to(alpha.reshape(1, n_eigval), (3, n_eigval)) \
        * np.broadcast_to(eigval.reshape(1, n_eigval), (3, n_eigval))
    shift = np.broadcast_to(shift.sum(axis=1).reshape(1, 1, 3), out.shape)
    return out + shift
379
+
380
+
381
def lut_transform(img, lut_table):
    """Transform array by look-up table.

    Fills the output array with values from the look-up table, using the
    values of the input array as indices.

    Args:
        img (ndarray): Image to be transformed; values must lie within
            [0, 255].
        lut_table (ndarray): look-up table of 256 elements; in case of
            multi-channel input array, the table should either have a
            single channel (in this case the same table is used for all
            channels) or the same number of channels as in the input
            array.

    Returns:
        ndarray: The transformed image.
    """
    assert isinstance(img, np.ndarray)
    assert 0 <= np.min(img) and np.max(img) <= 255
    assert isinstance(lut_table, np.ndarray)
    assert lut_table.shape == (256, )

    # cv2.LUT requires an 8-bit index image.
    lut_input = np.array(img, dtype=np.uint8)
    return cv2.LUT(lut_input, lut_table)
403
+
404
+
405
def clahe(img, clip_limit=40.0, tile_grid_size=(8, 8)):
    """Use CLAHE method to process the image.

    See `ZUIDERVELD,K. Contrast Limited Adaptive Histogram
    Equalization[J]. Graphics Gems, 1994:474-485.` for more information.

    Args:
        img (ndarray): 2-D image to be processed.
        clip_limit (float): Threshold for contrast limiting.
            Default: 40.0.
        tile_grid_size (tuple[int]): Size of grid for histogram
            equalization. Input image will be divided into equally sized
            rectangular tiles. It defines the number of tiles in row and
            column. Default: (8, 8).

    Returns:
        ndarray: The processed image.
    """
    assert isinstance(img, np.ndarray)
    assert img.ndim == 2
    assert isinstance(clip_limit, (float, int))
    assert is_tuple_of(tile_grid_size, int)
    assert len(tile_grid_size) == 2

    # cv2's CLAHE operates on 8-bit single-channel images.
    equalizer = cv2.createCLAHE(clip_limit, tile_grid_size)
    return equalizer.apply(np.array(img, dtype=np.uint8))
groundingLMM/mmcv/mmcv/model_zoo/deprecated.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "resnet50_caffe": "detectron/resnet50_caffe",
3
+ "resnet50_caffe_bgr": "detectron2/resnet50_caffe_bgr",
4
+ "resnet101_caffe": "detectron/resnet101_caffe",
5
+ "resnet101_caffe_bgr": "detectron2/resnet101_caffe_bgr"
6
+ }
groundingLMM/mmcv/mmcv/model_zoo/mmcls.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vgg11": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_batch256_imagenet_20210208-4271cd6c.pth",
3
+ "vgg13": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_batch256_imagenet_20210208-4d1d6080.pth",
4
+ "vgg16": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_batch256_imagenet_20210208-db26f1a5.pth",
5
+ "vgg19": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_batch256_imagenet_20210208-e6920e4a.pth",
6
+ "vgg11_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg11_bn_batch256_imagenet_20210207-f244902c.pth",
7
+ "vgg13_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg13_bn_batch256_imagenet_20210207-1a8b7864.pth",
8
+ "vgg16_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg16_bn_batch256_imagenet_20210208-7e55cd29.pth",
9
+ "vgg19_bn": "https://download.openmmlab.com/mmclassification/v0/vgg/vgg19_bn_batch256_imagenet_20210208-da620c4f.pth",
10
+ "resnet18": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth",
11
+ "resnet34": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet34_8xb32_in1k_20210831-f257d4e6.pth",
12
+ "resnet50": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth",
13
+ "resnet101": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_8xb32_in1k_20210831-539c63f8.pth",
14
+ "resnet152": "https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_8xb32_in1k_20210901-4d7582fa.pth",
15
+ "resnet50_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_b32x8_imagenet_20210531-db14775a.pth",
16
+ "resnet101_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_b32x8_imagenet_20210531-6e13bcd3.pth",
17
+ "resnet152_v1d": "https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_b32x8_imagenet_20210531-278cf22a.pth",
18
+ "resnext50_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext50_32x4d_b32x8_imagenet_20210429-56066e27.pth",
19
+ "resnext101_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x4d_b32x8_imagenet_20210506-e0fa3dd5.pth",
20
+ "resnext101_32x8d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext101_32x8d_b32x8_imagenet_20210506-23a247d5.pth",
21
+ "resnext152_32x4d": "https://download.openmmlab.com/mmclassification/v0/resnext/resnext152_32x4d_b32x8_imagenet_20210524-927787be.pth",
22
+ "se-resnet50": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet50_batch256_imagenet_20200804-ae206104.pth",
23
+ "se-resnet101": "https://download.openmmlab.com/mmclassification/v0/se-resnet/se-resnet101_batch256_imagenet_20200804-ba5b51d4.pth",
24
+ "resnest50": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth",
25
+ "resnest101": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth",
26
+ "resnest200": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth",
27
+ "resnest269": "https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth",
28
+ "shufflenet_v1": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth",
29
+ "shufflenet_v2": "https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth",
30
+ "mobilenet_v2": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth",
31
+ "mobilenet_v3_small": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth",
32
+ "mobilenet_v3_large": "https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_large-3ea3c186.pth",
33
+ "repvgg_A0": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A0_3rdparty_4xb64-coslr-120e_in1k_20210909-883ab98c.pth",
34
+ "repvgg_A1": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A1_3rdparty_4xb64-coslr-120e_in1k_20210909-24003a24.pth",
35
+ "repvgg_A2": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-A2_3rdparty_4xb64-coslr-120e_in1k_20210909-97d7695a.pth",
36
+ "repvgg_B0": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B0_3rdparty_4xb64-coslr-120e_in1k_20210909-446375f4.pth",
37
+ "repvgg_B1": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1_3rdparty_4xb64-coslr-120e_in1k_20210909-750cdf67.pth",
38
+ "repvgg_B1g2": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g2_3rdparty_4xb64-coslr-120e_in1k_20210909-344f6422.pth",
39
+ "repvgg_B1g4": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B1g4_3rdparty_4xb64-coslr-120e_in1k_20210909-d4c1a642.pth",
40
+ "repvgg_B2": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2_3rdparty_4xb64-coslr-120e_in1k_20210909-bd6b937c.pth",
41
+ "repvgg_B2g4": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B2g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-7b7955f0.pth",
42
+ "repvgg_B3": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-dda968bf.pth",
43
+ "repvgg_B3g4": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-B3g4_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-4e54846a.pth",
44
+ "repvgg_D2se": "https://download.openmmlab.com/mmclassification/v0/repvgg/repvgg-D2se_3rdparty_4xb64-autoaug-lbs-mixup-coslr-200e_in1k_20210909-cf3139b7.pth",
45
+ "res2net101_w26": "https://download.openmmlab.com/mmclassification/v0/res2net/res2net101-w26-s4_3rdparty_8xb32_in1k_20210927-870b6c36.pth",
46
+ "res2net50_w14": "https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w14-s8_3rdparty_8xb32_in1k_20210927-bc967bf1.pth",
47
+ "res2net50_w26": "https://download.openmmlab.com/mmclassification/v0/res2net/res2net50-w26-s8_3rdparty_8xb32_in1k_20210927-f547a94b.pth",
48
+ "swin_tiny": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925-66df6be6.pth",
49
+ "swin_small": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219-7f9d988b.pth",
50
+ "swin_base": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_base_patch4_window7_224_22kto1k-f967f799.pth",
51
+ "swin_large": "https://download.openmmlab.com/mmclassification/v0/swin-transformer/convert/swin_large_patch4_window7_224_22kto1k-5f0996db.pth",
52
+ "t2t_vit_t_14": "https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-14_3rdparty_8xb64_in1k_20210928-b7c09b62.pth",
53
+ "t2t_vit_t_19": "https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-19_3rdparty_8xb64_in1k_20210928-7f1478d5.pth",
54
+ "t2t_vit_t_24": "https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-24_3rdparty_8xb64_in1k_20210928-fe95a61b.pth",
55
+ "tnt_small": "https://download.openmmlab.com/mmclassification/v0/tnt/tnt-small-p16_3rdparty_in1k_20210903-c56ee7df.pth",
56
+ "vit_base_p16": "https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-base-p16_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-98e8652b.pth",
57
+ "vit_base_p32": "https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-base-p32_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-9cea8599.pth",
58
+ "vit_large_p16": "https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-large-p16_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-b20ba619.pth"
59
+ }
groundingLMM/mmcv/mmcv/model_zoo/open_mmlab.json ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "vgg16_caffe": "https://download.openmmlab.com/pretrain/third_party/vgg16_caffe-292e1171.pth",
3
+ "detectron/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_caffe-788b5fa3.pth",
4
+ "detectron2/resnet50_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet50_msra-5891d200.pth",
5
+ "detectron/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_caffe-3ad79236.pth",
6
+ "detectron2/resnet101_caffe": "https://download.openmmlab.com/pretrain/third_party/resnet101_msra-6cc46731.pth",
7
+ "detectron2/resnext101_32x8d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x8d-1516f1aa.pth",
8
+ "resnext50_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext50-32x4d-0ab1a123.pth",
9
+ "resnext101_32x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d-a5af3160.pth",
10
+ "resnext101_64x4d": "https://download.openmmlab.com/pretrain/third_party/resnext101_64x4d-ee2c6f71.pth",
11
+ "contrib/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_thangvubk-ad1730dd.pth",
12
+ "detectron/resnet50_gn": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn-9186a21c.pth",
13
+ "detectron/resnet101_gn": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn-cac0ab98.pth",
14
+ "jhu/resnet50_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet50_gn_ws-15beedd8.pth",
15
+ "jhu/resnet101_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnet101_gn_ws-3e3c308c.pth",
16
+ "jhu/resnext50_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn_ws-0d87ac85.pth",
17
+ "jhu/resnext101_32x4d_gn_ws": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn_ws-34ac1a9e.pth",
18
+ "jhu/resnext50_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext50_32x4d_gn-c7e8b754.pth",
19
+ "jhu/resnext101_32x4d_gn": "https://download.openmmlab.com/pretrain/third_party/resnext101_32x4d_gn-ac3bb84e.pth",
20
+ "msra/hrnetv2_w18_small": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18_small-b5a04e21.pth",
21
+ "msra/hrnetv2_w18": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w18-00eb2006.pth",
22
+ "msra/hrnetv2_w32": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w32-dc9eeb4f.pth",
23
+ "msra/hrnetv2_w40": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w40-ed0b031c.pth",
24
+ "msra/hrnetv2_w48": "https://download.openmmlab.com/pretrain/third_party/hrnetv2_w48-d2186c55.pth",
25
+ "bninception_caffe": "https://download.openmmlab.com/pretrain/third_party/bn_inception_caffe-ed2e8665.pth",
26
+ "kin400/i3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/i3d_r50_f32s2_k400-2c57e077.pth",
27
+ "kin400/nl3d_r50_f32s2_k400": "https://download.openmmlab.com/pretrain/third_party/nl3d_r50_f32s2_k400-fa7e7caa.pth",
28
+ "res2net101_v1d_26w_4s": "https://download.openmmlab.com/pretrain/third_party/res2net101_v1d_26w_4s_mmdetv2-f0a600f9.pth",
29
+ "regnetx_400mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_400mf-a5b10d96.pth",
30
+ "regnetx_800mf": "https://download.openmmlab.com/pretrain/third_party/regnetx_800mf-1f4be4c7.pth",
31
+ "regnetx_1.6gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_1.6gf-5791c176.pth",
32
+ "regnetx_3.2gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_3.2gf-c2599b0f.pth",
33
+ "regnetx_4.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_4.0gf-a88f671e.pth",
34
+ "regnetx_6.4gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_6.4gf-006af45d.pth",
35
+ "regnetx_8.0gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_8.0gf-3c68abe7.pth",
36
+ "regnetx_12gf": "https://download.openmmlab.com/pretrain/third_party/regnetx_12gf-4c2a3350.pth",
37
+ "resnet18_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet18_v1c-b5776b93.pth",
38
+ "resnet50_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet50_v1c-2cccc1ad.pth",
39
+ "resnet101_v1c": "https://download.openmmlab.com/pretrain/third_party/resnet101_v1c-e67eebb6.pth",
40
+ "mmedit/vgg16": "https://download.openmmlab.com/mmediting/third_party/vgg_state_dict.pth",
41
+ "mmedit/res34_en_nomixup": "https://download.openmmlab.com/mmediting/third_party/model_best_resnet34_En_nomixup.pth",
42
+ "mmedit/mobilenet_v2": "https://download.openmmlab.com/mmediting/third_party/mobilenet_v2.pth",
43
+ "contrib/mobilenet_v3_large": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_large-bc2c3fd3.pth",
44
+ "contrib/mobilenet_v3_small": "https://download.openmmlab.com/pretrain/third_party/mobilenet_v3_small-47085aa1.pth",
45
+ "resnest50": "https://download.openmmlab.com/pretrain/third_party/resnest50_d2-7497a55b.pth",
46
+ "resnest101": "https://download.openmmlab.com/pretrain/third_party/resnest101_d2-f3b931b2.pth",
47
+ "resnest200": "https://download.openmmlab.com/pretrain/third_party/resnest200_d2-ca88e41f.pth",
48
+ "darknet53": "https://download.openmmlab.com/pretrain/third_party/darknet53-a628ea1b.pth",
49
+ "mmdet/mobilenet_v2": "https://download.openmmlab.com/mmdetection/v2.0/third_party/mobilenet_v2_batch256_imagenet-ff34753d.pth"
50
+ }
groundingLMM/mmcv/mmcv/onnx/__init__.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Copyright (c) OpenMMLab. All rights reserved.
from .info import is_custom_op_loaded
from .symbolic import register_extra_symbolics

# Public API of the mmcv.onnx subpackage.
__all__ = ['register_extra_symbolics', 'is_custom_op_loaded']
groundingLMM/mmcv/mmcv/onnx/info.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import os
3
+ import warnings
4
+
5
+ import torch
6
+
7
+
8
def is_custom_op_loaded():
    """Check whether a custom-op backend is available.

    Probes, in order, the TensorRT plugin and the onnxruntime custom-op
    library shipped with mmcv.ops. Also emits a colored deprecation
    message pointing users to MMDeploy.

    Returns:
        bool: True if a custom-op backend is available or the runtime is
        parrots, False otherwise.
    """
    # Following strings of text style are from colorama package
    bright_style, reset_style = '\x1b[1m', '\x1b[0m'
    red_text, blue_text = '\x1b[31m', '\x1b[34m'
    white_background = '\x1b[107m'

    msg = white_background + bright_style + red_text
    msg += 'DeprecationWarning: This function will be deprecated in future. '
    msg += blue_text + 'Welcome to use the unified model deployment toolbox '
    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
    msg += reset_style
    warnings.warn(msg)

    loaded = False
    try:
        from ..tensorrt import is_tensorrt_plugin_loaded
        loaded = is_tensorrt_plugin_loaded()
    except (ImportError, ModuleNotFoundError):
        # TensorRT support is optional; fall through to onnxruntime.
        pass
    if not loaded:
        try:
            from ..ops import get_onnxruntime_op_path
            loaded = os.path.exists(get_onnxruntime_op_path())
        except (ImportError, ModuleNotFoundError):
            pass
    return loaded or torch.__version__ == 'parrots'
groundingLMM/mmcv/mmcv/onnx/symbolic.py ADDED
@@ -0,0 +1,509 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ """Modified from https://github.com/pytorch/pytorch."""
3
+ import os
4
+ import warnings
5
+
6
+ import numpy as np
7
+ import torch
8
+ from torch.nn.modules.utils import _pair, _single, _triple
9
+ from torch.onnx.symbolic_helper import parse_args
10
+ from torch.onnx.symbolic_registry import register_op
11
+
12
+ from .onnx_utils import symbolic_helper as sym_help
13
+
14
+
15
def _interpolate(name, dim, interpolate_mode):
    """Create an ONNX symbolic function for a torch ``upsample_*`` op.

    Args:
        name (str): Name of the interpolate op being wrapped.
        dim (int): Expected rank of the input tensor.
            NOTE(review): not referenced inside ``symbolic_fn``; appears
            to be kept for signature parity with other symbolics — confirm.
        interpolate_mode (str): Resize mode, e.g. 'nearest' or 'linear'.

    Returns:
        callable: Symbolic function that maps the op to an ONNX
        ``Resize`` node.
    """

    def symbolic_fn(g, input, output_size, *args):
        # Extract optional scale factors / align_corners from the
        # trailing args of the torch op.
        scales, align_corners = sym_help._get_interpolate_attributes(
            g, interpolate_mode, args)
        align_corners = sym_help._maybe_get_scalar(align_corners)
        # Map torch semantics onto ONNX Resize coordinate transforms.
        transformation_mode = 'asymmetric' \
            if interpolate_mode == 'nearest' \
            else 'align_corners' if align_corners else 'pytorch_half_pixel'
        empty_tensor = g.op(
            'Constant', value_t=torch.tensor([], dtype=torch.float32))

        if scales is None:
            # No scale factors given: resize to an explicit output size.
            if 'ONNX_BACKEND' in os.environ and os.environ[
                    'ONNX_BACKEND'] == 'TensorRT':
                # TensorRT needs a constant size tensor, so fold the
                # static batch/channel dims with the target spatial size.
                input_size = input.type().sizes()
                # slice the first two dim
                input_size = input_size[:2]
                # convert output_size to int type
                output_size = sym_help._maybe_get_const(output_size, 'is')
                input_size.extend(output_size)
                output_size = g.op(
                    'Constant',
                    value_t=torch.tensor(input_size, dtype=torch.int64))
            else:
                # Generic path: build the size tensor dynamically from
                # the runtime shape of the input.
                input_size = g.op('Shape', input)
                input_size_beg = sym_help._slice_helper(
                    g, input_size, axes=[0], ends=[2], starts=[0])
                output_size = g.op(
                    'Cast',
                    output_size,
                    to_i=sym_help.cast_pytorch_to_onnx['Long'])
                output_size = g.op(
                    'Concat', input_size_beg, output_size, axis_i=0)
            scales = g.op(
                'Constant', value_t=torch.tensor([], dtype=torch.float32))
            return g.op(
                'Resize',
                input,
                empty_tensor,
                # roi only takes effect with
                # coordinate_transformation_mode="tf_crop_and_resize"
                scales,  # scales is not needed since we are sending out_size
                output_size,
                coordinate_transformation_mode_s=transformation_mode,
                cubic_coeff_a_f=-0.75,  # only valid when mode="cubic"
                mode_s=interpolate_mode,  # nearest, linear, or cubic
                nearest_mode_s='floor')  # only valid when mode="nearest"
        else:
            # Scale factors supplied: let Resize derive the output size.
            return g.op(
                'Resize',
                input,
                empty_tensor,
                # roi only takes effect with
                # coordinate_transformation_mode="tf_crop_and_resize"
                scales,  # scales is not needed since we are sending out_size
                coordinate_transformation_mode_s=transformation_mode,
                cubic_coeff_a_f=-0.75,  # only valid when mode="cubic"
                mode_s=interpolate_mode,  # nearest, linear, or cubic
                nearest_mode_s='floor')  # only valid when mode="nearest"

    return symbolic_fn
77
+
78
+
79
# Symbolic functions for the aten upsample ops.  The second argument is the
# full input rank (currently unused by the factory) and the third selects
# the ONNX Resize mode.
upsample_nearest1d = _interpolate('upsample_nearest1d', 3, 'nearest')
upsample_nearest2d = _interpolate('upsample_nearest2d', 4, 'nearest')
upsample_nearest3d = _interpolate('upsample_nearest3d', 5, 'nearest')
upsample_linear1d = _interpolate('upsample_linear1d', 3, 'linear')
upsample_bilinear2d = _interpolate('upsample_bilinear2d', 4, 'linear')
upsample_trilinear3d = _interpolate('upsample_trilinear3d', 5, 'linear')
upsample_bicubic2d = _interpolate('upsample_bicubic2d', 4, 'cubic')
86
+
87
+
88
@parse_args('v', 'v', 'i', 'i', 'i', 'none')
def topk(g, self, k, dim, largest, sorted, out=None):
    """Symbolic for ``aten::topk``; delegates entirely to the symbolic
    helper (opset-11 TopK)."""
    return sym_help._topk_helper(
        g, self, k, dim, largest=largest, sorted=sorted, out=out)
92
+
93
+
94
def masked_select(g, self, mask):
    """Symbolic for ``aten::masked_select``: broadcast the mask, locate its
    nonzero positions and gather them with ``GatherND``."""
    from torch.onnx.symbolic_opset9 import expand_as, nonzero
    broadcast_mask = expand_as(g, mask, self)
    indices = nonzero(g, broadcast_mask)
    return g.op('GatherND', self, indices)
98
+
99
+
100
def _prepare_onnx_paddings(g, dim, pad):
    """Convert aten-style paddings to the ONNX ``Pad`` layout.

    aten orders paddings from the last dimension backwards as
    ``(last_begin, last_end, ...)`` and may supply fewer than ``2 * dim``
    values; ONNX ``Pad`` wants
    ``(dim1_begin, dim2_begin, ..., dim1_end, dim2_end, ...)`` for every
    dimension.  All arithmetic is done with graph ops so a runtime ``pad``
    tensor is supported.
    """
    # Number of provided padding values (a runtime scalar).
    pad_len = torch.onnx.symbolic_opset9.size(
        g, pad, g.op('Constant', value_t=torch.tensor([0])))
    # Set extension = [0] * (dim * 2 - len(pad))
    extension = g.op(
        'Sub',
        g.op('Mul',
             g.op('Constant', value_t=torch.tensor(dim, dtype=torch.int64)),
             g.op('Constant', value_t=torch.tensor(2, dtype=torch.int64))),
        pad_len)
    pad = g.op('Cast', pad, to_i=sym_help.cast_pytorch_to_onnx['Long'])
    # Zero-extend the padding list up to length 2 * dim.
    paddings = g.op(
        'Concat',
        pad,
        g.op(
            'ConstantOfShape',
            extension,
            value_t=torch.tensor([0], dtype=torch.int64)),
        axis_i=0)
    # Reinterpret as (dim, 2) begin/end pairs, reverse the dimension order
    # (aten is last-dim-first) and transpose so begins precede ends.
    paddings = g.op('Reshape', paddings,
                    g.op('Constant', value_t=torch.tensor([-1, 2])))
    paddings = g.op(
        'Transpose',
        torch.onnx.symbolic_opset10.flip(g, paddings, [0]),
        perm_i=[1, 0])
    paddings = g.op('Reshape', paddings,
                    g.op('Constant', value_t=torch.tensor([-1])))
    padding_c = g.op(
        'Cast', paddings, to_i=sym_help.cast_pytorch_to_onnx['Long'])
    return padding_c
130
+
131
+
132
def constant_pad_nd(g, input, padding, value=None):
    """Symbolic for ``aten::constant_pad_nd`` via the ONNX ``Pad`` op."""
    fill = sym_help._maybe_get_scalar(value)
    fill = sym_help._if_scalar_type_as(g, fill, input)
    onnx_paddings = _prepare_onnx_paddings(g, input.type().dim(), padding)
    return g.op('Pad', input, onnx_paddings, fill, mode_s='constant')
138
+
139
+
140
def reflection_pad(g, input, padding):
    """Shared symbolic for ``reflection_pad{1,2,3}d`` via ONNX ``Pad``."""
    onnx_paddings = _prepare_onnx_paddings(g, input.type().dim(), padding)
    return g.op('Pad', input, onnx_paddings, mode_s='reflect')


# aten emits a separate op per spatial rank; all share one implementation.
reflection_pad1d = reflection_pad
reflection_pad2d = reflection_pad
reflection_pad3d = reflection_pad
149
+
150
+
151
def _avg_pool(name, tuple_fn):
    """Factory building an ONNX symbolic for aten ``avg_poolNd``.

    Args:
        name (str): Op name, passed to ``_avgpool_helper`` (presumably for
            error messages — TODO confirm).
        tuple_fn (callable): ``_single``/``_pair``/``_triple``, expanding a
            scalar to the op's spatial rank.
    """

    @parse_args('v', 'is', 'is', 'is', 'i', 'i', 'none')
    def symbolic_fn(g,
                    input,
                    kernel_size,
                    stride,
                    padding,
                    ceil_mode,
                    count_include_pad,
                    divisor_override=None):
        # Normalize/validate padding (and divisor_override) via the helper.
        padding = sym_help._avgpool_helper(tuple_fn, padding, kernel_size,
                                           stride, divisor_override, name)
        if not stride:
            # aten allows an empty stride meaning "same as kernel size".
            stride = kernel_size
        if count_include_pad:
            # Emulate count_include_pad by padding with an explicit Pad op
            # first, then pooling with zero padding.
            input = g.op(
                'Pad',
                input,
                g.op(
                    'Constant',
                    value_t=torch.tensor(((0, ) * 2 + padding) * 2)),
                mode_s='constant')
            padding = (0, ) * len(padding)
        output = g.op(
            'AveragePool',
            input,
            kernel_shape_i=tuple_fn(kernel_size),
            strides_i=tuple_fn(stride),
            pads_i=padding * 2,
            ceil_mode_i=ceil_mode)
        return output

    return symbolic_fn
185
+
186
+
187
# AveragePool symbolics, one per spatial rank.
avg_pool1d = _avg_pool('avg_pool1d', _single)
avg_pool2d = _avg_pool('avg_pool2d', _pair)
avg_pool3d = _avg_pool('avg_pool3d', _triple)
190
+
191
+
192
def _get_im2col_indices_along_dim(g, input_d, kernel_size_d, dilation_d,
                                  padding_d, stride_d):
    """Build the 2-D block of gather indices for one spatial dimension of
    im2col.

    ``input_d`` is a runtime scalar (graph value); kernel size, dilation,
    padding and stride are static ints.
    """
    # Input is always 4-D (N, C, H, W)
    # Calculate indices of sliding blocks along spatial dimension
    # Slide kernel over input each dim d:
    # each dimension d ranges from 0 to
    # input[d]+2xpadding[d]-dilation[d]x(kernel_size[d]-1)
    # with steps = stride

    blocks_d = g.op('Add', input_d,
                    g.op('Constant', value_t=torch.tensor(padding_d * 2)))
    blocks_d = g.op(
        'Sub', blocks_d,
        g.op(
            'Constant',
            value_t=torch.tensor(dilation_d * (kernel_size_d - 1))))

    # Stride kernel over input and find starting indices along dim d
    blocks_d_indices = g.op('Range', g.op('Constant', value_t=torch.tensor(0)),
                            blocks_d,
                            g.op('Constant', value_t=torch.tensor(stride_d)))

    # Apply dilation on kernel and find its indices along dim d
    kernel_grid = np.arange(0, kernel_size_d * dilation_d, dilation_d)
    kernel_grid = g.op('Constant', value_t=torch.tensor([kernel_grid]))

    # Broadcast and add kernel staring positions (indices) with
    # kernel_grid along dim d, to get block indices along dim d
    blocks_d_indices = g.op(
        'Unsqueeze', blocks_d_indices, axes_i=[0])  # Reshape to [1, -1]
    kernel_mask = g.op('Reshape', kernel_grid,
                       g.op('Constant', value_t=torch.tensor([-1, 1])))
    block_mask = g.op('Add', blocks_d_indices, kernel_mask)

    return block_mask
227
+
228
+
229
def _get_im2col_padded_input(g, input, padding_h, padding_w):
    """Zero-pad the spatial dims of a 4-D (N, C, H, W) input for im2col."""
    # ONNX Pad layout: (dim1_begin, dim2_begin, ..., dim1_end, dim2_end, ...)
    onnx_layout = [0, 0, padding_h, padding_w] * 2
    pad = g.op('Constant', value_t=torch.LongTensor(onnx_layout))
    return g.op('Pad', input, pad)
237
+
238
+
239
def _get_im2col_output_shape(g, input, kernel_h, kernel_w):
    """Build the (N, C * kH * kW, -1) target shape for im2col's final
    Reshape; the -1 lets ONNX infer the number of sliding blocks."""
    batch_dim = size(g, input, g.op('Constant', value_t=torch.tensor(0)))
    channel_dim = size(g, input, g.op('Constant', value_t=torch.tensor(1)))
    channel_unfolded = g.op(
        'Mul', channel_dim,
        g.op('Constant', value_t=torch.tensor(kernel_h * kernel_w)))

    return g.op(
        'Concat',
        g.op('Unsqueeze', batch_dim, axes_i=[0]),
        g.op('Unsqueeze', channel_unfolded, axes_i=[0]),
        g.op('Constant', value_t=torch.tensor([-1])),
        axis_i=0)
252
+
253
+
254
def size(g, self, dim=None):
    """Symbolic for ``aten::size``: one dimension's extent, or the full
    shape when ``dim`` is None."""
    if dim is not None:
        return sym_help._size_helper(g, self, dim)
    return g.op('Shape', self)
258
+
259
+
260
@parse_args('v', 'is', 'is', 'is', 'is')
def im2col(g, input, kernel_size, dilation, padding, stride):
    """Symbolic for ``aten::im2col`` (unfold), built from
    Pad/Gather/Transpose/Reshape; the result is reshaped to
    (N, C * kH * kW, -1)."""
    # Input is always 4-D tensor (N, C, H, W)
    # All other args are int[2]

    input_h = size(g, input, g.op('Constant', value_t=torch.tensor(2)))
    input_w = size(g, input, g.op('Constant', value_t=torch.tensor(3)))

    stride_h, stride_w = stride[0], stride[1]
    padding_h, padding_w = padding[0], padding[1]
    dilation_h, dilation_w = dilation[0], dilation[1]
    kernel_h, kernel_w = kernel_size[0], kernel_size[1]

    blocks_row_indices = _get_im2col_indices_along_dim(g, input_h, kernel_h,
                                                       dilation_h, padding_h,
                                                       stride_h)
    blocks_col_indices = _get_im2col_indices_along_dim(g, input_w, kernel_w,
                                                       dilation_w, padding_w,
                                                       stride_w)

    output_shape = _get_im2col_output_shape(g, input, kernel_h, kernel_w)
    padded_input = _get_im2col_padded_input(g, input, padding_h, padding_w)

    # Gather block rows then block columns, reorder so each kernel window
    # is contiguous, then flatten the blocks into the last dimension.
    output = g.op('Gather', padded_input, blocks_row_indices, axis_i=2)
    output = g.op('Gather', output, blocks_col_indices, axis_i=4)
    output = g.op('Transpose', output, perm_i=[0, 1, 2, 4, 3, 5])
    return g.op('Reshape', output, output_shape)
287
+
288
+
289
@parse_args('v', 'i')
def one_hot(g, self, num_classes):
    """Symbolic for ``aten::one_hot`` via the ONNX ``OneHot`` op."""
    off_on_values = g.op('Constant', value_t=torch.LongTensor([0, 1]))
    depth = g.op('Constant', value_t=torch.LongTensor([num_classes]))
    return g.op('OneHot', self, depth, off_on_values, axis_i=-1)
294
+
295
+
296
@parse_args('v', 'i', 'none')
def softmax(g, input, dim, dtype=None):
    """Symbolic for ``aten::softmax``.

    Emits the native ONNX ``Softmax`` only when ``dim`` is the last
    dimension of a statically-ranked input; otherwise it falls back to a
    numerically-stabilized exp/sum decomposition along ``dim``.
    """
    input_dim = input.type().dim()
    if input_dim:
        # TODO: remove this as onnx opset 11 spec allows negative axes
        if dim < 0:
            dim = input_dim + dim
        if input_dim == dim + 1:
            softmax = g.op('Softmax', input, axis_i=dim)
            if dtype and dtype.node().kind() != 'prim::Constant':
                parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype')
                softmax = g.op(
                    'Cast',
                    softmax,
                    to_i=sym_help.scalar_type_to_onnx[parsed_dtype])
            return softmax

    # Fallback: exp(x - max(x)) / sum(exp(x - max(x))) along `dim`.
    max_value = g.op('ReduceMax', input, axes_i=[dim], keepdims_i=1)
    input = g.op('Sub', input, max_value)
    exp = g.op('Exp', input)
    sum = g.op('ReduceSum', exp, axes_i=[dim])
    softmax = g.op('Div', exp, sum)
    if dtype and dtype.node().kind() != 'prim::Constant':
        parsed_dtype = sym_help._get_const(dtype, 'i', 'dtype')
        softmax = g.op(
            'Cast', softmax, to_i=sym_help.scalar_type_to_onnx[parsed_dtype])
    return softmax
323
+
324
+
325
def _adaptive_pool(name, type, tuple_fn, fn=None):
    """Factory building an ONNX symbolic for aten adaptive pooling.

    Args:
        name (str): Registered op name (unused in the body).
        type (str): ONNX pool op type, 'AveragePool' or 'MaxPool'.
        tuple_fn (callable): ``_single``/``_pair``/``_triple``.
        fn (callable, optional): ``max_poolNd_with_indices`` symbolic used
            on the MaxPool path, which must also return indices.
    """

    @parse_args('v', 'is')
    def symbolic_fn(g, input, output_size):
        # An all-ones output size maps directly to the Global* pooling ops.
        # MaxPool callers expect an extra (None) indices output, hence the
        # tuple returns on those branches.
        if output_size == [1] * len(output_size) and type == 'AveragePool':
            return g.op('GlobalAveragePool', input)
        if not input.isCompleteTensor():
            if output_size == [1] * len(output_size):
                return g.op('GlobalMaxPool', input), None
            # Static spatial sizes are needed to derive kernel/stride below.
            raise NotImplementedError(
                '[Adaptive pool]:input size not accessible')
        dim = input.type().sizes()[2:]
        if output_size == [1] * len(output_size) and type == 'MaxPool':
            return g.op('GlobalMaxPool', input), None

        # compute stride = floor(input_size / output_size)
        s = [int(dim[i] / output_size[i]) for i in range(0, len(dim))]

        # compute kernel_size = input_size - (output_size - 1) * stride
        k = [dim[i] - (output_size[i] - 1) * s[i] for i in range(0, len(dim))]

        # call max_poolxd_with_indices to get indices in the output
        if type == 'MaxPool':
            return fn(g, input, k, k, (0, ) * len(dim), (1, ) * len(dim),
                      False)
        output = g.op(
            type,
            input,
            kernel_shape_i=tuple_fn(k),
            strides_i=tuple_fn(s),
            ceil_mode_i=False)
        return output

    return symbolic_fn
359
+
360
+
361
# Adaptive average pooling symbolics, one per spatial rank.  Only the
# AveragePool variants are instantiated here.
adaptive_avg_pool1d = _adaptive_pool('adaptive_avg_pool1d', 'AveragePool',
                                     _single)
adaptive_avg_pool2d = _adaptive_pool('adaptive_avg_pool2d', 'AveragePool',
                                     _pair)
adaptive_avg_pool3d = _adaptive_pool('adaptive_avg_pool3d', 'AveragePool',
                                     _triple)
367
+
368
+
369
def new_full(g,
             self,
             size,
             fill_value,
             dtype,
             layout,
             device,
             pin_memory=False):
    """Symbolic for ``Tensor.new_full``: infer the dtype from ``self`` when
    none is given, then delegate to the opset-9 ``full`` symbolic."""
    from torch.onnx.symbolic_opset9 import full
    if dtype is None and self.isCompleteTensor():
        scalar_name = self.type().scalarType()
        dtype = sym_help.scalar_type_to_onnx.index(
            sym_help.cast_pytorch_to_onnx[scalar_name])
    return full(g, size, fill_value, dtype, layout, device, pin_memory)
383
+
384
+
385
@parse_args('v', 'v', 'i', 'i', 'i')
def grid_sampler(g,
                 input,
                 grid,
                 interpolation_mode,
                 padding_mode,
                 align_corners=False):
    """Symbolic for ``aten::grid_sampler`` exported as the custom
    ``mmcv::grid_sampler`` op."""
    attrs = dict(
        interpolation_mode_i=interpolation_mode,
        padding_mode_i=padding_mode,
        align_corners_i=align_corners)
    return g.op('mmcv::grid_sampler', input, grid, **attrs)
399
+
400
+
401
@parse_args('v', 'i')
def cummax(g, input, dim):
    """Symbolic for ``aten::cummax``, exported as the custom ``mmcv::cummax``
    op with two outputs (presumably values and indices, matching
    ``aten::cummax`` — confirm against the mmcv plugin)."""
    return g.op('mmcv::cummax', input, dim_i=dim, outputs=2)
404
+
405
+
406
@parse_args('v', 'i')
def cummin(g, input, dim):
    """Symbolic for ``aten::cummin``, exported as the custom ``mmcv::cummin``
    op with two outputs (presumably values and indices, matching
    ``aten::cummin`` — confirm against the mmcv plugin)."""
    return g.op('mmcv::cummin', input, dim_i=dim, outputs=2)
409
+
410
+
411
@parse_args('v', 'v', 'is')
def roll(g, input, shifts, dims):
    """Symbolic for ``aten::roll`` built from Slice/Concat.

    Each rolled dimension is handled by splitting the tensor at
    ``size - shift`` and concatenating the two pieces in swapped order.
    ``shifts`` is a runtime tensor; ``dims`` is a static int list.
    """
    from packaging import version
    from torch.onnx.symbolic_opset9 import squeeze
    input_shape = g.op('Shape', input)

    need_flatten = len(dims) == 0
    # If dims is not specified, the tensor will be flattened before
    # rolling and then restored to the original shape.
    if need_flatten:
        resize_shape = input_shape
        input = g.op('Reshape', input,
                     g.op('Constant', value_t=torch.LongTensor([1, -1])))
        input_shape = g.op('Shape', input)
        dims = [1]

    for index, dim in enumerate(dims):
        # end_size: this dim's extent; shift_size: this dim's shift.
        end_size = sym_help._slice_helper(
            g, input_shape, axes=[0], ends=[dim + 1], starts=[dim])
        shift_size = sym_help._slice_helper(
            g, shifts, axes=[0], ends=[index + 1], starts=[index])
        slice_size = g.op('Sub', end_size, shift_size)

        # Can not use Mod because tensorrt does not support;
        # emulate slice_size = (size - shift) % size so out-of-range
        # shifts wrap around.
        div_size = g.op('Div', slice_size, end_size)
        slice_size = g.op('Sub', slice_size, g.op('Mul', end_size, div_size))

        if version.parse(torch.__version__) >= version.parse('1.7.0'):
            # add dim=0 for pytorch 1.9.0
            end_size = squeeze(g, end_size, 0)
            slice_size = squeeze(g, slice_size, 0)
        else:
            end_size = g.op('Squeeze', end_size)
            slice_size = g.op('Squeeze', slice_size)
        dim = torch.LongTensor([dim])

        # Concatenate [slice_size:end) before [0:slice_size) -- exactly a
        # roll of `shift` along this dimension.
        input_slice0 = sym_help._slice_helper(
            g,
            input,
            axes=dim,
            starts=torch.LongTensor([0]),
            ends=slice_size,
            dynamic_slice=True)
        input_slice1 = sym_help._slice_helper(
            g,
            input,
            axes=dim,
            ends=end_size,
            starts=slice_size,
            dynamic_slice=True)

        input = g.op('Concat', input_slice1, input_slice0, axis_i=dim)

    if need_flatten:
        input = g.op('Reshape', input, resize_shape)

    return input
468
+
469
+
470
def register_extra_symbolics(opset=11):
    """Register mmcv's extra ONNX symbolic functions under ``opset``.

    Deprecated in favour of MMDeploy; a colored deprecation warning is
    emitted on every call.
    """
    # Following strings of text style are from colorama package
    bright_style, reset_style = '\x1b[1m', '\x1b[0m'
    red_text, blue_text = '\x1b[31m', '\x1b[34m'
    white_background = '\x1b[107m'

    msg = white_background + bright_style + red_text
    msg += 'DeprecationWarning: This function will be deprecated in future. '
    msg += blue_text + 'Welcome to use the unified model deployment toolbox '
    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'
    msg += reset_style
    warnings.warn(msg)

    # (aten op name, symbolic function) pairs, registered in the original
    # order.
    extra_symbolics = [
        ('one_hot', one_hot),
        ('im2col', im2col),
        ('topk', topk),
        ('softmax', softmax),
        ('constant_pad_nd', constant_pad_nd),
        ('reflection_pad1d', reflection_pad1d),
        ('reflection_pad2d', reflection_pad2d),
        ('reflection_pad3d', reflection_pad3d),
        ('avg_pool1d', avg_pool1d),
        ('avg_pool2d', avg_pool2d),
        ('avg_pool3d', avg_pool3d),
        ('adaptive_avg_pool1d', adaptive_avg_pool1d),
        ('adaptive_avg_pool2d', adaptive_avg_pool2d),
        ('adaptive_avg_pool3d', adaptive_avg_pool3d),
        ('masked_select', masked_select),
        ('upsample_nearest1d', upsample_nearest1d),
        ('upsample_nearest2d', upsample_nearest2d),
        ('upsample_nearest3d', upsample_nearest3d),
        ('upsample_linear1d', upsample_linear1d),
        ('upsample_bilinear2d', upsample_bilinear2d),
        ('upsample_trilinear3d', upsample_trilinear3d),
        ('upsample_bicubic2d', upsample_bicubic2d),
        ('new_full', new_full),
        ('grid_sampler', grid_sampler),
        ('cummax', cummax),
        ('cummin', cummin),
        ('roll', roll),
    ]
    for op_name, symbolic_fn in extra_symbolics:
        register_op(op_name, symbolic_fn, '', opset)
groundingLMM/mmcv/mmcv/parallel/__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from .collate import collate
3
+ from .data_container import DataContainer
4
+ from .data_parallel import MMDataParallel
5
+ from .distributed import MMDistributedDataParallel
6
+ from .registry import MODULE_WRAPPERS
7
+ from .scatter_gather import scatter, scatter_kwargs
8
+ from .utils import is_module_wrapper
9
+
10
+ __all__ = [
11
+ 'collate', 'DataContainer', 'MMDataParallel', 'MMDistributedDataParallel',
12
+ 'scatter', 'scatter_kwargs', 'is_module_wrapper', 'MODULE_WRAPPERS'
13
+ ]
groundingLMM/mmcv/mmcv/parallel/_functions.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import torch
3
+ from torch.nn.parallel._functions import _get_stream
4
+
5
+
6
def scatter(input, devices, streams=None):
    """Scatters tensor across multiple GPUs.

    Lists are split into per-device chunks recursively; tensors are copied
    to ``devices[0]`` (or left on CPU when the target is ``[-1]``).
    """
    if streams is None:
        streams = [None] * len(devices)

    if isinstance(input, list):
        # chunk i // chunk_size of the list goes to device i // chunk_size.
        chunk_size = (len(input) - 1) // len(devices) + 1
        return [
            scatter(item, [devices[i // chunk_size]],
                    [streams[i // chunk_size]])
            for i, item in enumerate(input)
        ]
    if isinstance(input, torch.Tensor):
        output = input.contiguous()
        # TODO: copy to a pinned buffer first (if copying from CPU)
        stream = streams[0] if output.numel() > 0 else None
        if devices != [-1]:
            # Async H2D copy on the provided background stream.
            with torch.cuda.device(devices[0]), torch.cuda.stream(stream):
                output = output.cuda(devices[0], non_blocking=True)

        return output
    raise Exception(f'Unknown type {type(input)}.')
29
+
30
+
31
def synchronize_stream(output, devices, streams):
    """Make each device's current stream wait on its copy stream.

    Walks the (possibly nested) scatter output with the same chunking used
    for scattering, so every tensor produced by an async copy is safe to
    consume on the device's main stream.
    """
    if isinstance(output, list):
        chunk_size = len(output) // len(devices)
        for i in range(len(devices)):
            for j in range(chunk_size):
                synchronize_stream(output[i * chunk_size + j], [devices[i]],
                                   [streams[i]])
    elif isinstance(output, torch.Tensor):
        if output.numel() != 0:
            with torch.cuda.device(devices[0]):
                main_stream = torch.cuda.current_stream()
                main_stream.wait_stream(streams[0])
                # Keep the tensor's memory alive until main_stream is done
                # with it.
                output.record_stream(main_stream)
    else:
        raise Exception(f'Unknown type {type(output)}.')
46
+
47
+
48
def get_input_device(input):
    """Return the CUDA device id of the first GPU tensor found in ``input``
    (recursing through lists), or -1 when everything lives on CPU."""
    if isinstance(input, torch.Tensor):
        return input.get_device() if input.is_cuda else -1
    if isinstance(input, list):
        for element in input:
            device = get_input_device(element)
            if device != -1:
                return device
        return -1
    raise Exception(f'Unknown type {type(input)}.')
59
+
60
+
61
class Scatter:
    """DataContainer-aware scatter helper.

    Unlike ``torch.nn.parallel``'s Scatter, this also supports a pure-CPU
    target (``target_gpus == [-1]``), in which case tensors stay on CPU.
    """

    @staticmethod
    def forward(target_gpus, input):
        # Device of the source data: -1 means everything is on CPU.
        input_device = get_input_device(input)
        streams = None
        if input_device == -1 and target_gpus != [-1]:
            # Perform CPU to GPU copies in a background stream
            streams = [_get_stream(device) for device in target_gpus]

        outputs = scatter(input, target_gpus, streams)
        # Synchronize with the copy stream
        if streams is not None:
            synchronize_stream(outputs, target_gpus, streams)

        # Callers always receive a tuple, one entry per scattered chunk.
        return tuple(outputs) if isinstance(outputs, list) else (outputs, )
groundingLMM/mmcv/mmcv/parallel/collate.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from collections.abc import Mapping, Sequence
3
+
4
+ import torch
5
+ import torch.nn.functional as F
6
+ from torch.utils.data.dataloader import default_collate
7
+
8
+ from .data_container import DataContainer
9
+
10
+
11
def collate(batch, samples_per_gpu=1):
    """Puts each data field into a tensor/DataContainer with outer dimension
    batch size.

    Extend default_collate to add support for
    :type:`~mmcv.parallel.DataContainer`. There are 3 cases.

    1. cpu_only = True, e.g., meta data
    2. cpu_only = False, stack = True, e.g., images tensors
    3. cpu_only = False, stack = False, e.g., gt bboxes

    Args:
        batch (Sequence): A batch of samples.
        samples_per_gpu (int): Samples grouped per GPU; padding and
            stacking are performed within each group independently.
    """

    if not isinstance(batch, Sequence):
        raise TypeError(f'{batch.dtype} is not supported.')

    if isinstance(batch[0], DataContainer):
        stacked = []
        if batch[0].cpu_only:
            # Case 1: keep raw python objects, grouped per GPU.
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
            return DataContainer(
                stacked, batch[0].stack, batch[0].padding_value, cpu_only=True)
        elif batch[0].stack:
            # Case 2: pad the last `pad_dims` dimensions to the group-wise
            # maximum, then stack every group into one tensor.
            for i in range(0, len(batch), samples_per_gpu):
                assert isinstance(batch[i].data, torch.Tensor)

                if batch[i].pad_dims is not None:
                    ndim = batch[i].dim()
                    assert ndim > batch[i].pad_dims
                    max_shape = [0 for _ in range(batch[i].pad_dims)]
                    for dim in range(1, batch[i].pad_dims + 1):
                        max_shape[dim - 1] = batch[i].size(-dim)
                    for sample in batch[i:i + samples_per_gpu]:
                        # Leading (non-padded) dims must match exactly.
                        for dim in range(0, ndim - batch[i].pad_dims):
                            assert batch[i].size(dim) == sample.size(dim)
                        for dim in range(1, batch[i].pad_dims + 1):
                            max_shape[dim - 1] = max(max_shape[dim - 1],
                                                     sample.size(-dim))
                    padded_samples = []
                    for sample in batch[i:i + samples_per_gpu]:
                        # F.pad layout is (last_begin, last_end, ...); only
                        # the "end" entries are non-zero here.
                        pad = [0 for _ in range(batch[i].pad_dims * 2)]
                        for dim in range(1, batch[i].pad_dims + 1):
                            pad[2 * dim -
                                1] = max_shape[dim - 1] - sample.size(-dim)
                        padded_samples.append(
                            F.pad(
                                sample.data, pad, value=sample.padding_value))
                    stacked.append(default_collate(padded_samples))
                elif batch[i].pad_dims is None:
                    # No padding requested: samples must already agree in
                    # shape for default_collate to stack them.
                    stacked.append(
                        default_collate([
                            sample.data
                            for sample in batch[i:i + samples_per_gpu]
                        ]))
                else:
                    raise ValueError(
                        'pad_dims should be either None or integers (1-3)')

        else:
            # Case 3: keep per-sample tensors as a list (e.g. gt bboxes).
            for i in range(0, len(batch), samples_per_gpu):
                stacked.append(
                    [sample.data for sample in batch[i:i + samples_per_gpu]])
        return DataContainer(stacked, batch[0].stack, batch[0].padding_value)
    elif isinstance(batch[0], Sequence):
        transposed = zip(*batch)
        return [collate(samples, samples_per_gpu) for samples in transposed]
    elif isinstance(batch[0], Mapping):
        return {
            key: collate([d[key] for d in batch], samples_per_gpu)
            for key in batch[0]
        }
    else:
        return default_collate(batch)
groundingLMM/mmcv/mmcv/parallel/data_container.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import functools
3
+
4
+ import torch
5
+
6
+
7
def assert_tensor_type(func):
    """Decorator ensuring the container's ``data`` is a ``torch.Tensor``
    before the wrapped accessor runs.

    Raises:
        AttributeError: If ``data`` holds a non-tensor payload.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        container = args[0]
        if isinstance(container.data, torch.Tensor):
            return func(*args, **kwargs)
        raise AttributeError(
            f'{container.__class__.__name__} has no attribute '
            f'{func.__name__} for type {container.datatype}')

    return wrapper
18
+
19
+
20
class DataContainer:
    """A container for any type of objects.

    Typically tensors will be stacked in the collate function and sliced along
    some dimension in the scatter function. This behavior has some limitations.
    1. All tensors have to be the same size.
    2. Types are limited (numpy array or Tensor).

    We design `DataContainer` and `MMDataParallel` to overcome these
    limitations. The behavior can be either of the following.

    - copy to GPU, pad all tensors to the same size and stack them
    - copy to GPU without stacking
    - leave the objects as is and pass it to the model
    - pad_dims specifies the number of last few dimensions to do padding

    Args:
        data: Wrapped payload (tensor or arbitrary object).
        stack (bool): Whether samples should be stacked at collate time.
        padding_value (int): Fill value used when padding before stacking.
        cpu_only (bool): If True, the payload is never moved to GPU.
        pad_dims (int | None): Number of trailing dimensions to pad;
            None disables padding, otherwise 1-3.
    """

    def __init__(self,
                 data,
                 stack=False,
                 padding_value=0,
                 cpu_only=False,
                 pad_dims=2):
        self._data = data
        self._cpu_only = cpu_only
        self._stack = stack
        self._padding_value = padding_value
        assert pad_dims in [None, 1, 2, 3]
        self._pad_dims = pad_dims

    def __repr__(self):
        return f'{self.__class__.__name__}({repr(self.data)})'

    def __len__(self):
        # Delegates to the payload; only meaningful for sized payloads.
        return len(self._data)

    @property
    def data(self):
        """The wrapped payload."""
        return self._data

    @property
    def datatype(self):
        """Tensor type string for tensor payloads, else the python type."""
        if isinstance(self.data, torch.Tensor):
            return self.data.type()
        else:
            return type(self.data)

    @property
    def cpu_only(self):
        """Whether the payload must stay on CPU."""
        return self._cpu_only

    @property
    def stack(self):
        """Whether samples are stacked at collate time."""
        return self._stack

    @property
    def padding_value(self):
        """Fill value used when padding."""
        return self._padding_value

    @property
    def pad_dims(self):
        """Number of trailing dimensions to pad, or None."""
        return self._pad_dims

    @assert_tensor_type
    def size(self, *args, **kwargs):
        """Tensor-only proxy of ``torch.Tensor.size``."""
        return self.data.size(*args, **kwargs)

    @assert_tensor_type
    def dim(self):
        """Tensor-only proxy of ``torch.Tensor.dim``."""
        return self.data.dim()
groundingLMM/mmcv/mmcv/parallel/data_parallel.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ from itertools import chain
3
+
4
+ from torch.nn.parallel import DataParallel
5
+
6
+ from .scatter_gather import scatter_kwargs
7
+
8
+
9
class MMDataParallel(DataParallel):
    """The DataParallel module that supports DataContainer.

    MMDataParallel has two main differences with PyTorch DataParallel:

    - It supports a custom type :class:`DataContainer` which allows more
      flexible control of input data during both GPU and CPU inference.
    - It implement two more APIs ``train_step()`` and ``val_step()``.

    .. warning::
        MMDataParallel only supports single GPU training, if you need to
        train with multiple GPUs, please use MMDistributedDataParallel
        instead. If you have multiple GPUs and you just want to use
        MMDataParallel, you can set the environment variable
        ``CUDA_VISIBLE_DEVICES=0`` or instantiate ``MMDataParallel`` with
        ``device_ids=[0]``.

    Args:
        module (:class:`nn.Module`): Module to be encapsulated.
        device_ids (list[int]): Device IDS of modules to be scattered to.
            Defaults to None when GPU is not available.
        output_device (str | int): Device ID for output. Defaults to None.
        dim (int): Dimension used to scatter the data. Defaults to 0.
    """

    def __init__(self, *args, dim=0, **kwargs):
        super(MMDataParallel, self).__init__(*args, dim=dim, **kwargs)
        # Stored explicitly so scatter() works even on the CPU-only path.
        self.dim = dim

    def forward(self, *inputs, **kwargs):
        """Override the original forward function.

        The main difference lies in the CPU inference where the data in
        :class:`DataContainers` will still be gathered.
        """
        if not self.device_ids:
            # We add the following line thus the module could gather and
            # convert data containers as those in GPU inference
            inputs, kwargs = self.scatter(inputs, kwargs, [-1])
            return self.module(*inputs[0], **kwargs[0])
        else:
            return super().forward(*inputs, **kwargs)

    def scatter(self, inputs, kwargs, device_ids):
        """Scatter with DataContainer support via ``scatter_kwargs``."""
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def train_step(self, *inputs, **kwargs):
        """Scatter inputs and dispatch to ``module.train_step``.

        Mirrors :meth:`forward`, but requires a single-GPU (or pure CPU)
        setup and checks all parameters/buffers live on the source device.
        """
        if not self.device_ids:
            # We add the following line thus the module could gather and
            # convert data containers as those in GPU inference
            inputs, kwargs = self.scatter(inputs, kwargs, [-1])
            return self.module.train_step(*inputs[0], **kwargs[0])

        assert len(self.device_ids) == 1, \
            ('MMDataParallel only supports single GPU training, if you need to'
             ' train with multiple GPUs, please use MMDistributedDataParallel'
             ' instead.')

        for t in chain(self.module.parameters(), self.module.buffers()):
            if t.device != self.src_device_obj:
                raise RuntimeError(
                    'module must have its parameters and buffers '
                    f'on device {self.src_device_obj} (device_ids[0]) but '
                    f'found one of them on device: {t.device}')

        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
        return self.module.train_step(*inputs[0], **kwargs[0])

    def val_step(self, *inputs, **kwargs):
        """Scatter inputs and dispatch to ``module.val_step``.

        Same device constraints as :meth:`train_step`.
        """
        if not self.device_ids:
            # We add the following line thus the module could gather and
            # convert data containers as those in GPU inference
            inputs, kwargs = self.scatter(inputs, kwargs, [-1])
            return self.module.val_step(*inputs[0], **kwargs[0])

        assert len(self.device_ids) == 1, \
            ('MMDataParallel only supports single GPU training, if you need to'
             ' train with multiple GPUs, please use MMDistributedDataParallel'
             ' instead.')

        for t in chain(self.module.parameters(), self.module.buffers()):
            if t.device != self.src_device_obj:
                raise RuntimeError(
                    'module must have its parameters and buffers '
                    f'on device {self.src_device_obj} (device_ids[0]) but '
                    f'found one of them on device: {t.device}')

        inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
        return self.module.val_step(*inputs[0], **kwargs[0])
groundingLMM/mmcv/mmcv/parallel/distributed.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import torch
3
+ from torch.nn.parallel.distributed import (DistributedDataParallel,
4
+ _find_tensors)
5
+
6
+ from mmcv import print_log
7
+ from mmcv.utils import TORCH_VERSION, digit_version
8
+ from .scatter_gather import scatter_kwargs
9
+
10
+
11
class MMDistributedDataParallel(DistributedDataParallel):
    """The DDP module that supports DataContainer.

    MMDDP has two main differences with PyTorch DDP:

    - It supports a custom type :class:`DataContainer` which allows more
      flexible control of input data.
    - It implements two APIs ``train_step()`` and ``val_step()``.
    """

    def to_kwargs(self, inputs, kwargs, device_id):
        # Use `self.to_kwargs` instead of `self.scatter` in pytorch1.8
        # to move all tensors to device_id
        return scatter_kwargs(inputs, kwargs, [device_id], dim=self.dim)

    def scatter(self, inputs, kwargs, device_ids):
        """Scatter inputs/kwargs (DataContainers included) onto devices."""
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def _run_ddp_step(self, step_name, inputs, kwargs):
        """Shared implementation of ``train_step``/``val_step``.

        The two public methods were byte-for-byte duplicates except for the
        module method they dispatched to; this helper keeps the DDP
        bookkeeping (bucket rebuild, parameter sync, backward preparation)
        in one place.

        Args:
            step_name (str): Name of the module method to run, either
                ``'train_step'`` or ``'val_step'``.
            inputs (tuple): Positional arguments for the step.
            kwargs (dict): Keyword arguments for the step.

        Returns:
            Whatever ``self.module.<step_name>()`` returns (possibly
            gathered across devices).
        """
        # In PyTorch >= 1.7, ``reducer._rebuild_buckets()`` is moved from the
        # end of backward to the beginning of forward.
        if ('parrots' not in TORCH_VERSION
                and digit_version(TORCH_VERSION) >= digit_version('1.7')
                and self.reducer._rebuild_buckets()):
            print_log(
                'Reducer buckets have been rebuilt in this iteration.',
                logger='mmcv')

        if getattr(self, 'require_forward_param_sync', True):
            self._sync_params()

        module_step = getattr(self.module, step_name)
        if self.device_ids:
            inputs, kwargs = self.scatter(inputs, kwargs, self.device_ids)
            if len(self.device_ids) == 1:
                output = module_step(*inputs[0], **kwargs[0])
            else:
                outputs = self.parallel_apply(
                    self._module_copies[:len(inputs)], inputs, kwargs)
                output = self.gather(outputs, self.output_device)
        else:
            output = module_step(*inputs, **kwargs)

        if torch.is_grad_enabled() and getattr(
                self, 'require_backward_grad_sync', True):
            if self.find_unused_parameters:
                self.reducer.prepare_for_backward(list(_find_tensors(output)))
            else:
                self.reducer.prepare_for_backward([])
        else:
            if ('parrots' not in TORCH_VERSION
                    and digit_version(TORCH_VERSION) > digit_version('1.2')):
                self.require_forward_param_sync = False
        return output

    def train_step(self, *inputs, **kwargs):
        """train_step() API for module wrapped by DistributedDataParallel.

        This method is basically the same as
        ``DistributedDataParallel.forward()``, while replacing
        ``self.module.forward()`` with ``self.module.train_step()``.
        It is compatible with PyTorch 1.1 - 1.5.
        """
        return self._run_ddp_step('train_step', inputs, kwargs)

    def val_step(self, *inputs, **kwargs):
        """val_step() API for module wrapped by DistributedDataParallel.

        This method is basically the same as
        ``DistributedDataParallel.forward()``, while replacing
        ``self.module.forward()`` with ``self.module.val_step()``.
        It is compatible with PyTorch 1.1 - 1.5.
        """
        return self._run_ddp_step('val_step', inputs, kwargs)
groundingLMM/mmcv/mmcv/parallel/distributed_deprecated.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) OpenMMLab. All rights reserved.
2
+ import torch
3
+ import torch.distributed as dist
4
+ import torch.nn as nn
5
+ from torch._utils import (_flatten_dense_tensors, _take_tensors,
6
+ _unflatten_dense_tensors)
7
+
8
+ from mmcv.utils import TORCH_VERSION, digit_version
9
+ from .registry import MODULE_WRAPPERS
10
+ from .scatter_gather import scatter_kwargs
11
+
12
+
13
@MODULE_WRAPPERS.register_module()
class MMDistributedDataParallel(nn.Module):
    """Deprecated DDP wrapper that broadcasts state manually.

    On construction it broadcasts the wrapped module's state dict (and,
    optionally, its buffers) from rank 0 in coalesced buckets. Inputs to
    ``forward``/``train_step``/``val_step`` are scattered to the current
    CUDA device before delegating to the wrapped module.
    """

    def __init__(self,
                 module,
                 dim=0,
                 broadcast_buffers=True,
                 bucket_cap_mb=25):
        super().__init__()
        self.module = module
        self.dim = dim
        self.broadcast_buffers = broadcast_buffers
        # Upper bound, in bytes, for each coalesced broadcast bucket.
        self.broadcast_bucket_size = bucket_cap_mb * 1024 * 1024
        self._sync_params()

    def _dist_broadcast_coalesced(self, tensors, buffer_size):
        # Broadcast rank-0 values to all ranks, bucketed so each flattened
        # broadcast stays under buffer_size bytes.
        for bucket in _take_tensors(tensors, buffer_size):
            flat = _flatten_dense_tensors(bucket)
            dist.broadcast(flat, 0)
            for tensor, synced in zip(
                    bucket, _unflatten_dense_tensors(flat, bucket)):
                tensor.copy_(synced)

    def _sync_params(self):
        # Synchronize every state-dict entry (params + buffers) from rank 0.
        module_states = list(self.module.state_dict().values())
        if module_states:
            self._dist_broadcast_coalesced(module_states,
                                           self.broadcast_bucket_size)
        if self.broadcast_buffers:
            # ``_all_buffers`` only exists on torch < 1.0 (non-parrots).
            if (TORCH_VERSION != 'parrots'
                    and digit_version(TORCH_VERSION) < digit_version('1.0')):
                buffers = [b.data for b in self.module._all_buffers()]
            else:
                buffers = [b.data for b in self.module.buffers()]
            if buffers:
                self._dist_broadcast_coalesced(buffers,
                                               self.broadcast_bucket_size)

    def scatter(self, inputs, kwargs, device_ids):
        return scatter_kwargs(inputs, kwargs, device_ids, dim=self.dim)

    def forward(self, *inputs, **kwargs):
        inputs, kwargs = self.scatter(inputs, kwargs,
                                      [torch.cuda.current_device()])
        return self.module(*inputs[0], **kwargs[0])

    def train_step(self, *inputs, **kwargs):
        inputs, kwargs = self.scatter(inputs, kwargs,
                                      [torch.cuda.current_device()])
        return self.module.train_step(*inputs[0], **kwargs[0])

    def val_step(self, *inputs, **kwargs):
        inputs, kwargs = self.scatter(inputs, kwargs,
                                      [torch.cuda.current_device()])
        return self.module.val_step(*inputs[0], **kwargs[0])