import argparse
from time import time

import torch
import torch.nn.functional as F
from accelerate import Accelerator
from accelerate.utils import gather_object
from mmdet.datasets import RefCocoDataset
from mmdet.datasets.transforms import LoadAnnotations
from mmdet.evaluation import RefSegMetric
from mmdet.structures.mask import BitmapMasks
from mmengine.config import Config
from tqdm import tqdm
from xtuner.model.utils import guess_load_checkpoint
from xtuner.registry import BUILDER
from xtuner.utils.constants import DEFAULT_IMAGE_TOKEN

from projects.f_llm.datasets.transforms import PILLoadImageFromFile, RefCOCO2PNG
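# Example launch (a sketch; the script and checkpoint paths below are
# placeholders for your own setup, not files shipped with this project):
#   accelerate launch --num_processes 8 eval_refcoco.py \
#       configs/eval_refcoco.py --checkpoint work_dirs/iter_5000.pth --concat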

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('config', help='config file path.')
    parser.add_argument('--checkpoint', default=None, type=str,
                        help='optional checkpoint to load on top of the config.')
    parser.add_argument('--debug', action='store_true',
                        help='only evaluate the first 100 samples per subset.')
    parser.add_argument('--ceph', action='store_true',
                        help='read COCO images from Ceph/S3 via the petrel backend.')
    parser.add_argument('--concat', action='store_true',
                        help='evaluate all referring expressions of an image '
                             'in a single sample.')
    args = parser.parse_args()

    # Initialize accelerator
    accelerator = Accelerator()
    # sanity check for the multi-process setup: each process creates a string,
    # gather_object collects them from every rank, and accelerator.print
    # outputs the gathered list on the main process only
    message = [f"Hello this is GPU {accelerator.process_index}"]
    messages = gather_object(message)
    accelerator.print(messages)

    cfg = Config.fromfile(args.config)
    prompt_template = cfg.prompt_template
    tokenizer = cfg.tokenizer
    image_processor = cfg.image_processor
    prompt = cfg.get('prompt', None)
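    # build the model from the config via xtuner's registry; checkpoint
    # weights, if any, are loaded separately below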
    model = BUILDER.build(cfg.model)

    if args.checkpoint is not None:
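        # guess_load_checkpoint resolves the on-disk checkpoint format
        # (e.g. a plain .pth file vs. a DeepSpeed checkpoint directory)
        # before returning a state dict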
        state_dict = guess_load_checkpoint(args.checkpoint)
        missing, unexpected = model.load_state_dict(state_dict, strict=False)
        accelerator.print(f"Missing parameters: {missing}")
        accelerator.print(f"Unexpected parameters: {unexpected}")

    accelerator.print(
        f"Start moving model to device: {accelerator.device}", flush=True)
    tik = time()
    model = model.to(device=accelerator.device)
    accelerator.print(
        f"Finished moving model to device: {accelerator.device}, "
        f"time used: {time() - tik:.1f}s", flush=True)
    model.eval()

    if args.ceph:
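        # read images from Ceph/S3 via mmengine's petrel backend; local
        # train2014 paths are remapped to the bucket path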
        backend_args = dict(
            backend='petrel',
            path_mapping=dict({
                'data/coco/train2014/': 'openmmlab:s3://openmmlab/datasets/detection/coco/train2014/'
            }))
    else:
        backend_args = None

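    # RefCOCO2PNG converts a RefCOCO sample (image + referring expressions +
    # masks) into the model's input format; with --concat, all expressions of
    # an image appear to be folded into one sample (see the eval loop below)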
    refcoco2png_params = dict(
        type=RefCOCO2PNG,
        image_processor=image_processor,
        tokenizer=tokenizer,
        prompt_template=prompt_template,
        concat=args.concat,
        image2tensor=cfg.get('image2tensor', True),
        add_image_token=cfg.get('add_image_token', False),
        image_token=cfg.get('image_token', DEFAULT_IMAGE_TOKEN)
    )
    accelerator.print(f"Do concatenation? {args.concat}")
    if prompt is not None:
        refcoco2png_params.update(prompt=prompt)

    # RefCOCO test-time data pipeline
    test_pipeline = [
        dict(type=PILLoadImageFromFile, backend_args=backend_args),
        dict(
            type=LoadAnnotations,
            with_mask=True,
            with_bbox=False,
            with_seg=False,
            with_label=False),
        refcoco2png_params
    ]

    refcoco_subsets = dict()
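    # standard referring-expression benchmarks: RefCOCO and RefCOCO+ use the
    # UNC splits (val/testA/testB); RefCOCOg uses the UMD splits (val/test)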
    for split in ['val', 'testA', 'testB']:
        refcoco_subsets[f'refcoco_{split}'] = dict(
            ann_file='refcoco/instances.json',
            split_file='refcoco/refs(unc).p',
            split=split)

    for split in ['val', 'testA', 'testB']:
        refcoco_subsets[f'refcoco+_{split}'] = dict(
            ann_file='refcoco+/instances.json',
            split_file='refcoco+/refs(unc).p',
            split=split)

    for split in ['val', 'test']:
        refcoco_subsets[f'refcocog_{split}'] = dict(
            ann_file='refcocog/instances.json',
            split_file='refcocog/refs(umd).p',
            split=split)

    for name, subset in refcoco_subsets.items():
        accelerator.print(f"Start evaluating {name}")
        dataset = RefCocoDataset(
            data_root='data/coco/',
            data_prefix=dict(img_path='train2014/'),
            text_mode='select_first',
            pipeline=test_pipeline,
            **subset
        )
        # sync processes before sharding the dataset across GPUs
        accelerator.wait_for_everyone()

        data_ids = list(range(len(dataset)))
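        # debug mode: evaluate only the first 100 samples of each subset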
        if args.debug:
            data_ids = data_ids[:100]

        results = []
        # divide the dataset indices across the available processes
        with accelerator.split_between_processes(data_ids) as sub_ids:
            for idx in tqdm(sub_ids, disable=not accelerator.is_main_process):
                data_sample = dataset[idx]
                if args.concat:
                    with torch.no_grad():
                        pred_mask_logits = model.predict(data_sample)

                    gt_masks = data_sample['gt_masks'].numpy() > 0
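                    # apply sigmoid to the mask logits, upsample to the GT
                    # resolution, and binarize at 0.5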
                    pred_masks = F.interpolate(pred_mask_logits[None].float().sigmoid(),
                                               size=gt_masks.shape[-2:], mode='bilinear')[0].cpu()
                    pred_masks = pred_masks > 0.5

                    assert len(pred_masks) == len(gt_masks)

                    # Formulate the output into the format that the evaluator accepts
                    results.append(dict(pred_instances=dict(masks=pred_masks),
                                        gt_masks=BitmapMasks(masks=gt_masks,
                                                             height=gt_masks.shape[1],
                                                             width=gt_masks.shape[2]))
                                   )
                else:
                    for sub_data_sample in data_sample:
                        with torch.no_grad():
                            pred_mask_logits = model.predict(sub_data_sample)

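                        # same post-processing as the concat branch, one
                        # expression at a time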
                        gt_masks = sub_data_sample['gt_masks'].numpy() > 0
                        pred_masks = F.interpolate(pred_mask_logits[None].float().sigmoid(),
                                                   size=gt_masks.shape[-2:], mode='bilinear')[0].cpu()
                        pred_masks = pred_masks > 0.5

                        # in the non-concat path each sub-sample carries
                        # exactly one expression and one mask
                        assert len(pred_masks) == len(gt_masks) == 1

                        # Formulate the output into the format that the evaluator accepts
                        results.append(dict(pred_instances=dict(masks=pred_masks),
                                            gt_masks=BitmapMasks(masks=gt_masks,
                                                                 height=gt_masks.shape[1],
                                                                 width=gt_masks.shape[2]))
                                       )
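            # gather the per-process result lists; every process receives the
            # full list, but only the main process runs the evaluator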
            results = gather_object(results)
        if accelerator.is_main_process:
            accelerator.print(
                f"Collected {len(results)} result samples from all GPUs")
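            # RefSegMetric reports cIoU (dataset-level intersection over
            # union) and mIoU (mean per-sample IoU)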
            evaluator = RefSegMetric(metric=['cIoU', 'mIoU'])
            evaluator.process(data_batch=dict(), data_samples=results)
            metrics = evaluator.compute_metrics(evaluator.results)
            accelerator.print(f"Evaluation results on {name}: {metrics}")
        accelerator.print(f"Finished evaluating {name}")