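"""COCO panoptic segmentation evaluation for PSALM.

Loads a pretrained PSALM checkpoint, runs mask prediction over the COCO
panoptic validation split, and reports panoptic and semantic-segmentation
metrics through detectron2-style evaluators.
"""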
import argparse
import torch
import os
import json
from tqdm import tqdm
import shortuuid
from pycocotools import mask
import numpy as np
import cv2
from psalm.constants import IGNORE_INDEX, IMAGE_TOKEN_INDEX, DEFAULT_IMAGE_TOKEN, DEFAULT_IM_START_TOKEN, \
    DEFAULT_IM_END_TOKEN, DEFAULT_SEG_TOKEN, SEG_TOKEN_INDEX, CLS_TOKEN_INDEX
from psalm.conversation import conv_templates, SeparatorStyle
from psalm.model.builder import load_pretrained_model
from psalm.utils import disable_torch_init
from psalm.mm_utils import tokenizer_image_token, get_model_name_from_path, KeywordsStoppingCriteria
from psalm.eval.segmentation_evaluation.instance_evaluation import InstanceSegEvaluator, my_coco_evaluator
from psalm.eval.segmentation_evaluation.panoptic_evaluation import my_coco_panoptic_evaluator, my_SemSegEvaluator
from transformers import StoppingCriteria, StoppingCriteriaList

from torch.utils.data import Dataset, DataLoader

from psalm import conversation as conversation_lib
from psalm.model.datasets_mapper.coco_instance_mapper import COCOInstanceNewBaselineDatasetMapperForEval

from PIL import Image
import math
import copy
from detectron2.structures import BoxMode
from detectron2.evaluation import inference_on_dataset, COCOEvaluator
from detectron2.data import MetadataCatalog, DatasetCatalog

from typing import Dict, Optional, Sequence, List
from dataclasses import dataclass, field
from psalm.train.train_datasets import DataCollatorForCOCODatasetV2, COCO_panoptic_dataset

import transformers


@dataclass
class DataArguments:
    """Evaluation arguments, parsed from the command line by transformers.HfArgumentParser."""
    data_path: Optional[str] = field(default=None,
                                     metadata={"help": "Path to the training data."})
    lazy_preprocess: bool = False
    is_multimodal: bool = False
    image_folder: Optional[str] = field(default='/path/to/val2017')
    model_path: Optional[str] = field(default='/path/to/model')
    mask_config: Optional[str] = field(default='./psalm/mask_config/maskformer2_swin_base_384_bs16_50ep.yaml')
    image_aspect_ratio: str = 'square'
    image_grid_pinpoints: Optional[str] = field(default=None)
    json_path: str = '/path/to/coco'
    model_map_name: str = 'psalm'
    version: str = 'llava_phi'
    output_dir: str = './output/panoptic_segmentation'
    segmentation: bool = True
    eval_batch_size: int = 1
    dataloader_num_workers: int = 4
    seg_task: Optional[str] = field(default='panoptic')
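
# Every field above is exposed as a CLI flag by HfArgumentParser. Illustrative
# invocation (the paths are placeholders, not shipped defaults):
#   python <this script> --model_path /path/to/model --json_path /path/to/coco \
#       --image_folder /path/to/val2017 --output_dir ./output/panoptic_segmentation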


class StoppingCriteriaSub(StoppingCriteria):
    """Stop decoding as soon as the last generated token matches any id in `stops`."""

    def __init__(self, stops=None, encounters=1):
        super().__init__()
        # Use None instead of a mutable default list; `encounters` is accepted
        # for interface compatibility but only the first match triggers a stop.
        self.stops = stops if stops is not None else []
        self.encounters = encounters

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        last_token = input_ids[0][-1]
        return any(stop == last_token for stop in self.stops)
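
# Usage sketch (assumption: this helper is kept for generate()-style decoding,
# which the mask-based evaluation loop below does not call):
#   stopping = StoppingCriteriaList([StoppingCriteriaSub(stops=[tokenizer.eos_token_id])])
#   output_ids = model.generate(input_ids, stopping_criteria=stopping)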


def split_list(lst, n):
    """Split a list into n (roughly) equal-sized chunks."""
    chunk_size = math.ceil(len(lst) / n)
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]


def get_chunk(lst, n, k):
    """Return the k-th of the n chunks produced by split_list."""
    chunks = split_list(lst, n)
    return chunks[k]
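
# Worked example: split_list(list(range(10)), 3) uses chunk_size = ceil(10/3) = 4
# and yields [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]], so get_chunk(list(range(10)), 3, 0)
# returns [0, 1, 2, 3]. The final chunk may be shorter, and some (len, n) pairs
# yield fewer than n chunks.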


def evaluation():
    parser = transformers.HfArgumentParser(DataArguments)
    data_args = parser.parse_args_into_dataclasses()[0]
    disable_torch_init()

    # Load the pretrained PSALM checkpoint together with its tokenizer and
    # image processor.
    model_path = os.path.expanduser(data_args.model_path)
    model_name = get_model_name_from_path(model_path)
    tokenizer, model, image_processor, context_len = load_pretrained_model(
        model_path, None, model_name, mask_config=data_args.mask_config, model_args=data_args)
    data_args.image_processor = image_processor
    data_args.is_multimodal = True

    conversation_lib.default_conversation = conversation_lib.conv_templates[data_args.version]
    eval_dataset = COCO_panoptic_dataset(json_path=data_args.json_path, tokenizer=tokenizer,
                                         data_args=data_args, is_train=False)

    data_collator = DataCollatorForCOCODatasetV2(tokenizer=tokenizer)
    dataloader_params = {
        "batch_size": data_args.eval_batch_size,
        "num_workers": data_args.dataloader_num_workers,
    }
    eval_dataloader = DataLoader(eval_dataset, collate_fn=data_collator, **dataloader_params)

    # detectron2's evaluators look the dataset up through its catalogs, so
    # register the eval split and attach the panoptic ground-truth metadata.
    def load_instruction_dataset():
        return eval_dataset

    DatasetCatalog.register('instruction_dataset', load_instruction_dataset)
    MetadataCatalog.get('instruction_dataset').set(panoptic_json=eval_dataset.panoptic_json_path,
                                                   panoptic_root=eval_dataset.panoptic_gt_path)

    evaluator = my_coco_panoptic_evaluator('instruction_dataset',
                                           output_dir=data_args.output_dir,
                                           dataset_id_to_cont_id=eval_dataset.coco_id_to_cont_id,
                                           is_thing_list=eval_dataset.coco_is_thing)
    sem_evaluator = my_SemSegEvaluator('instruction_dataset',
                                       output_dir=data_args.output_dir,
                                       dataset_id_to_cont_id=eval_dataset.coco_id_to_cont_id,
                                       class_name=eval_dataset.coco_class_name[:-1],
                                       ignore_label=255)
    evaluator.reset()
    sem_evaluator.reset()

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(dtype=torch.float32, device=device).eval()
    with torch.no_grad():
        for idx, inputs in tqdm(enumerate(eval_dataloader), total=len(eval_dataloader)):
            # Move tensor inputs to the model's device; leave metadata as-is.
            inputs = {k: v.to(device) if torch.is_tensor(v) else v for k, v in inputs.items()}
            outputs = model.eval_seg(
                input_ids=inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                images=inputs['images'].float(),
                seg_info=inputs['seg_info'],
                class_name_embedding_indices=inputs['class_name_embedding_indices'],
                class_name_ids=inputs['class_name_ids'],
                cls_indices=inputs['cls_indices'],
                labels=inputs['labels'],
                is_thing_list=eval_dataset.coco_is_thing,
            )
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            evaluator.process(inputs['seg_info'], outputs)
            sem_evaluator.process(inputs['seg_info'], outputs)

    results = evaluator.evaluate()
    sem_results = sem_evaluator.evaluate()
    print(results)
    print(sem_results)

    # evaluate() may return None (e.g. on non-main processes in distributed
    # runs); normalize to an empty dict.
    return results if results is not None else {}


def intersectionAndUnionGPU(output, target, K, ignore_index=255):
    """Per-class intersection and union histograms for mIoU, computed on GPU.

    `output` and `target` are label maps with values in [0, K); pixels whose
    target equals `ignore_index` are excluded from all three histograms.
    Note that `output` is modified in place through the flattened view.
    """
    assert output.dim() in [1, 2, 3]
    assert output.shape == target.shape
    output = output.view(-1)
    target = target.view(-1)
    output[target == ignore_index] = ignore_index
    intersection = output[output == target]
    area_intersection = torch.histc(intersection, bins=K, min=0, max=K - 1)
    area_output = torch.histc(output, bins=K, min=0, max=K - 1)
    area_target = torch.histc(target, bins=K, min=0, max=K - 1)
    area_union = area_output + area_target - area_intersection
    return area_intersection, area_union, area_target
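
# Worked example (K = 3): with output = [0, 1, 2, 2] and target = [0, 2, 2, 255],
# the last pixel is ignored (torch.histc drops out-of-range values), giving
# area_intersection = [1, 0, 1] and area_union = [1, 1, 2], so per-class
# IoU = area_intersection / area_union = [1.0, 0.0, 0.5].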


if __name__ == "__main__":
    evaluation()