| | import os |
| |
|
| | import pandas as pd |
| | from mmengine.dist import master_only |
| | from PIL import Image |
| |
|
| | from xtuner.registry import BUILDER |
| | from mmengine.logging import print_log |
| | from .base_eval_dataset import BaseEvalDataset |
| |
|
| | from .utils import YOrN_Extraction, load_jsonl |
| | from .utils import custom_data_process |
| |
|
| |
|
def eval_func(pred_list, label_list):
    """Log binary yes/no classification metrics and return the F1 score.

    Logs the confusion-matrix counts, accuracy, precision, recall, F1 and
    the fraction of "yes" predictions through ``print_log``.

    Args:
        pred_list (list[int]): Predicted labels, ``1`` for "yes" and ``0``
            for "no".
        label_list (list[int]): Ground-truth labels with the same encoding,
            aligned element-wise with ``pred_list``.

    Returns:
        float: F1 score of the positive ("yes") class. Any metric whose
        denominator is zero (empty input, no positive predictions, no
        positive labels, ...) is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    pos = 1
    neg = 0

    def _ratio(numerator, denominator):
        # A zero denominator means the metric is undefined for this input;
        # report 0.0 so one degenerate category cannot abort the evaluation.
        return numerator / denominator if denominator else 0.0

    yes_ratio = _ratio(pred_list.count(pos), len(pred_list))

    TP, TN, FP, FN = 0, 0, 0, 0
    for pred, label in zip(pred_list, label_list):
        if pred == pos and label == pos:
            TP += 1
        elif pred == pos and label == neg:
            FP += 1
        elif pred == neg and label == neg:
            TN += 1
        elif pred == neg and label == pos:
            FN += 1

    print_log('TP\tFP\tTN\tFN\t', 'current')
    print_log(f'{TP}\t{FP}\t{TN}\t{FN}', 'current')

    precision = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    f1 = _ratio(2 * precision * recall, precision + recall)
    acc = _ratio(TP + TN, TP + TN + FP + FN)
    print_log(f'Accuracy: {acc}', 'current')
    print_log(f'Precision: {precision}', 'current')
    print_log(f'Recall: {recall}', 'current')
    print_log(f'F1 score: {f1}', 'current')
    print_log(f'Yes ratio: {yes_ratio}', 'current')
    return f1
| |
|
| |
|
class POPEDataset(BaseEvalDataset):
    """Yes/no evaluation dataset built from one or more POPE JSONL files.

    Each record read from a data file must provide the keys
    ``question_id``, ``image``, ``text`` and ``label``, where ``label`` is
    either ``'yes'`` or ``'no'``. Every data file forms one category, named
    after the file (base name without extension); ``evaluate`` reports an
    F1 score per category and returns their mean.

    Args:
        data_file (str | Iterable[str]): Path(s) of the JSONL file(s).
        coco_val_path (str): Directory that the ``image`` field of each
            record is joined onto when opening an image.
        image_processor: Config passed to ``BUILDER.build`` to create the
            image processor.
        pad_image_to_square (bool): Stored on the instance; presumably
            consumed by ``custom_data_process`` -- TODO confirm.
        metainfo: Forwarded unchanged to ``BaseEvalDataset.__init__``.
    """

    METAINFO: dict = dict(name='pope')

    def __init__(self, data_file, coco_val_path, image_processor,
                 pad_image_to_square=True,
                 metainfo=None):
        super().__init__(metainfo)
        if isinstance(data_file, str):
            data_file = [data_file]
        else:
            # Materialise once: the paths are iterated twice below, which
            # would silently yield no category names for a one-shot iterable.
            data_file = list(data_file)
        self.raw_data = [load_jsonl(f) for f in data_file]

        # One category per file, e.g. '/x/coco_pope_random.jsonl' ->
        # 'coco_pope_random'.
        self.name = [
            os.path.splitext(os.path.basename(f))[0] for f in data_file
        ]

        self.coco_val_path = coco_val_path
        self.image_processor = BUILDER.build(image_processor)
        self.pad_image_to_square = pad_image_to_square

        # File name of the per-sample report written by ``evaluate``.
        self.results_xlsx_path = 'pope-results.xlsx'
        self.data = self.load_data_list()

    def get_image(self, image):
        """Open the image file named ``image`` inside ``coco_val_path``."""
        image = Image.open(os.path.join(self.coco_val_path, image))
        return image

    def __len__(self):
        """Return the total number of samples across all data files."""
        return len(self.data)

    def load_data_list(self):
        """Flatten the raw records of all data files into one sample list.

        Returns:
            list[dict]: One dict per record with the keys ``img_id``
            (running 0-based position in the returned list), ``index``
            (the record's ``question_id``), ``img``, ``question``,
            ``answer`` and ``category`` (name of the originating file).
        """
        data_list = []
        for category, samples in zip(self.name, self.raw_data):
            for sample in samples:
                index = sample['question_id']
                image_path = sample['image']
                question = sample['text']
                answer = sample['label']
                assert answer in ['yes', 'no']
                data_list.append({
                    # The position in the flat list doubles as a unique id.
                    'img_id': len(data_list),
                    'index': index,
                    'img': image_path,
                    'question': question,
                    'answer': answer,
                    'category': category
                })
        return data_list

    def __getitem__(self, idx):
        """Return sample ``idx`` after ``custom_data_process`` handled it."""
        data = self.data[idx]
        data_dict = custom_data_process(self, data)
        return data_dict

    @master_only
    def evaluate(self, result, work_dir, show=True):
        """Score predictions, write them to an Excel file and log metrics.

        Args:
            result (Iterable[dict]): Predictions; each item provides
                ``img_id`` (matching a sample's ``img_id``) and
                ``prediction`` (the model's answer text).
            work_dir (str): Directory in which ``pope-results.xlsx`` is
                written.
            show (bool): Currently unused.

        Returns:
            float: Mean of the per-category F1 scores.

        Raises:
            ValueError: If a prediction refers to an unknown ``img_id``.
        """
        # Map ids to samples once instead of scanning the whole id list for
        # every prediction (which was quadratic in the dataset size).
        samples_by_id = {x['img_id']: x for x in self.data}
        results = []
        for pred_dict in result:
            # Normalise to a plain int so ids arriving as scalar-like
            # objects (e.g. NumPy integers) still match the dict keys.
            img_id = int(pred_dict['img_id'])
            if img_id not in samples_by_id:
                raise ValueError(
                    f'prediction refers to unknown img_id {img_id}')
            sample = samples_by_id[img_id]
            # Key order determines the column order of the Excel sheet.
            results.append({
                'question': sample.get('question'),
                'prediction': pred_dict['prediction'],
                'category': sample['category'],
                'index': sample.get('index'),
                'answer': sample.get('answer'),
            })

        results_df = pd.DataFrame(results)
        with pd.ExcelWriter(
                os.path.join(work_dir, self.results_xlsx_path),
                engine='openpyxl') as writer:
            results_df.to_excel(writer, index=False)

        score = 0
        for sub_name in self.name:
            sub_results = [x for x in results if x['category'] == sub_name]
            # A prediction/answer counts as positive only when
            # YOrN_Extraction returns 'Yes' for it.
            pred_list = [
                int(YOrN_Extraction(x['prediction']) == 'Yes')
                for x in sub_results
            ]
            label_list = [
                int(YOrN_Extraction(x['answer']) == 'Yes')
                for x in sub_results
            ]
            print_log('============================================',
                      'current')
            print_log(
                f'Category: {sub_name}, # samples: {len(sub_results)}',
                'current')
            cur_f1 = eval_func(pred_list, label_list)
            score += cur_f1

        score /= len(self.name)
        print_log('============================================', 'current')
        print_log(f'Average F1-score: {score}', 'current')
        print_log('============================================', 'current')
        print_log('POPE successfully finished evaluating', 'current')
        return score
| |
|