from diffusers import StableDiffusionBrushNetPipeline, BrushNetModel, UniPCMultistepScheduler
import torch
import cv2
import json
import os
import numpy as np
from PIL import Image
import argparse
import pandas as pd
from torchvision.transforms import Resize
from torchvision import transforms
import torch.nn.functional as F
from torchmetrics.multimodal import CLIPScore
from torchmetrics.image import PeakSignalNoiseRatio, StructuralSimilarityIndexMeasure
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
from torchmetrics.regression import MeanSquaredError
from urllib.request import urlretrieve
import open_clip
import hpsv2
import ImageReward as RM
import math
from transformers import AutoProcessor, AutoModel

def rle2mask(mask_rle, shape):
    """Decode a run-length encoding (alternating start/length values, 1-based,
    row-major) into a binary mask of the given (height, width) shape."""
    starts, lengths = [np.asarray(x, dtype=int) for x in (mask_rle[0:][::2], mask_rle[1:][::2])]
    starts -= 1  # RLE starts are 1-based
    ends = starts + lengths
    binary_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        binary_mask[lo:hi] = 1
    return binary_mask.reshape(shape)
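
# Worked example (hypothetical input): rle2mask([2, 3], (4, 4)) sets the
# flattened pixels 2..4 (1-based) of a 4x4 mask, i.e. in row-major order:
#   [[0, 1, 1, 1],
#    [0, 0, 0, 0],
#    [0, 0, 0, 0],
#    [0, 0, 0, 0]]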
class MetricsCalculator:
    def __init__(self, device, ckpt_path="../../data/ckpt") -> None:
        self.device = device

        # CLIP text-image similarity (ViT-L/14)
        self.clip_metric_calculator = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14").to(device)

        # LPIPS perceptual distance
        self.lpips_metric_calculator = LearnedPerceptualImagePatchSimilarity(net_type='squeeze').to(device)

        # LAION aesthetic predictor: a linear head over normalized CLIP ViT-L/14 image features
        self.aesthetic_model = torch.nn.Linear(768, 1)
        aesthetic_model_url = (
            "https://github.com/LAION-AI/aesthetic-predictor/blob/main/sa_0_4_vit_l_14_linear.pth?raw=true"
        )
        aesthetic_model_ckpt_path = os.path.join(ckpt_path, "sa_0_4_vit_l_14_linear.pth")
        os.makedirs(ckpt_path, exist_ok=True)
        if not os.path.exists(aesthetic_model_ckpt_path):  # download once, then reuse the cached file
            urlretrieve(aesthetic_model_url, aesthetic_model_ckpt_path)
        self.aesthetic_model.load_state_dict(torch.load(aesthetic_model_ckpt_path))
        self.aesthetic_model.eval()
        self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='openai')

        self.imagereward_model = RM.load("ImageReward-v1.0")

    def calculate_image_reward(self, image, prompt):
        # ImageReward: human-preference score for a (prompt, image) pair
        reward = self.imagereward_model.score(prompt, [image])
        return reward

    def calculate_hpsv21_score(self, image, prompt):
        # Human Preference Score v2.1
        result = hpsv2.score(image, prompt, hps_version="v2.1")[0]
        return result.item()

    def calculate_aesthetic_score(self, img):
        # Encode with CLIP, L2-normalize, then apply the linear aesthetic head.
        # Both models stay on the CPU here, so the input is not moved to self.device.
        image = self.clip_preprocess(img).unsqueeze(0)
        with torch.no_grad():
            image_features = self.clip_model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)
            prediction = self.aesthetic_model(image_features)
        return prediction.cpu().item()

    def calculate_clip_similarity(self, img, txt):
        # CLIPScore expects a CHW uint8 image tensor and a caption string
        img = np.array(img)
        img_tensor = torch.tensor(img).permute(2, 0, 1).to(self.device)
        score = self.clip_metric_calculator(img_tensor, txt)
        return score.cpu().item()

    def calculate_psnr(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask
            difference_size = mask.sum()  # average only over the masked region
        else:
            difference_size = img_pred.size  # average over all elements

        mse = ((img_pred - img_gt) ** 2).sum() / difference_size

        if mse < 1.0e-10:  # (near-)identical images: return a capped PSNR instead of dividing by zero
            return 1000
        PIXEL_MAX = 1
        return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))
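
    # Worked example: a uniform per-pixel error of 0.1 on [0, 1] images gives
    # MSE = 0.01, so PSNR = 20 * log10(1 / sqrt(0.01)) = 20 dB.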

    def calculate_lpips(self, img_gt, img_pred, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255
        img_gt = np.array(img_gt).astype(np.float32) / 255
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask

        img_pred_tensor = torch.tensor(img_pred).permute(2, 0, 1).unsqueeze(0).to(self.device)
        img_gt_tensor = torch.tensor(img_gt).permute(2, 0, 1).unsqueeze(0).to(self.device)

        # LPIPS expects inputs scaled to [-1, 1]
        score = self.lpips_metric_calculator(img_pred_tensor * 2 - 1, img_gt_tensor * 2 - 1)
        return score.cpu().item()

    def calculate_mse(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask
            difference_size = mask.sum()  # average only over the masked region
        else:
            difference_size = img_pred.size  # average over all elements

        mse = ((img_pred - img_gt) ** 2).sum() / difference_size
        return mse.item()
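
# Minimal usage sketch ("example.png" and the caption are hypothetical):
#   calc = MetricsCalculator(device="cpu")
#   img = Image.open("example.png").convert("RGB").resize((512, 512))
#   calc.calculate_aesthetic_score(img)
#   calc.calculate_clip_similarity(img, "a photo of a person")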
device = "cuda" if torch.cuda.is_available() else "cpu"

cc3m_base_dir = "/home/kis/datasets/cc3m_attempt12"
cc3m_full_regen = "/home/kis/Downloads/imgs"

# The GCC training TSV has no header row; header=None keeps the integer
# column labels (0 = caption, 1 = url) used below and avoids losing the
# first record to a spurious header.
cc3m_annotations_full = pd.read_csv("../../../../datasets/cc3m/Train_GCC-training.tsv", sep='\t', header=None)

# Only the metrics named in these columns are computed in the loop below;
# add e.g. 'Image Reward', 'HPS V2.1', or 'CLIP Similarity' to enable them.
evaluation_df = pd.DataFrame(columns=['Image ID', 'Aesthetic Score'])

metrics_calculator = MetricsCalculator(device)
mask_root = "../../../SemanticSegmentation/mask_skin"
prev = None

gender = "real"

root = "PATH_TO_THE_ROOT"
os.makedirs("evaluation", exist_ok=True)  # results are written under ./evaluation/

if gender == "man":
    # NOTE: the "man"/"woman" branches expect `annotations` to hold a headerless
    # (image id, gender, caption) table, which is not loaded in this script.
    img_root = root + "/foreground_syn_men/"
elif gender == "woman":
    img_root = root + "/foreground_syn_women/"
elif gender == "real":
    img_root = root + "/foreground/"
    annotations = cc3m_annotations_full
elif gender == "cvpr":
    img_root = root + "/foreground_cvpr_images/"
    annotations = pd.read_csv("/home/kis/Downloads/annotations_cvpr.csv")
elif gender == "syn":
    img_root = root + "/foreground_fully_synthetic/"
    annotations = cc3m_annotations_full  # assumption: synthetic images reuse the CC3M captions
elif gender == "coco":
    img_root = root + "/foreground_coco_counterfactuals/"
    annotations = pd.read_json(path_or_buf=root + "/coco_counterfactuals.jsonl", lines=True)
    annotations = annotations.set_index("id")
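
# Expected layout of img_root (inferred from the parsing below): files named
# "<image id>_<suffix>.<ext>", e.g. "000000012_0.png", alongside companion
# files containing "mask" or "original" in their names, which are skipped.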
for fname in os.listdir(img_root):
    image_name = fname.split(".")[0]
    if "mask" in image_name or "original" in image_name:  # skip companion files
        continue
    image_name = fname.split("_")[0]  # keep only the image id prefix

    print(f"evaluating image {image_name} ...")

    image_id = str(image_name).zfill(9)

    if gender in ["man", "woman"]:
        caption = annotations[(annotations[0] == image_name) & (annotations[1] == gender)][2].item()
    elif gender == "real":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "cvpr":
        caption = annotations.iloc[int(image_name), 1]
    elif gender == "syn":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "coco":
        # The last digit of the file stem picks one of the counterfactual captions.
        image_id = f'{fname.split("_")[0]}{fname.split("_")[1]}'
        n = int(fname.split(".")[0][-1])
        caption = annotations.loc[int(image_id)][n]

    image_path = f"{img_root}/{fname}"
    prompt = caption
    try:
        src_image_path = image_path
        # convert("RGB") guards against grayscale/RGBA inputs, which would
        # break the HWC->CHW permutes in the metric calculators
        src_image = Image.open(src_image_path).convert("RGB").resize((512, 512))
        evaluation_result = [str(image_id) + "_" + str(gender)]
    except FileNotFoundError:
        continue

    success = True
    for metric in evaluation_df.columns.values.tolist()[1:]:
        print(f"evaluating metric: {metric}")
        try:
            if metric == 'Image Reward':
                metric_result = metrics_calculator.calculate_image_reward(src_image, prompt)
            elif metric == 'HPS V2.1':
                metric_result = metrics_calculator.calculate_hpsv21_score(src_image, prompt)
            elif metric == 'Aesthetic Score':
                metric_result = metrics_calculator.calculate_aesthetic_score(src_image)
            elif metric == 'CLIP Similarity':
                metric_result = metrics_calculator.calculate_clip_similarity(src_image, prompt)

            evaluation_result.append(metric_result)
        except RuntimeError:
            success = False
            break

    if success:
        evaluation_df.loc[len(evaluation_df.index)] = evaluation_result
        # checkpoint after every image so partial results survive a crash
        evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")

print("The averaged evaluation result:")
averaged_results = evaluation_df.mean(numeric_only=True)
print(averaged_results)
# Write the averages to their own file so they do not overwrite the per-image table.
averaged_results.to_csv(f"evaluation/evaluation_result_{gender}_averaged.csv")
evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")

print("The generated images and evaluation results are saved in ./evaluation/")
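
# The per-image table can later be reloaded for analysis, e.g.:
#   df = pd.read_csv(f"evaluation/evaluation_result_{gender}.csv", index_col=0)
#   print(df["Aesthetic Score"].describe())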