from diffusers import StableDiffusionBrushNetPipeline, BrushNetModel, UniPCMultistepScheduler
import torch
import cv2
import json
import os
import numpy as np
from PIL import Image
import argparse
import pandas as pd
from torchvision.transforms import Resize
from torchvision import transforms
import torch.nn.functional as F
from torchmetrics.multimodal import CLIPScore
from torchmetrics.image import PeakSignalNoiseRatio, StructuralSimilarityIndexMeasure
from torchmetrics.image.lpip import LearnedPerceptualImagePatchSimilarity
from torchmetrics.regression import MeanSquaredError
from urllib.request import urlretrieve
import open_clip
import hpsv2
import ImageReward as RM
import math
from transformers import AutoProcessor, AutoModel

def rle2mask(mask_rle, shape):
    """Decode a run-length encoding (alternating start/length values, 1-based,
    row-major) into a binary mask of the given (height, width) shape."""
    starts, lengths = [np.asarray(x, dtype=int) for x in (mask_rle[0:][::2], mask_rle[1:][::2])]
    starts -= 1  # RLE starts are 1-based
    ends = starts + lengths
    binary_mask = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        binary_mask[lo:hi] = 1
    return binary_mask.reshape(shape)
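
# Worked example (hypothetical input): rle2mask([2, 3], (4, 4)) sets the
# flattened pixels 2..4 (1-based) of a 4x4 mask, i.e. in row-major order:
#   [[0, 1, 1, 1],
#    [0, 0, 0, 0],
#    [0, 0, 0, 0],
#    [0, 0, 0, 0]]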
class MetricsCalculator:
    def __init__(self, device, ckpt_path="../../data/ckpt") -> None:
        self.device = device

        # CLIP text-image similarity (ViT-L/14)
        self.clip_metric_calculator = CLIPScore(model_name_or_path="openai/clip-vit-large-patch14").to(device)

        # LPIPS perceptual distance
        self.lpips_metric_calculator = LearnedPerceptualImagePatchSimilarity(net_type='squeeze').to(device)

        # LAION aesthetic predictor: a linear head over normalized CLIP ViT-L/14 image features
        self.aesthetic_model = torch.nn.Linear(768, 1)
        aesthetic_model_url = (
            "https://github.com/LAION-AI/aesthetic-predictor/blob/main/sa_0_4_vit_l_14_linear.pth?raw=true"
        )
        aesthetic_model_ckpt_path = os.path.join(ckpt_path, "sa_0_4_vit_l_14_linear.pth")
        os.makedirs(ckpt_path, exist_ok=True)
        if not os.path.exists(aesthetic_model_ckpt_path):  # download once, then reuse the cached file
            urlretrieve(aesthetic_model_url, aesthetic_model_ckpt_path)
        self.aesthetic_model.load_state_dict(torch.load(aesthetic_model_ckpt_path))
        self.aesthetic_model.eval()
        self.clip_model, _, self.clip_preprocess = open_clip.create_model_and_transforms('ViT-L-14', pretrained='openai')

        self.imagereward_model = RM.load("ImageReward-v1.0")

    def calculate_image_reward(self, image, prompt):
        # ImageReward: human-preference score for a (prompt, image) pair
        reward = self.imagereward_model.score(prompt, [image])
        return reward

    def calculate_hpsv21_score(self, image, prompt):
        # Human Preference Score v2.1
        result = hpsv2.score(image, prompt, hps_version="v2.1")[0]
        return result.item()

    def calculate_aesthetic_score(self, img):
        # Encode with CLIP, L2-normalize, then apply the linear aesthetic head.
        # Both models stay on the CPU here, so the input is not moved to self.device.
        image = self.clip_preprocess(img).unsqueeze(0)
        with torch.no_grad():
            image_features = self.clip_model.encode_image(image)
            image_features /= image_features.norm(dim=-1, keepdim=True)
            prediction = self.aesthetic_model(image_features)
        return prediction.cpu().item()

    def calculate_clip_similarity(self, img, txt):
        # CLIPScore expects a CHW uint8 image tensor and a caption string
        img = np.array(img)
        img_tensor = torch.tensor(img).permute(2, 0, 1).to(self.device)
        score = self.clip_metric_calculator(img_tensor, txt)
        return score.cpu().item()

    def calculate_psnr(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask
            difference_size = mask.sum()  # average only over the masked region
        else:
            difference_size = img_pred.size  # average over all elements

        mse = ((img_pred - img_gt) ** 2).sum() / difference_size

        if mse < 1.0e-10:  # (near-)identical images: return a capped PSNR instead of dividing by zero
            return 1000
        PIXEL_MAX = 1
        return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))
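
    # Worked example: a uniform per-pixel error of 0.1 on [0, 1] images gives
    # MSE = 0.01, so PSNR = 20 * log10(1 / sqrt(0.01)) = 20 dB.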

    def calculate_lpips(self, img_gt, img_pred, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255
        img_gt = np.array(img_gt).astype(np.float32) / 255
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask

        img_pred_tensor = torch.tensor(img_pred).permute(2, 0, 1).unsqueeze(0).to(self.device)
        img_gt_tensor = torch.tensor(img_gt).permute(2, 0, 1).unsqueeze(0).to(self.device)

        # LPIPS expects inputs scaled to [-1, 1]
        score = self.lpips_metric_calculator(img_pred_tensor * 2 - 1, img_gt_tensor * 2 - 1)
        return score.cpu().item()

    def calculate_mse(self, img_pred, img_gt, mask=None):
        img_pred = np.array(img_pred).astype(np.float32) / 255.
        img_gt = np.array(img_gt).astype(np.float32) / 255.
        assert img_pred.shape == img_gt.shape, "Image shapes should be the same."

        if mask is not None:
            mask = np.array(mask).astype(np.float32)
            img_pred = img_pred * mask
            img_gt = img_gt * mask
            difference_size = mask.sum()  # average only over the masked region
        else:
            difference_size = img_pred.size  # average over all elements

        mse = ((img_pred - img_gt) ** 2).sum() / difference_size
        return mse.item()
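
# Minimal usage sketch ("example.png" and the caption are hypothetical):
#   calc = MetricsCalculator(device="cpu")
#   img = Image.open("example.png").convert("RGB").resize((512, 512))
#   calc.calculate_aesthetic_score(img)
#   calc.calculate_clip_similarity(img, "a photo of a person")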
device = "cuda" if torch.cuda.is_available() else "cpu"

cc3m_base_dir = "/home/kis/datasets/cc3m_attempt12"
cc3m_full_regen = "/home/kis/Downloads/imgs"

# The GCC training TSV has no header row; header=None keeps the integer
# column labels (0 = caption, 1 = url) used below and avoids losing the
# first record to a spurious header.
cc3m_annotations_full = pd.read_csv("../../../../datasets/cc3m/Train_GCC-training.tsv", sep='\t', header=None)

# Only the metrics named in these columns are computed in the loop below;
# add e.g. 'Image Reward', 'HPS V2.1', or 'CLIP Similarity' to enable them.
evaluation_df = pd.DataFrame(columns=['Image ID', 'Aesthetic Score'])

metrics_calculator = MetricsCalculator(device)
mask_root = "../../../SemanticSegmentation/mask_skin"
prev = None

gender = "real"

root = "PATH_TO_THE_ROOT"
os.makedirs("evaluation", exist_ok=True)  # results are written under ./evaluation/

if gender == "man":
    # NOTE: the "man"/"woman" branches expect `annotations` to hold a headerless
    # (image id, gender, caption) table, which is not loaded in this script.
    img_root = root + "/foreground_syn_men/"
elif gender == "woman":
    img_root = root + "/foreground_syn_women/"
elif gender == "real":
    img_root = root + "/foreground/"
    annotations = cc3m_annotations_full
elif gender == "cvpr":
    img_root = root + "/foreground_cvpr_images/"
    annotations = pd.read_csv("/home/kis/Downloads/annotations_cvpr.csv")
elif gender == "syn":
    img_root = root + "/foreground_fully_synthetic/"
    annotations = cc3m_annotations_full  # assumption: synthetic images reuse the CC3M captions
elif gender == "coco":
    img_root = root + "/foreground_coco_counterfactuals/"
    annotations = pd.read_json(path_or_buf=root + "/coco_counterfactuals.jsonl", lines=True)
    annotations = annotations.set_index("id")
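
# Expected layout of img_root (inferred from the parsing below): files named
# "<image id>_<suffix>.<ext>", e.g. "000000012_0.png", alongside companion
# files containing "mask" or "original" in their names, which are skipped.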
for fname in os.listdir(img_root):
    image_name = fname.split(".")[0]
    if "mask" in image_name or "original" in image_name:  # skip companion files
        continue
    image_name = fname.split("_")[0]  # keep only the image id prefix

    print(f"evaluating image {image_name} ...")

    image_id = str(image_name).zfill(9)

    if gender in ["man", "woman"]:
        caption = annotations[(annotations[0] == image_name) & (annotations[1] == gender)][2].item()
    elif gender == "real":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "cvpr":
        caption = annotations.iloc[int(image_name), 1]
    elif gender == "syn":
        caption = annotations.iloc[int(image_id), 0]
    elif gender == "coco":
        # The last digit of the file stem picks one of the counterfactual captions.
        image_id = f'{fname.split("_")[0]}{fname.split("_")[1]}'
        n = int(fname.split(".")[0][-1])
        caption = annotations.loc[int(image_id)][n]

    image_path = f"{img_root}/{fname}"
    prompt = caption
    try:
        src_image_path = image_path
        # convert("RGB") guards against grayscale/RGBA inputs, which would
        # break the HWC->CHW permutes in the metric calculators
        src_image = Image.open(src_image_path).convert("RGB").resize((512, 512))
        evaluation_result = [str(image_id) + "_" + str(gender)]
    except FileNotFoundError:
        continue

    success = True
    for metric in evaluation_df.columns.values.tolist()[1:]:
        print(f"evaluating metric: {metric}")
        try:
            if metric == 'Image Reward':
                metric_result = metrics_calculator.calculate_image_reward(src_image, prompt)
            elif metric == 'HPS V2.1':
                metric_result = metrics_calculator.calculate_hpsv21_score(src_image, prompt)
            elif metric == 'Aesthetic Score':
                metric_result = metrics_calculator.calculate_aesthetic_score(src_image)
            elif metric == 'CLIP Similarity':
                metric_result = metrics_calculator.calculate_clip_similarity(src_image, prompt)

            evaluation_result.append(metric_result)
        except RuntimeError:
            success = False
            break

    if success:
        evaluation_df.loc[len(evaluation_df.index)] = evaluation_result
        # checkpoint after every image so partial results survive a crash
        evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")

print("The averaged evaluation result:")
averaged_results = evaluation_df.mean(numeric_only=True)
print(averaged_results)
# Write the averages to their own file so they do not overwrite the per-image table.
averaged_results.to_csv(f"evaluation/evaluation_result_{gender}_averaged.csv")
evaluation_df.to_csv(f"evaluation/evaluation_result_{gender}.csv")

print("The generated images and evaluation results are saved in ./evaluation/")
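
# The per-image table can later be reloaded for analysis, e.g.:
#   df = pd.read_csv(f"evaluation/evaluation_result_{gender}.csv", index_col=0)
#   print(df["Aesthetic Score"].describe())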