import io
import json
import os
import random
import zipfile

import numpy as np
import torch
from omegaconf import OmegaConf
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms
from transformers import AutoImageProcessor, CLIPImageProcessor

from DiT_VAE.diffusion.data.builder import DATASETS


def to_rgb_image(maybe_rgba: Image.Image):
    """Convert an RGBA image to RGB by compositing onto a mid-gray background."""
    if maybe_rgba.mode == 'RGB':
        return maybe_rgba
    elif maybe_rgba.mode == 'RGBA':
        rgba = maybe_rgba
        # Near-constant gray background (value 127); paste() uses the alpha
        # channel as the compositing mask.
        img = np.random.randint(127, 128, size=[rgba.size[1], rgba.size[0], 3], dtype=np.uint8)
        img = Image.fromarray(img, 'RGB')
        img.paste(rgba, mask=rgba.getchannel('A'))
        return img
    else:
        raise ValueError("Unsupported image type.", maybe_rgba.mode)
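
# Hypothetical usage of to_rgb_image: a half-transparent RGBA tile gets
# composited onto the gray value-127 background, so transparent regions
# come out neutral instead of black.
#
#   rgba = Image.new('RGBA', (64, 64), (255, 0, 0, 128))
#   rgb = to_rgb_image(rgba)   # PIL 'RGB' image, gray showing through alpha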

@DATASETS.register_module()  # register with the dataset registry (assumes an mmcv-style Registry; the DATASETS import is otherwise unused)
class TriplaneData(Dataset):
    def __init__(self,
                 data_base_dir,
                 model_names,
                 data_json_file,
                 dino_path,
                 i_drop_rate=0.1,
                 image_size=256,
                 **kwargs):
        # {'image_name': {'model_name': ..., 'z_dir': ..., 'vert_dir': ..., 'img_dir': ...}}
        with open(data_json_file) as f:
            self.dict_data_image = json.load(f)
        self.data_base_dir = data_base_dir
        self.dino_img_processor = AutoImageProcessor.from_pretrained(dino_path)
        self.size = image_size
        self.data_list = list(self.dict_data_image.keys())
        # Keep one open handle per GAN-model archive so getdata() does not
        # re-open a zip file for every sample.
        self.zip_file_dict = {}
        config_gan_model = OmegaConf.load(model_names)
        all_models = config_gan_model['gan_models'].keys()
        for model_name in all_models:
            zipfile_path = os.path.join(self.data_base_dir, model_name + '.zip')
            self.zip_file_dict[model_name] = zipfile.ZipFile(zipfile_path)
        self.transform = transforms.Compose([
            transforms.Resize(self.size, interpolation=transforms.InterpolationMode.BILINEAR),
            transforms.CenterCrop(self.size),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ])
        self.clip_image_processor = CLIPImageProcessor()
        self.i_drop_rate = i_drop_rate  # probability of dropping the image condition (classifier-free guidance)
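
    # Expected on-disk layout (a sketch inferred from the lookups above and
    # in getdata(), not from a documented schema in this repo):
    #
    #   data_json_file -- JSON mapping each sample name to its metadata, e.g.
    #       {"sample_0001": {"model_name": "ffhq",
    #                        "z_dir": "z/0001.pt",
    #                        "vert_dir": "vert/0001.pt",
    #                        "img_dir": "img/0001.png"}}
    #
    #   model_names -- OmegaConf YAML whose 'gan_models' keys name the
    #   archives, so each key <name> must have a matching <name>.zip under
    #   data_base_dir:
    #       gan_models:
    #         ffhq: ...
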
    def getdata(self, idx):
        data_name = self.data_list[idx]
        data_model_name = self.dict_data_image[data_name]['model_name']
        zipfile_loaded = self.zip_file_dict[data_model_name]
        # Latent code and mesh vertices are torch tensors stored inside the
        # per-model archive; map to CPU so dataloader workers stay off the GPU.
        with zipfile_loaded.open(self.dict_data_image[data_name]['z_dir'], 'r') as f:
            data_z = torch.load(io.BytesIO(f.read()), map_location='cpu')
        with zipfile_loaded.open(self.dict_data_image[data_name]['vert_dir'], 'r') as f:
            data_vert = torch.load(io.BytesIO(f.read()), map_location='cpu')
        with zipfile_loaded.open(self.dict_data_image[data_name]['img_dir'], 'r') as f:
            # Read the entry fully so the lazily-decoded PIL image stays
            # valid after the zip member is closed.
            raw_image = to_rgb_image(Image.open(io.BytesIO(f.read())))
        # Three views of the same frame: DINO features for the image encoder,
        # a normalized tensor for the diffusion model, and CLIP pixel values.
        dino_img = self.dino_img_processor(images=raw_image, return_tensors="pt").pixel_values
        image = self.transform(raw_image.convert("RGB"))
        clip_image = self.clip_image_processor(images=raw_image, return_tensors="pt").pixel_values
        # Randomly drop the image condition for classifier-free guidance.
        drop_image_embed = 1 if random.random() < self.i_drop_rate else 0
        return {
            "raw_image": raw_image,
            "dino_img": dino_img,
            "image": image,
            "clip_image": clip_image.clone(),
            "data_z": data_z,
            "data_vert": data_vert,
            "data_model_name": data_model_name,
            "drop_image_embed": drop_image_embed,
        }

    def __getitem__(self, idx):
        # Retry with random substitute samples so one corrupt entry does not
        # kill a long training run; give up after 20 consecutive failures.
        for _ in range(20):
            try:
                return self.getdata(idx)
            except Exception as e:
                print(f"Error details: {str(e)}")
                idx = np.random.randint(len(self))
        raise RuntimeError('Too many bad data.')

    def __len__(self):
        return len(self.data_list)

    def __getattr__(self, name):
        # Some trainers call dataset.set_epoch(epoch); provide a no-op so the
        # call is harmless on this map-style dataset.
        if name == "set_epoch":
            return lambda epoch: None
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{name}'")
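

# Minimal usage sketch (hypothetical paths and checkpoint id; adjust to your
# layout). Note that "raw_image" is a PIL image, so batching through a
# DataLoader needs a custom collate_fn or dropping that key first.
if __name__ == "__main__":
    dataset = TriplaneData(
        data_base_dir="data/triplanes",          # hypothetical archive dir
        model_names="configs/gan_models.yaml",   # hypothetical OmegaConf file
        data_json_file="data/samples.json",      # hypothetical sample index
        dino_path="facebook/dinov2-base",        # assumed DINO checkpoint
    )
    sample = dataset[0]
    print(sample["dino_img"].shape, sample["clip_image"].shape, sample["image"].shape)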