# How to use
| import torch | |
| from diffusers.models.autoencoders.autoencoder_kl import AutoencoderKL | |
| from diffusers import AutoencoderDC | |
| from torchvision.utils import save_image | |
| import numpy as np | |
| import h5py | |
# Load the pretrained DC-AE autoencoder and decode one stored image latent
# back into a PNG (sanity-check for the latents produced by this repo).
dc_encoder = AutoencoderDC.from_pretrained(
    "mit-han-lab/dc-ae-f64c128-in-1.0-diffusers", torch_dtype=torch.float32
).to('cuda')
with h5py.File('latent_folder/image_latents.hdf5', 'r', libver='latest', swmr=True) as f:
    print(list(f.keys()))
    # Materialize the whole dataset while the file is still open.
    dataset = f['image_latents'][:]
# NOTE(review): 35 looks like the inverse of a scale applied when the latents
# were stored — confirm against the encoding script before changing it.
latents = np.expand_dims(dataset[5] * 35, axis=0)  # add a batch dimension
latents = torch.from_numpy(latents).float().to('cuda')
with torch.no_grad():  # inference only — don't build an autograd graph
    y = dc_encoder.decode(latents).sample
# Decoder output is in [-1, 1]; map to [0, 1] for save_image.
save_image(y * 0.5 + 0.5, "demo_dc_ae.png")
# Load CLIP ViT-L/14 (used below to encode text prompts) onto the GPU.
import clip

model, _ = clip.load("ViT-L/14")
model = model.to('cuda')  # nn.Module.to is in-place and returns self
def encode_text(label, model, device):
    """Encode a text prompt (or list of prompts) with CLIP.

    Args:
        label: str or list of str; tokenization truncates to CLIP's
            context length (``truncate=True``).
        model: a loaded CLIP model exposing ``encode_text``.
        device: device the model lives on (e.g. ``'cuda'``).

    Returns:
        The text embedding tensor, moved to the CPU. Computed under
        ``torch.no_grad()``, so no autograd graph is retained.
    """
    text_tokens = clip.tokenize(label, truncate=True).to(device)
    with torch.no_grad():  # inference only — callers previously had to .detach()
        text_encoding = model.encode_text(text_tokens)
    return text_encoding.cpu()
import h5py

# Load the precomputed CLIP text encodings into memory. A context manager
# guarantees the file handle is closed even if reading raises (the manual
# open/close version leaked the handle on error, and was inconsistent with
# how the image latents are read earlier in this script).
with h5py.File('latent_folder/text_encodings.hdf5', 'r') as f:
    print(list(f.keys()))
    # Materialize the dataset so it outlives the file handle.
    dataset = f['text_encodings'][:]
def cosine_similarity(v1, v2):
    """Return the cosine similarity between two vectors.

    Both inputs are flattened first, so shapes like ``(1, 768)`` and
    ``(768,)`` compare correctly regardless of which argument carries the
    extra batch dimension (the original only reshaped ``v1``).

    Args:
        v1: array-like with the same total number of elements as ``v2``.
        v2: array-like vector.

    Returns:
        float in [-1, 1]; undefined (nan/inf) if either norm is zero.
    """
    v1 = np.ravel(v1)
    v2 = np.ravel(v2)
    dot_product = np.dot(v1, v2)
    norm_v1 = np.linalg.norm(v1)
    norm_v2 = np.linalg.norm(v2)
    return dot_product / (norm_v1 * norm_v2)
# Encode a sample caption and compare it with the stored encoding for
# index 5. The original discarded the similarity value (only visible as a
# notebook cell result), so capture and print it.
textembed = encode_text("The double-cut pork chop at The Ainsworth in", model, 'cuda')
similarity = cosine_similarity(textembed.cpu().detach().numpy(), dataset[5])
print(similarity)
Dataset link: https://huggingface.co/datasets/zzliang/GRIT