import marimo

__generated_with = "0.17.0"
app = marimo.App()


@app.cell
def _():
    from open_clip import create_model_from_pretrained
    import torch
    from transformers import CLIPImageProcessor, AutoImageProcessor
    from urllib.request import urlopen
    from PIL import Image
    return (
        AutoImageProcessor,
        CLIPImageProcessor,
        Image,
        create_model_from_pretrained,
        torch,
        urlopen,
    )


@app.cell
def _(Image, urlopen):
    # Fetch a non-square test image so resize/crop differences are visible.
    image = Image.open(urlopen(
        # 'https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/beignets-task-guide.png'
        'https://media.gettyimages.com/id/1309385864/photo/las-vegas-strip-skyline-landscaped-ultra-wide-shot-at-night.jpg?s=1024x1024&w=gi&k=20&c=bBxkjq8FoK2bPKYLtqbJMCgpsvjUm-vI7-yw04cq7AU='
    ))
    image.size
    return (image,)


@app.cell
def _():
    # HF Hub id of the checkpoint whose preprocessing we compare.
    model = "apple/DFN5B-CLIP-ViT-H-14"
    return (model,)


@app.cell
def _(
    AutoImageProcessor,
    CLIPImageProcessor,
    create_model_from_pretrained,
    model,
):
    # Build three preprocessors to compare:
    #   hf_pre     - whatever the hub ships (falls back to CLIP defaults),
    #   hf_pre_fix - manually configured to mimic OpenCLIP's transform,
    #   preprocess - the reference OpenCLIP transform itself.
    try:
        hf_pre = AutoImageProcessor.from_pretrained(model)
    except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
        print("Auto image processor not found")
        hf_pre = CLIPImageProcessor()

    # do_center_crop=False plus an explicit 224x224 resize is intended to
    # reproduce OpenCLIP's preprocessing for this checkpoint.
    hf_pre_fix = CLIPImageProcessor(
        do_center_crop=False,
        do_normalize=True,
        do_resize=True,
        feature_extractor_type="CLIPFeatureExtractor",
        image_mean=[0.48145466, 0.4578275, 0.40821073],
        image_std=[0.26862954, 0.26130258, 0.27577711],
        size={"width": 224, "height": 224},
        do_convert_rgb=True,
    )

    _, preprocess = create_model_from_pretrained(f'hf-hub:{model}')
    preprocess
    return hf_pre, hf_pre_fix, preprocess


@app.cell
def _(hf_pre, hf_pre_fix, image, preprocess, torch):
    # Run the same image through all three pipelines; squeeze the HF batch
    # dimension so shapes are comparable with OpenCLIP's (C, H, W) output.
    hf_res_fix = torch.tensor(hf_pre_fix(images=image)["pixel_values"]).squeeze()
    hf_res = torch.tensor(hf_pre(images=image)["pixel_values"]).squeeze()
    op_res = preprocess(image)
    op_res.shape, hf_res.shape
    return hf_res, hf_res_fix, op_res


@app.cell
def _(hf_res, op_res):
    # Default HF processor vs OpenCLIP: element-wise equality check.
    (hf_res == op_res).all()
    return


@app.cell
def _(hf_res_fix, op_res):
    # Fixed HF processor vs OpenCLIP: element-wise equality check.
    (hf_res_fix == op_res).all()
    return


@app.cell
def _(hf_res, hf_res_fix, image, op_res):
    # view the images
    import matplotlib.pyplot as plt

    fig, axs = plt.subplots(1, 4, figsize=(15, 5))
    axs[0].imshow(image)
    axs[0].set_title("Original Image")
    axs[1].imshow(op_res.permute(1, 2, 0).numpy())
    axs[1].set_title("OpenCLIP Preprocessed Image")
    axs[2].imshow(hf_res.permute(1, 2, 0).numpy())
    axs[2].set_title("HuggingFace Preprocessed Image")
    axs[3].imshow(hf_res_fix.permute(1, 2, 0).numpy())
    axs[3].set_title("HuggingFace FIXED Preprocessed Image")
    plt.show()
    return


@app.cell
def _(hf_pre_fix):
    # Persist the corrected processor config for reuse.
    hf_pre_fix.to_json_file("test.json")
    return


@app.cell
def _():
    return


if __name__ == "__main__":
    app.run()