|
|
import marimo |
|
|
|
|
|
# Version of marimo that generated this notebook file.
__generated_with = "0.17.0"


# Top-level marimo application; cells below register themselves on it.
app = marimo.App()
|
|
|
|
|
|
|
|
@app.cell
def _():
    """Import the model loader and image-processing dependencies.

    The names in the return tuple are the values marimo makes available
    to the other cells; its order must stay in sync with marimo's
    generated code, so do not reorder it.
    """
    from open_clip import create_model_from_pretrained
    import torch
    from transformers import CLIPImageProcessor, AutoImageProcessor
    from urllib.request import urlopen
    from PIL import Image
    return (
        AutoImageProcessor,
        CLIPImageProcessor,
        Image,
        create_model_from_pretrained,
        torch,
        urlopen,
    )
|
|
|
|
|
|
|
|
@app.cell
def _(Image, urlopen):
    """Fetch the sample photo used for the preprocessing comparison."""
    # Underscore-prefixed local: private to this cell in marimo.
    _sample_url = (
        'https://media.gettyimages.com/id/1309385864/photo/las-vegas-strip-skyline-landscaped-ultra-wide-shot-at-night.jpg?s=1024x1024&w=gi&k=20&c=bBxkjq8FoK2bPKYLtqbJMCgpsvjUm-vI7-yw04cq7AU='
    )
    image = Image.open(urlopen(_sample_url))
    # Last expression is the cell's displayed output: (width, height).
    image.size
    return (image,)
|
|
|
|
|
|
|
|
@app.cell
def _():
    # Hugging Face Hub id of the checkpoint whose preprocessing is compared.
    model = "apple/DFN5B-CLIP-ViT-H-14"
    return (model,)
|
|
|
|
|
|
|
|
@app.cell
def _(
    AutoImageProcessor,
    CLIPImageProcessor,
    create_model_from_pretrained,
    model,
):
    """Build the HF image processors and the OpenCLIP preprocess transform.

    Produces three objects for the comparison cells below:
      - hf_pre:     whatever processor the Hub config resolves to (or a
                    default CLIPImageProcessor when resolution fails),
      - hf_pre_fix: a manually-configured CLIPImageProcessor,
      - preprocess: the transform returned by open_clip for this checkpoint.
    """
    try:
        hf_pre = AutoImageProcessor.from_pretrained(model)
    # FIX: was a bare `except:`, which also swallows KeyboardInterrupt and
    # SystemExit; narrow to Exception so only real lookup/load failures
    # trigger the fallback.
    except Exception:
        print("Auto image processor not found")
        hf_pre = CLIPImageProcessor()
    # Hand-built processor: resize straight to 224x224 with no center crop,
    # normalized with the CLIP mean/std constants.
    hf_pre_fix = CLIPImageProcessor(
        do_center_crop=False,
        do_normalize=True,
        do_resize=True,
        feature_extractor_type="CLIPFeatureExtractor",
        image_mean=[0.48145466, 0.4578275, 0.40821073],
        image_std=[0.26862954, 0.26130258, 0.27577711],
        size={"width": 224, "height": 224},
        do_convert_rgb=True,
    )
    # Only the preprocessing transform is needed; the model itself is discarded.
    _, preprocess = create_model_from_pretrained(f'hf-hub:{model}')
    # Cell output: show the resolved transform for inspection.
    preprocess
    return hf_pre, hf_pre_fix, preprocess
|
|
|
|
|
|
|
|
@app.cell
def _(hf_pre, hf_pre_fix, image, preprocess, torch):
    """Run the sample image through all three preprocessing pipelines."""

    def _run_hf(processor):
        # HF processors return a batched list under "pixel_values";
        # squeeze drops the batch dimension for element-wise comparison.
        return torch.tensor(processor(images=image)["pixel_values"]).squeeze()

    hf_res_fix = _run_hf(hf_pre_fix)
    hf_res = _run_hf(hf_pre)
    op_res = preprocess(image)
    # Cell output: the two shapes, to eyeball whether they even agree.
    op_res.shape, hf_res.shape
    return hf_res, hf_res_fix, op_res
|
|
|
|
|
|
|
|
@app.cell
def _(hf_res, op_res):
    # Displayed output: True only when every element of the default HF
    # result matches the OpenCLIP reference exactly.
    hf_res.eq(op_res).all()
    return
|
|
|
|
|
|
|
|
@app.cell
def _(hf_res_fix, op_res):
    # Displayed output: True only when the manually-configured processor
    # reproduces the OpenCLIP reference exactly.
    hf_res_fix.eq(op_res).all()
    return
|
|
|
|
|
|
|
|
@app.cell
def _(hf_res, hf_res_fix, image, op_res):
    """Show the original image next to the three preprocessed variants."""
    import matplotlib.pyplot as plt

    # Tensors are CHW; permute to HWC so imshow can render them.
    _panels = [
        (image, "Original Image"),
        (op_res.permute(1, 2, 0).numpy(), "OpenCLIP Preprocessed Image"),
        (hf_res.permute(1, 2, 0).numpy(), "HuggingFace Preprocessed Image"),
        (hf_res_fix.permute(1, 2, 0).numpy(), "HuggingFace FIXED Preprocessed Image"),
    ]
    fig, axs = plt.subplots(1, 4, figsize=(15, 5))
    for _ax, (_img, _title) in zip(axs, _panels):
        _ax.imshow(_img)
        _ax.set_title(_title)
    plt.show()
    return
|
|
|
|
|
|
|
|
@app.cell
def _(hf_pre_fix):
    # Serialize the manually-configured processor to JSON.
    # NOTE(review): writes "test.json" into the current working
    # directory — confirm that filename/location is intended.
    hf_pre_fix.to_json_file("test.json")
    return
|
|
|
|
|
|
|
|
@app.cell
def _():
    # Empty scratch cell (no code, no exports).
    return
|
|
|
|
|
|
|
|
# Standard marimo entry point: running this file as a script starts the app.
if __name__ == "__main__":
    app.run()
|
|
|