|
|
import gradio as gr
import open_clip
import torch
from PIL import Image
|
|
|
|
|
# Load the CoCa (Contrastive Captioner) checkpoint pretrained on LAION-2B.
# create_model_and_transforms returns the model plus two preprocessing
# transforms; the first (presumably the train-time transform) is discarded
# and the second is used at inference time — confirm against open_clip docs.
model, _, transform = open_clip.create_model_and_transforms(


    model_name="coca_ViT-L-14",


    pretrained="laion2B-s13B-b90k"


)
|
|
def resize_background(img):
    """Letterbox *img* onto a 512x512 white canvas, preserving aspect ratio.

    The previous implementation pasted the image at its native size, which
    cropped any image larger than 512px (the centering offsets went
    negative) and could discard content for large non-square inputs.  Here
    the image is first scaled so its longer side is 512, then centered on
    a white square background.

    Args:
        img: a PIL.Image of any size and mode.

    Returns:
        A 512x512 RGB PIL.Image.
    """
    width, height = img.size

    # Scale so the longer side becomes exactly 512, keeping aspect ratio.
    scale = 512 / max(width, height)
    new_w = max(1, round(width * scale))
    new_h = max(1, round(height * scale))
    img = img.convert("RGB").resize((new_w, new_h), Image.LANCZOS)

    # Pad the shorter side with white borders, centering the image.
    canvas = Image.new("RGB", (512, 512), color=(255, 255, 255))
    canvas.paste(img, ((512 - new_w) // 2, (512 - new_h) // 2))
    return canvas
|
|
|
|
|
def generate_caption(image):
    """Generate a text caption for *image* with the CoCa model.

    Args:
        image: a PIL.Image supplied by the Gradio image widget.

    Returns:
        The decoded caption string, with the CoCa start/end sentinel
        tokens stripped.
    """
    im = resize_background(image)

    # Apply the model's preprocessing transform and add a batch dimension.
    im = transform(im).unsqueeze(0)

    # Inference only: disabling autograd avoids building a gradient graph
    # in the first place, which is cheaper than detaching afterwards.
    with torch.no_grad():
        generated = model.generate(im)

    # decode() yields e.g. "<start_of_text>a photo of ...<end_of_text>";
    # keep only the text between the sentinel tokens.
    decoded = open_clip.decode(generated[0])
    return decoded.split("<start_of_text>")[1].split("<end_of_text>")[0]
|
|
|
|
|
# Assemble the Gradio interface: an image input, a caption output, and a
# button wired to generate_caption.
with gr.Blocks() as demo:
    gr.Markdown("## Captioning with OpenCLIP CoCa")
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(label="Image to Caption", type="pil")
            caption_box = gr.Textbox(label="Caption")
            caption_btn = gr.Button("Generate caption")
            caption_btn.click(
                fn=generate_caption,
                inputs=input_image,
                outputs=caption_box,
            )

demo.launch()