Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| import numpy as np | |
| from io import BytesIO | |
| from PIL import Image as PImage | |
| from torch import cuda | |
| from sklearn.metrics.pairwise import euclidean_distances, cosine_distances | |
| from transformers import AutoModel, AutoProcessor, pipeline | |
| from utils import draw_results, embed_image, embed_word, idxs_along_axes, idxs_by_dist, make_image | |
# --- Device & model configuration ---------------------------------------
# Use the GPU when one is available; everything below is placed on DEVICE.
DEVICE = "cuda" if cuda.is_available() else "cpu"

# CLIP_MODEL = "google/siglip2-large-patch16-256"
CLIP_MODEL = "openai/clip-vit-large-patch14"
DETR_MODEL = "facebook/detr-resnet-50"
OWL_MODEL = "google/owlv2-base-patch16"

# Closed-vocabulary object detection (COCO label set).
detr = pipeline(task="object-detection",
                model=DETR_MODEL,
                device=DEVICE)

# Open-vocabulary (zero-shot) object detection.
owl = pipeline(task="zero-shot-object-detection",
               model=OWL_MODEL,
               device=DEVICE)

clip_processor = AutoProcessor.from_pretrained(CLIP_MODEL)
# FIX: the original used `from_pretrained(..., device_map="auto").to(DEVICE)`.
# `device_map="auto"` dispatches the model with accelerate hooks, and calling
# `.to()` on a hook-dispatched model raises a RuntimeError in transformers —
# a likely cause of this Space's runtime error. Load normally, then move.
clip = AutoModel.from_pretrained(CLIP_MODEL).to(DEVICE)
def run_detr(img):
    """Detect COCO objects in *img* with the DETR pipeline and draw the boxes."""
    return draw_results(img, detr(img))
def run_owl(img, classes_str):
    """Zero-shot detect the comma-separated labels in *classes_str* on *img*.

    Args:
        img: a PIL image from the Gradio input component.
        classes_str: comma-separated candidate labels, e.g. "cat, dog".

    Returns:
        The image annotated via ``draw_results``.
    """
    # FIX: a blank textbox previously produced [""] and "a, b," produced a
    # trailing "" entry, so the pipeline was asked to detect the empty-string
    # label. Drop blank entries after stripping whitespace.
    classes = [c.strip() for c in classes_str.split(",") if c.strip()]
    predictions = owl(img, candidate_labels=classes)
    return draw_results(img, predictions)
def run_clip(files, word0, word1=""):
    """Lay out the uploaded images by CLIP similarity to text descriptor(s).

    Args:
        files: Gradio file objects; each exposes a ``.name`` filesystem path.
        word0: first text descriptor (always used).
        word1: optional second descriptor; when non-empty the images are
            arranged along the two descriptor axes instead of a 1-D ranking.

    Returns:
        A composite image produced by ``make_image``.
    """
    w0e = embed_word(word0, clip_processor, clip, DEVICE)

    imgs = []
    ies = []
    for f in files:
        # FIX: close the underlying file handle instead of leaking it;
        # `convert` forces a load, so the data survives the `with` exit.
        with PImage.open(f.name) as raw:
            img = raw.convert("RGB")
        # Normalize every thumbnail to 256 px tall, preserving aspect ratio.
        img = img.resize((int(256 * img.width / img.height), 256))
        imgs.append(img)
        ies.append(embed_image(img, clip_processor, clip, DEVICE))

    if word1 == "":
        # Single descriptor: rank images by embedding distance to word0.
        ordered_idxs = idxs_by_dist(ies, w0e)
    else:
        # Two descriptors: place images along the (word0, word1) axes.
        # FIX: the original embedded word1 unconditionally, wasting a model
        # call on the empty string in the single-descriptor case.
        w1e = embed_word(word1, clip_processor, clip, DEVICE)
        ordered_idxs = idxs_along_axes(ies, (w0e, w1e))
    return make_image(imgs, ordered_idxs)
# Example descriptor pairs for the contrastive-embedding (CLIP) demo.
# NOTE(review): this list is not referenced anywhere in the visible code —
# no `examples=` argument on the Interfaces below uses it; confirm whether
# it should be wired into the "clip" Interface.
examples = [
    ("painted portrait young person", "painted portrait old person"),
    ("painted portrait happy person", "painted portrait worried person"),
]
# Stack the three demos vertically inside one Blocks app. Each Interface
# renders in place when constructed inside a `with gr.Blocks()` context.
with gr.Blocks() as demo:
    # 1) Closed-vocabulary detection: image in, annotated image out.
    gr.Interface(
        title="Object Detection",
        description="[DETR](https://huggingface.co/facebook/detr-resnet-50) model from facebook (2020), trained on [COCO 2017](https://github.com/amikelive/coco-labels/blob/master/coco-labels-2014_2017.txt) dataset and labels.",
        api_name="object",
        fn=run_detr,
        inputs=gr.Image(type="pil"),
        outputs=gr.Image(format="jpeg"),
        flagging_mode="never",
    )
    # 2) Zero-shot detection: image + comma-separated labels.
    gr.Interface(
        title="Zero-Shot Object Detection",
        description="[OWLv2](https://huggingface.co/google/owlv2-large-patch14-ensemble) model from google (2023).",
        api_name="zero",
        fn=run_owl,
        inputs=[gr.Image(type="pil"), gr.Textbox(label="Object", show_label=True)],
        outputs=gr.Image(format="jpeg"),
        flagging_mode="never",
    )
    # 3) CLIP arrangement: multiple image files + one or two text descriptors.
    gr.Interface(
        title="Contrastive Embedding",
        description="[CLIP](https://huggingface.co/openai/clip-vit-large-patch14) model from openai (2021).",
        api_name="clip",
        fn=run_clip,
        inputs=[gr.File(file_count="multiple"),
                gr.Textbox(label="1st Descriptor", show_label=True),
                gr.Textbox(label="2nd Descriptor", show_label=True)],
        outputs=gr.Image(format="jpeg"),
        flagging_mode="never",
    )

# Launch the app only when run as a script (Spaces also imports this module).
if __name__ == "__main__":
    demo.launch()