File size: 2,093 Bytes
5aa6736
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df8ce60
7e55d00
 
5aa6736
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import gradio as gr
from core.inference import infer

def build_ui(model, processor):
    """Build the Gradio demo UI for CLIP zero-shot image classification.

    Args:
        model: A loaded zero-shot classification model (expected:
            openai/clip-vit-base-patch32, per the on-page description).
        processor: The matching preprocessor passed through to
            ``core.inference.infer`` alongside the model.

    Returns:
        gr.Blocks: The assembled (but not yet launched) Gradio demo.
    """
    # Single inference entry point shared by the Run button and the
    # Examples widget, so the two stay in sync.
    def run_inference(img, txt):
        return infer(model, processor, img, txt)

    # Browser-tab title now matches the demo content (was a stale
    # "AI Document Summarizer" copy-paste leftover).
    with gr.Blocks(title="Open AI Zero-Shot Classification") as demo:
        with gr.Column(elem_id="container"):
            gr.Markdown("# **Open AI Zero-Shot Classification**", elem_id="title")
            gr.Markdown(
                "This is the demo of model **openai/clip-vit-base-patch32** "
                "for zero-shot image classification."
            )

            with gr.Row(equal_height=True):
                with gr.Column():
                    image_input = gr.Image(type="pil", label="Upload Image", height=310)
                    text_input = gr.Textbox(label="Input labels (comma separated)")
                    run_button = gr.Button("Run", variant="primary")

                with gr.Column():
                    # Label component renders the top-5 class probabilities.
                    output = gr.Label(
                        label="Open AI Zero-Shot Classification Output",
                        num_top_classes=5
                    )

            with gr.Row(equal_height=True):
                gr.Examples(
                    examples=[
                        ["./assets/zebra.jpg", "a photo of a zebra, a photo of a horse, a photo of a donkey"],
                        ["./assets/cat.jpg", "a photo of a cat, a photo of two cats, a photo of three cats"],
                        ["./assets/fridge.jpg", "a photo of a fridge, a photo of a cupboard, a photo of a wardrobe"],
                        # Grammar fix: "a engagement" -> "an engagement" (this text
                        # is fed to the CLIP text encoder as a candidate label).
                        ["./assets/marriage.jpg", "a photo of a birthday, a photo of a marriage, a photo of an engagement"],
                        ["./assets/giraffe.jpg", "Giraffe looking at same direction, Giraffe looking at opposite direction"]
                    ],
                    inputs=[image_input, text_input],
                    outputs=[output],
                    fn=run_inference
                )

            run_button.click(
                fn=run_inference,
                inputs=[image_input, text_input],
                outputs=[output]
            )

    return demo