import gradio as gr

from core.inference import infer


def build_ui(model, processor):
    """Build the Gradio Blocks UI for CLIP zero-shot image classification.

    Args:
        model: Loaded zero-shot classification model, forwarded to ``infer``.
        processor: Matching preprocessor for ``model``, forwarded to ``infer``.

    Returns:
        The assembled (not yet launched) ``gr.Blocks`` demo.
    """

    # Single shared callback for both the Examples component and the Run
    # button, so the two code paths cannot drift apart.
    def _classify(img, txt):
        return infer(model, processor, img, txt)

    # NOTE: title previously read "AI Document Summarizer" — a copy-paste
    # leftover; this demo performs zero-shot image classification.
    with gr.Blocks(title="Open AI Zero-Shot Classification") as demo:
        with gr.Column(elem_id="container"):
            gr.Markdown("# **Open AI Zero-Shot Classification**", elem_id="title")
            gr.Markdown(
                "This is the demo of model **openai/clip-vit-base-patch32** "
                "for zero-shot image classification."
            )

            # Left column: inputs; right column: top-5 label probabilities.
            with gr.Row(equal_height=True):
                with gr.Column():
                    image_input = gr.Image(type="pil", label="Upload Image", height=310)
                    text_input = gr.Textbox(label="Input labels (comma separated)")
                    run_button = gr.Button("Run", variant="primary")
                with gr.Column():
                    output = gr.Label(
                        label="Open AI Zero-Shot Classification Output",
                        num_top_classes=5
                    )

            # Clickable example rows; paths are relative to the app's cwd.
            with gr.Row(equal_height=True):
                gr.Examples(
                    examples=[
                        ["./assets/zebra.jpg", "a photo of a zebra, a photo of a horse, a photo of a donkey"],
                        ["./assets/cat.jpg", "a photo of a cat, a photo of two cats, a photo of three cats"],
                        ["./assets/fridge.jpg", "a photo of a fridge, a photo of a cupboard, a photo of a wardrobe"],
                        ["./assets/marriage.jpg", "a photo of a birthday, a photo of a marriage, a photo of a engagement"],
                        ["./assets/giraffe.jpg", "Giraffe looking at same direction, Giraffe looking at opposite direction"]
                    ],
                    inputs=[image_input, text_input],
                    outputs=[output],
                    fn=_classify
                )

            run_button.click(
                fn=_classify,
                inputs=[image_input, text_input],
                outputs=[output]
            )

    return demo