Dongxu Li committed · Commit 4ecd25d · 1 Parent: 0314a2f
change ui
app.py
CHANGED
@@ -125,12 +125,20 @@ def inference_caption(
     return output[0]
 
 
+def clear_fn(image_input, chatbot, chat_input, caption_output, state):
+    if image_input is None:
+        return (None, "", "", [])
+    else:
+        return chatbot, chat_input, caption_output, state
+
+
 title = """<h1 align="center">BLIP-2</h1>"""
-description = """Gradio demo for BLIP-2, image-to-text generation from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them
-<
+description = """Gradio demo for BLIP-2, image-to-text generation from Salesforce Research. To use it, simply upload your image, or click one of the examples to load them.
+<br> <strong>Disclaimer</strong>: This is a research prototype and is not intended for production use. No data including but not restricted to text and images is collected."""
 article = """<strong>Paper</strong>: <a href='https://arxiv.org/abs/2301.12597' target='_blank'>BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>
 <br> <strong>Code</strong>: BLIP2 is now integrated into GitHub repo: <a href='https://github.com/salesforce/LAVIS' target='_blank'>LAVIS: a One-stop Library for Language and Vision</a>
 <br> <strong>Project Page</strong>: <a href='https://github.com/salesforce/LAVIS/tree/main/projects/blip2' target='_blank'> BLIP2 on LAVIS</a>
+<br> <strong>Description</strong>: Captioning results from <strong>BLIP2_OPT_6.7B</strong>. Chat results from <strong>BLIP2_FlanT5xxl</strong>.
 """
 
 endpoint = Endpoint()
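The clear_fn added above feeds the image_input.change event wired up later in this commit: if the uploaded image is cleared, the chat history, chat box, caption box, and conversation state are all reset; swapping in a new image leaves them untouched. Below is a minimal self-contained sketch of that reset pattern, assuming the Gradio 3.x-era Blocks API this Space uses; the component names mirror the diff, but the commit does not show how state is defined, so gr.State([]) here is an assumption.

import gradio as gr

def clear_fn(image_input, chatbot, chat_input, caption_output, state):
    # A removed image invalidates the running conversation:
    # wipe the chat history, both text boxes, and the state list.
    if image_input is None:
        return (None, "", "", [])
    else:
        return chatbot, chat_input, caption_output, state

with gr.Blocks() as demo:
    image_input = gr.Image(type="pil")
    chatbot = gr.Chatbot(label="Chat Output")
    chat_input = gr.Textbox(lines=1, label="Chat Input")
    caption_output = gr.Textbox(lines=1, label="Caption Output")
    state = gr.State([])  # assumed: conversation history, as in typical chat demos

    # Five inputs in, four outputs back, exactly as wired in the commit.
    image_input.change(
        clear_fn,
        [image_input, chatbot, chat_input, caption_output, state],
        [chatbot, chat_input, caption_output, state],
    )

demo.launch()

Note the guard carried over from the diff: only image_input is None triggers the reset; uploading a different image echoes the current values back unchanged, so the conversation survives an image swap.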
@@ -147,6 +155,7 @@ with gr.Blocks() as iface:
     gr.Markdown(title)
     gr.Markdown(description)
     gr.Markdown(article)
+
     with gr.Row():
         with gr.Column():
             image_input = gr.Image(type="pil")
@@ -189,54 +198,61 @@ with gr.Blocks() as iface:
         with gr.Column():
 
             with gr.Column():
-                with gr.
-                with gr.Row():
+                caption_output = gr.Textbox(lines=1, label="Caption Output")
+                caption_button = gr.Button(
+                    value="Caption it!", interactive=True, variant="primary"
+                )
+                caption_button.click(
+                    inference_caption,
+                    [
+                        image_input,
+                        sampling,
+                        temperature,
+                        len_penalty,
+                        rep_penalty,
+                    ],
+                    [caption_output],
+                )
+
+            gr.Markdown("""Trying prompting your input for chat; e.g. recommended prompt for QA, \"Question: {} Answer:\"""")
+            with gr.Row():
+                with gr.Column():
                     chatbot = gr.Chatbot(label="Chat Output (from FlanT5)")
-                with gr.
+
+                # with gr.Row():
+                with gr.Column():
+                    chat_input = gr.Textbox(lines=1, label="Chat Input")
+
+                    with gr.Row():
+                        clear_button = gr.Button(value="Clear", interactive=True)
+                        clear_button.click(
+                            lambda: ("", [], []),
+                            [],
+                            [chat_input, chatbot, state],
+                        )
+
+                        submit_button = gr.Button(
+                            value="Submit", interactive=True, variant="primary"
+                        )
+                        submit_button.click(
+                            inference_chat,
+                            [
+                                image_input,
+                                chat_input,
+                                sampling,
+                                temperature,
+                                len_penalty,
+                                rep_penalty,
+                                state,
+                            ],
+                            [chatbot, state],
+                        )
+
+                    image_input.change(
+                        clear_fn,
+                        [image_input, chatbot, chat_input, caption_output, state],
+                        [chatbot, chat_input, caption_output, state]
+                    )
 
     examples = gr.Examples(
         examples=examples,
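The rest of the hunk is standard Blocks event wiring: each Button.click(fn, inputs, outputs) call reads the current values of the input components, passes them positionally to fn, and routes fn's return values into the output components. The Clear button uses the same mechanism with no inputs at all, returning ("", [], []) to empty chat_input, chatbot, and state. Here is a compact sketch of the caption path; fake_inference_caption is a hypothetical stand-in for the Space's inference_caption endpoint call, and the widget types and ranges for sampling, temperature, len_penalty, and rep_penalty are assumptions, since the commit only references those components by name.

import gradio as gr

def fake_inference_caption(image, sampling, temperature, len_penalty, rep_penalty):
    # Hypothetical stand-in for the Space's inference_caption, which
    # forwards these arguments to a hosted BLIP-2 endpoint.
    if image is None:
        return "Please upload an image first."
    return f"(caption for a {image.size[0]}x{image.size[1]} image, T={temperature})"

with gr.Blocks() as demo:
    image_input = gr.Image(type="pil")
    # Assumed widget types and ranges; not shown in this commit.
    sampling = gr.Checkbox(label="Sampling")
    temperature = gr.Slider(0.1, 2.0, value=1.0, label="Temperature")
    len_penalty = gr.Slider(-2.0, 2.0, value=1.0, label="Length Penalty")
    rep_penalty = gr.Slider(1.0, 5.0, value=1.5, label="Repetition Penalty")
    caption_output = gr.Textbox(lines=1, label="Caption Output")

    caption_button = gr.Button(value="Caption it!", interactive=True, variant="primary")
    caption_button.click(
        fake_inference_caption,
        [image_input, sampling, temperature, len_penalty, rep_penalty],
        [caption_output],
    )

demo.launch()

The Submit button works the same way but also threads state through both the input and output lists, which is how the FlanT5 chat keeps multi-turn context across clicks.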