am committed on
Commit
081767b
·
1 Parent(s): 394c7b7
Files changed (1) hide show
  1. app.py +98 -73
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
3
- from transformers.image_utils import load_image
4
  from transformers.image_transforms import resize
5
  from threading import Thread
6
  import re
@@ -13,10 +13,8 @@ import os
13
  from transformers import Qwen2_5_VLForConditionalGeneration
14
 
15
  pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/testmodel2")
16
-
17
  auth_token = os.environ.get("HF_TOKEN") or True
18
-
19
-
20
 
21
  model = AutoModelForImageTextToText.from_pretrained(
22
  pretrained_model_name_or_path=pretrained_model_name_or_path,
@@ -40,76 +38,50 @@ processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path,
40
 
41
  @spaces.GPU
42
  def model_inference(
43
- input_dict, history
44
  ):
45
 
46
- print(f"input_dict: {input_dict}")
47
- print(f"history: {history}")
48
-
49
- text = input_dict["text"]
50
 
51
- if len(history) > 0:
52
- try:
53
- image = history[0]['content'][0]
54
- except:
55
- raise gr.Error("Please refresh the page to start over.")
56
-
57
- else:
58
- try:
59
- image = input_dict["files"][0]
60
- except:
61
- raise gr.Error("Please provide an image.", duration=2)
62
 
63
  if len(text) == 0:
64
- raise gr.Error("Please input a query.", duration=2)
 
65
 
66
- if len(image) == 0:
67
- raise gr.Error("Please provide an image.", duration=2)
68
 
69
- image = load_image(image)
 
70
 
71
- resulting_messages=[]
72
 
 
73
  if len(history) > 0:
74
- for i in range(1, len(history)):
 
75
  h = history[i]
76
- resulting_messages.append({
77
- "role": h['role'],
78
- "content": [{"type": "text", "text": h['content']}]
79
- })
80
 
81
- # latest
82
- resulting_messages.append({
83
- "role": "user",
84
- "content": [{"type": "text", "text": text}]
85
- })
86
- resulting_messages[0]['content'].append({"type": "image"})
87
 
 
 
 
88
 
89
- print(f"resulting_messages: {resulting_messages}")
90
- print(f"image0: {image} size: {image.size}")
91
 
 
92
 
93
- # width, height = image.size
94
- # max_pixels = 512*512
95
- # if height * width > max_pixels:
96
- # beta = math.sqrt((height * width) / max_pixels)
97
- # h_bar = math.floor(height / beta)
98
- # w_bar = math.floor(width / beta)
99
- # image = image.resize((w_bar, h_bar))
100
- # print(f"resizedimage: {image} size: {image.size}")
101
-
102
- # inputs = processor.apply_chat_template(
103
- # resulting_messages,
104
- # add_generation_prompt=True,
105
- # tokenize=True,
106
- # return_dict=True,
107
- # return_tensors="pt",
108
- # padding=True,
109
- # padding_side="left",
110
- # )
111
-
112
- prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
113
  inputs = processor(text=prompt, images=[image], return_tensors="pt")
114
  inputs = inputs.to('cuda')
115
 
@@ -135,25 +107,78 @@ def model_inference(
135
  yield buffer
136
 
137
 
138
- examples=[
139
- [{"text": "Find abnormalities and support devices.", "files": ["example_images/35.jpg"]}],
140
- [{"text": "Find abnormalities and support devices.", "files": ["example_images/363.jpg"]}],
141
- [{"text": "Find abnormalities and support devices.", "files": ["example_images/376.jpg"]}],
142
 
143
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
 
145
 
146
- demo = gr.ChatInterface(fn=model_inference,
147
- chatbot=gr.Chatbot(type="messages", render_markdown=True, sanitize_html=False, allow_tags=True, height=640, min_height=640, max_height=640, resizable=False),
148
  type="messages",
149
- title="Demo",
150
- description="Demo.",
151
- examples=examples,
152
- textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="single", lines=1, max_lines=4), stop_btn=True, multimodal=True,
153
- cache_examples=False,
154
- fill_height=False
155
- # flagging_mode="manual",
156
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
 
159
 
 
1
  import gradio as gr
2
  from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
3
+ from transformers.image_utils import load_image, valid_images
4
  from transformers.image_transforms import resize
5
  from threading import Thread
6
  import re
 
13
  from transformers import Qwen2_5_VLForConditionalGeneration
14
 
15
  pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/testmodel2")
 
16
  auth_token = os.environ.get("HF_TOKEN") or True
17
+ DEFAULT_PROMPT = "Find abnormalities and support devices."
 
18
 
19
  model = AutoModelForImageTextToText.from_pretrained(
20
  pretrained_model_name_or_path=pretrained_model_name_or_path,
 
38
 
39
  @spaces.GPU
40
  def model_inference(
41
+ text, history, image=None
42
  ):
43
 
 
 
 
 
44
 
45
+ print(f"text: {text}")
46
+ print(f"history: {history}")
 
 
 
 
 
 
 
 
 
47
 
48
  if len(text) == 0:
49
+ # return 'bad request', 'Please input a query.'
50
+ raise gr.Error("Please input a query.", duration=3, print_exception=False)
51
 
52
+ if image is None:
53
+ raise gr.Error("Please provide an image.", duration=3, print_exception=False)
54
 
55
+ # image = load_image(image)
56
+ print(f"image0: {image} size: {image.size}")
57
 
 
58
 
59
+ messages=[]
60
  if len(history) > 0:
61
+ valid_index = None
62
+ for i in range(len(history)):
63
  h = history[i]
64
+ if len(h.get("content").strip()) > 0:
65
+ if valid_index is None and h['role'] == 'assistant':
66
+ valid_index = i-1 #supposed to be 0
67
+ messages.append({"role": h['role'], "content": [{"type": "text", "text": h['content']}] })
68
 
69
+ # print(f"valid_index: {valid_index}")
70
+ if valid_index is None:
71
+ messages = []
72
+ if len(messages) > 0 and valid_index > 0:
73
+ # print(f"removing previous messages (without image) valid_index: {valid_index}")
74
+ messages = messages[valid_index:] #remove previous messages (without image)
75
 
76
+ # current prompt
77
+ messages.append({"role": "user","content": [{"type": "text", "text": text}]})
78
+ messages[0]['content'].insert(0, {"type": "image"})
79
 
 
 
80
 
81
+ print(f"messages: {messages}")
82
 
83
+
84
+ prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  inputs = processor(text=prompt, images=[image], return_tensors="pt")
86
  inputs = inputs.to('cuda')
87
 
 
107
  yield buffer
108
 
109
 
 
 
 
 
110
 
111
+ # css_no_header = """
112
+ # /* Hide the header row inside this Examples block */
113
+ # #ex_tbl thead { display: none !important; }
114
+ # """
115
+
116
+
117
+
118
+ theme = gr.themes.Default(
119
+ primary_hue="green",
120
+ # text_size="lg",
121
+ )
122
+
123
+ with gr.Blocks(theme=theme) as demo:
124
+
125
+ send_btn = gr.Button("Send", variant="primary", render=False)
126
+ textbox = gr.Textbox(show_label=False, placeholder="Enter your text here and press ENTER", render=False, submit_btn="Send")
127
+
128
+ with gr.Row():
129
+ with gr.Column(scale=4):
130
+ # input_type_radio = gr.Radio(choices=["Image", "Video"], value="Image", label="Select Input Type")
131
+ image_input = gr.Image(type="pil", visible=True, sources="upload", show_label=False)
132
+
133
+ clear_btn = gr.Button("Clear", variant="secondary")
134
+
135
+ with gr.Column():
136
+ ex =gr.Examples(
137
+ examples=[
138
+ ["example_images/35.jpg", "Find abnormalities and support devices."],
139
+ ["example_images/363.jpg", "Provide a comprehensive image analysis, and list all abnormalities."],
140
+ ["example_images/376.jpg", "Examine the chest X-ray."],
141
+ ],
142
+ inputs=[image_input, textbox],
143
+ # elem_id=css_no_header
144
+ )
145
 
146
+ with gr.Column(scale=7):
147
 
148
+ chat_interface = gr.ChatInterface(fn=model_inference,
 
149
  type="messages",
150
+ chatbot=gr.Chatbot(type="messages", label="AI", render_markdown=True, sanitize_html=False, allow_tags=True, height=800,),
151
+ textbox=textbox,
152
+ additional_inputs=image_input,
153
+ multimodal=False,
154
+ )
155
+
156
+ # Clear chat history when an example is selected (keep example-populated inputs intact)
157
+ ex.load_input_event.then(
158
+ lambda: ([], [], [], None),
159
+ None,
160
+ [chat_interface.chatbot, chat_interface.chatbot_state, chat_interface.chatbot_value, chat_interface.saved_input],
161
+ queue=False,
162
+ show_api=False,
163
+ )
164
+
165
+ # Clear chat history when a new image is uploaded via the image input
166
+ image_input.upload(
167
+ lambda: ([], [], [], None, DEFAULT_PROMPT),
168
+ None,
169
+ [chat_interface.chatbot, chat_interface.chatbot_state, chat_interface.chatbot_value, chat_interface.saved_input, textbox],
170
+ queue=False,
171
+ show_api=False,
172
+ )
173
+
174
+ # Clear everything on Clear button click
175
+ clear_btn.click(
176
+ lambda: ([], [], [], None, "", None),
177
+ None,
178
+ [chat_interface.chatbot, chat_interface.chatbot_state, chat_interface.chatbot_value, chat_interface.saved_input, textbox, image_input],
179
+ queue=False,
180
+ show_api=False,
181
+ )
182
 
183
 
184