Commit 07794b9 (parent: 3431d22), message: "misty"

Files changed:
- README.md +5 -5
- app.py +37 -65
- example_images/376.jpg +0 -0
- example_images/4747.jpg +0 -0
- example_images/6218.jpg +0 -0
- example_images/6447.jpg +0 -0
- example_images/87.jpg +0 -0
- requirements.txt +1 -1
README.md
CHANGED
@@ -1,12 +1,12 @@
 ---
-title:
-emoji:
-colorFrom:
-colorTo:
+title: Misty
+emoji: 🩻
+colorFrom: green
+colorTo: gray
 sdk: gradio
 sdk_version: 5.44.1
 app_file: app.py
 license: apache-2.0
 ---

-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
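The front-matter fields above are the Space's card configuration: title, emoji, colorFrom, and colorTo control how the Space tile is rendered on the Hub, while sdk, sdk_version, app_file, and license tell Spaces how to build and launch the app; see the configuration reference linked above for the full key list.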
app.py
CHANGED
@@ -1,7 +1,5 @@
 import gradio as gr
 from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
-from transformers.image_utils import load_image, valid_images
-from transformers.image_transforms import resize
 from threading import Thread
 import re
 import time
@@ -9,14 +7,14 @@ import torch
 import spaces
 import math
 import os
+# from qwen_vl_utils import process_vision_info, fetch_image

-#
-
+# run locally: CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 MODEL=./model_dir python app.py
+# and open http://localhost:7860

-# pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/
-
-pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/gmdsv5mx3")
+# pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/gmdsv5mx3")
+# pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/gr1")
+pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/mrcxr1")

 auth_token = os.environ.get("HF_TOKEN") or True
 DEFAULT_PROMPT = "Find abnormalities and support devices."
@@ -24,43 +22,33 @@ DEFAULT_PROMPT = "Find abnormalities and support devices."
 model = AutoModelForImageTextToText.from_pretrained(
     pretrained_model_name_or_path=pretrained_model_name_or_path,
     dtype=torch.bfloat16,
-    # attn_implementation="flash_attention_2",
-    # trust_remote_code=True,
     token=auth_token
 ).eval().to("cuda")


-processor_config={}
-# if isinstance(model, Qwen2_5_VLForConditionalGeneration):
-#     processor_config = {"min_pixels": 28*28*2, "max_pixels": 476*476}
-
-
 processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path,
     use_fast=True,
-    **processor_config
 )


 @spaces.GPU
 def model_inference(
-    text, history, image
+    text, history, image
 ):

-
     print(f"text: {text}")
     print(f"history: {history}")

     if len(text) == 0:
-        # return 'bad request', 'Please input a query.'
         raise gr.Error("Please input a query.", duration=3, print_exception=False)

     if image is None:
         raise gr.Error("Please provide an image.", duration=3, print_exception=False)

-    # image = load_image(image)
     print(f"image0: {image} size: {image.size}")
-    image = fetch_image({"image": image, "min_pixels": 28*28*2, "max_pixels": 476*476})
-
+    # image = fetch_image({"image": image, "min_pixels": 28*28*2, "max_pixels": 476*476})
+    # image.thumbnail((512, 512))  # resize image to 512x512, preserving aspect ratio
+    # print(f"image1: {image} size: {image.size}")


     messages=[]
@@ -70,21 +58,17 @@ def model_inference(
         h = history[i]
         if len(h.get("content").strip()) > 0:
             if valid_index is None and h['role'] == 'assistant':
-                valid_index = i-1
+                valid_index = i-1
             messages.append({"role": h['role'], "content": [{"type": "text", "text": h['content']}] })

-    # print(f"valid_index: {valid_index}")
     if valid_index is None:
         messages = []
     if len(messages) > 0 and valid_index > 0:
-        # print(f"removing previous messages (without image) valid_index: {valid_index}")
         messages = messages[valid_index:]  # remove previous messages (without image)

     # current prompt
     messages.append({"role": "user","content": [{"type": "text", "text": text}]})
     messages[0]['content'].insert(0, {"type": "image"})
-
-
     print(f"messages: {messages}")

@@ -95,8 +79,7 @@ def model_inference(

     # Generate
     streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
-    generation_args = dict(inputs, streamer=streamer, max_new_tokens=
-    # generated_text = ""
+    generation_args = dict(inputs, streamer=streamer, max_new_tokens=4096)

     with torch.inference_mode():
         thread = Thread(target=model.generate, kwargs=generation_args)
@@ -108,60 +91,49 @@

     for new_text in streamer:
         buffer += new_text
-        # generated_text_without_prompt = buffer #[len(ext_buffer):]
-        # time.sleep(0.01)
-        # print(f"buffer: {buffer}")
         yield buffer


-# CSS = """
-# .contain { display: flex; flex-direction: column; }
-# #component-0 { height: 100%; }
-# #chatbot { flex-grow: 1; height: 600px; } /* Set height here */
-# """
-
-
 with gr.Blocks() as demo:

+    # gr.Markdown('<h1 style="text-align:center; margin: 0.2em 0;">Demo.</h1>')
     send_btn = gr.Button("Send", variant="primary", render=False)
     textbox = gr.Textbox(show_label=False, placeholder="Enter your text here and press ENTER", render=False, submit_btn="Send")
-    # chatbot = gr.Chatbot(type="messages", label="AI", render_markdown=True, sanitize_html=False, allow_tags=True, height=800, container=False, show_share_button=False)

     with gr.Row():
-        with gr.Column(scale=
+        with gr.Column(scale=1):
-            # input_type_radio = gr.Radio(choices=["Image", "Video"], value="Image", label="Select Input Type")
             image_input = gr.Image(type="pil", visible=True, sources="upload", show_label=False)

             clear_btn = gr.Button("Clear", variant="secondary")

-            # with gr.Column():
             ex = gr.Examples(
                 examples=[
-                    ["example_images/35.jpg", "
+                    ["example_images/35.jpg", "Examine the chest X-ray."],
                     ["example_images/363.jpg", "Provide a comprehensive image analysis, and list all abnormalities."],
-                    ["example_images/
+                    ["example_images/4747.jpg", "Find abnormalities and support devices."],
+                    ["example_images/87.jpg", "Find abnormalities and support devices."],
+                    ["example_images/6218.jpg", "Find abnormalities and support devices."],
+                    ["example_images/6447.jpg", "Find abnormalities and support devices."],
                 ],
                 inputs=[image_input, textbox],
             )

+        with gr.Column(scale=2):
+            chat_interface = gr.ChatInterface(fn=model_inference,
+                type="messages",
+                chatbot=gr.Chatbot(type="messages", label="AI", render_markdown=True, sanitize_html=False, allow_tags=True, height='35vw', container=False, show_share_button=False),
+                textbox=textbox,
+                additional_inputs=image_input,
+                multimodal=False,
+                fill_height=False,
+                show_api=False,
+            )
+            gr.HTML('<span style="color:lightgray">Start with a full prompt: Find abnormalities and support devices.<br>\
+                Follow up with additional questions, such as Provide differentials or Write a structured report.<br>')
-
-
-    # chat_interface.chatbot.sanitize_html=False
-    # chat_interface.chatbot.allow_tags=True
-    # chat_interface.chatbot.elem_id="chatbot"
-

     # Clear chat history when an example is selected (keep example-populated inputs intact)
     ex.load_input_event.then(
@@ -192,6 +164,6 @@ with gr.Blocks() as demo:



-
+demo.queue(max_size=10)
 demo.launch(debug=False, server_name="0.0.0.0")
+
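For context, the generation path kept by this commit is the standard threaded streaming pattern: model.generate() runs on a background thread while TextIteratorStreamer hands partial text back to the Gradio generator. Below is a minimal, self-contained sketch of that pattern, assuming model and processor are loaded as in the diff above; the stream_reply wrapper and its prompt construction are illustrative, not the app's exact code.

from threading import Thread
from transformers import TextIteratorStreamer

def stream_reply(model, processor, messages, image, max_new_tokens=4096):
    # Render the chat template; the {"type": "image"} placeholder inserted
    # into messages[0] is paired with the PIL image passed to the processor.
    prompt = processor.apply_chat_template(messages, add_generation_prompt=True)
    inputs = processor(text=prompt, images=[image], return_tensors="pt").to(model.device)

    # skip_prompt=True makes the streamer emit only newly generated tokens.
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
    generation_args = dict(inputs, streamer=streamer, max_new_tokens=max_new_tokens)

    # generate() runs on a background thread so this generator can yield
    # partial text to the Gradio chatbot as it arrives.
    Thread(target=model.generate, kwargs=generation_args).start()

    buffer = ""
    for new_text in streamer:  # blocks until the generation thread produces text
        buffer += new_text
        yield buffer  # ChatInterface re-renders the growing assistant turn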
example_images/376.jpg
DELETED
Binary file (61 kB)

example_images/4747.jpg
ADDED

example_images/6218.jpg
ADDED

example_images/6447.jpg
ADDED

example_images/87.jpg
ADDED
requirements.txt
CHANGED
@@ -4,5 +4,5 @@ transformers==4.56.0
 huggingface_hub
 gradio==5.44.1
 spaces==0.40.1
-qwen_vl_utils==0.0.11
+# qwen_vl_utils==0.0.11

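With qwen_vl_utils pinned out here and the matching fetch_image call commented in app.py, the uploaded PIL image now reaches the processor unresized. To reproduce the Space locally under these pinned versions, the commit's own comment in app.py gives the invocation: CUDA_VISIBLE_DEVICES=0 GRADIO_SERVER_PORT=7860 MODEL=./model_dir python app.py, then open http://localhost:7860.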