Spaces:

amrn
/

misty

Paused

App Files Files Community

am committed on Sep 2, 2025

Commit

895f657

1 Parent(s): 651be4c

1

Browse files

Files changed (5) hide show

app.py +148 -0
example_images/35.jpg +0 -0
example_images/363.jpg +0 -0
example_images/376.jpg +0 -0
requirements.txt +13 -0

app.py ADDED Viewed

	@@ -0,0 +1,148 @@

+import gradio as gr
+from transformers import AutoProcessor, AutoModelForImageTextToText, TextIteratorStreamer
+from transformers.image_utils import load_image
+from transformers.image_transforms import resize
+from threading import Thread
+import re
+import time
+import torch
+import spaces
+import math
+import os
+# pretrained_model_name_or_path="amrn/testmodel"
+pretrained_model_name_or_path=os.environ.get("MODEL", "amrn/testmodel")
+auth_token = os.environ.get("HF_TOKEN") or True
+processor = AutoProcessor.from_pretrained(pretrained_model_name_or_path,
+    use_fast=True,
+    #trust_remote_code=True
+  )
+model = AutoModelForImageTextToText.from_pretrained(
+    pretrained_model_name_or_path=pretrained_model_name_or_path,
+    torch_dtype=torch.bfloat16,
+    # attn_implementation="flash_attention_2",
+    # trust_remote_code=True,
+    token=auth_token
+).eval().to("cuda")
+@spaces.GPU
+def model_inference(
+    input_dict, history
+):
+    print(f"input_dict: {input_dict}")
+    print(f"history: {history}")
+    text = input_dict["text"]
+    if len(history) > 0:
+        try:
+            image = history[0]['content'][0]
+        except:
+            raise gr.Error("Please refresh the page to start over.")
+    else:
+        try:
+            image = input_dict["files"][0]
+        except:
+            raise gr.Error("Please provide an image.", duration=2)
+    if len(text) == 0:
+        raise gr.Error("Please input a query.", duration=2)
+    if len(image) == 0:
+        raise gr.Error("Please provide an image.", duration=2)
+    image = load_image(image)
+    resulting_messages=[]
+    if len(history) > 0:
+        for i in range(1, len(history)):
+            h = history[i]
+            resulting_messages.append({
+                "role": h['role'],
+                "content": [{"type": "text", "text": h['content']}]
+            })
+    # latest
+    resulting_messages.append({
+        "role": "user",
+        "content": [{"type": "text", "text": text}]
+    })
+    resulting_messages[0]['content'].append({"type": "image"})
+    print(f"resulting_messages: {resulting_messages}")
+    print(f"image0: {image} size: {image.size}")
+    width, height = image.size
+    max_pixels = 512*512
+    if height * width > max_pixels:
+        beta = math.sqrt((height * width) / max_pixels)
+        h_bar = math.floor(height / beta)
+        w_bar =  math.floor(width / beta)
+        image = image.resize((w_bar, h_bar))
+        print(f"resizedimage: {image} size: {image.size}")
+    prompt = processor.apply_chat_template(resulting_messages, add_generation_prompt=True)
+    inputs = processor(text=prompt, images=[image], return_tensors="pt")
+    inputs = inputs.to('cuda')
+    # Generate
+    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+    generation_args = dict(inputs, streamer=streamer, max_new_tokens=2048)
+    generated_text = ""
+    thread = Thread(target=model.generate, kwargs=generation_args)
+    thread.start()
+    yield "..."
+    buffer = ""
+    for new_text in streamer:
+      buffer += new_text
+    #   generated_text_without_prompt = buffer#[len(ext_buffer):]
+    #   time.sleep(0.01)
+    #   print(f"buffer: {buffer}")
+      yield buffer
+examples=[
+              [{"text": "Find abnormalities and support devices.", "files": ["example_images/35.jpg"]}],
+              [{"text": "Find abnormalities and support devices.", "files": ["example_images/363.jpg"]}],
+              [{"text": "Find abnormalities and support devices.", "files": ["example_images/376.jpg"]}],
+      ]
+demo = gr.ChatInterface(fn=model_inference,
+                chatbot=gr.Chatbot(type="messages", render_markdown=True, sanitize_html=False, allow_tags=True, height=640, min_height=640, max_height=640, resizable=False),
+                type="messages",
+                title="Demo",
+                description="Demo.",
+                examples=examples,
+                textbox=gr.MultimodalTextbox(label="Query Input", file_types=["image"], file_count="single", lines=1, max_lines=4), stop_btn=True, multimodal=True,
+                cache_examples=False,
+                fill_height=False
+                # flagging_mode="manual",
+                )
+demo.launch(debug=False, server_name="0.0.0.0")

example_images/35.jpg ADDED Viewed

example_images/363.jpg ADDED Viewed

example_images/376.jpg ADDED Viewed

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+torch
+torchvision
+transformers
+huggingface_hub
+gradio
+spaces
+# accelerate
+# flash-attn --no-build-isolation
+# numpy
+# Pillow
+# requests
+# pydantic==2.10.6