osunlp
/

GUI-Drag-7B

Image-Text-to-Text

vision-language

text-generation-inference

Model card Files Files and versions

lzy337 committed on Oct 10, 2025

Commit

deddc60

·

verified ·

1 Parent(s): dfa7aa6

Update README.md

Files changed (1) hide show

README.md +6 -6

README.md CHANGED Viewed

@@ -10,7 +10,7 @@ Below is the code of a quick demo:
 ```
 # pip install "transformers>=4.42" pillow openai
-# 并启动你的 vLLM 服务，例如：
 # vllm serve osunlp/GUI-Drag-7B --tensor-parallel-size 1 --dtype bfloat16 --port 8000
 import base64
@@ -23,7 +23,7 @@ from PIL import Image
 from transformers.models.qwen2_vl.image_processing_qwen2_vl_fast import smart_resize as qwen_smart_resize
 MODEL_NAME = "GUI-Drag-7B"
-BASE_URL = "http://localhost:8000/v1"   # 替换成你的 vLLM 服务端口
 FN_CALL_TEMPLATE = """You are a helpful assistant.
 # Tools
 You may call one or more functions to assist with the user query.
@@ -61,17 +61,17 @@ def process_simple_drag_response(parsed_responses):
 def resize_back(coords, original_size, resized_size):
     ox, oy = original_size
     rx, ry = resized_size
-    return round(coords[0] * ox / rx), round(coords[1] * oy / ry)
 def demo():
     image_path = Path("demo_image.png")
-    instruction = "Drag to select the highlighted paragraph."
     image = Image.open(image_path)
     resized_h, resized_w = qwen_smart_resize(
         image.height, image.width,
-        max_pixels=2_116_800,
-        min_pixels=12_544,
     )
     messages = [

 ```
 # pip install "transformers>=4.42" pillow openai
+# start vllm server like:
 # vllm serve osunlp/GUI-Drag-7B --tensor-parallel-size 1 --dtype bfloat16 --port 8000
 import base64
 from transformers.models.qwen2_vl.image_processing_qwen2_vl_fast import smart_resize as qwen_smart_resize
 MODEL_NAME = "GUI-Drag-7B"
+BASE_URL = "http://localhost:8000/v1" # replace it with your own port
 FN_CALL_TEMPLATE = """You are a helpful assistant.
 # Tools
 You may call one or more functions to assist with the user query.
 def resize_back(coords, original_size, resized_size):
     ox, oy = original_size
     rx, ry = resized_size
+    return round(coords[0] * rx / ox), round(coords[1] * ry / oy)
 def demo():
     image_path = Path("demo_image.png")
+    instruction = "Drag to select the first sentence of the first paragraph."
     image = Image.open(image_path)
     resized_h, resized_w = qwen_smart_resize(
         image.height, image.width,
+        max_pixels=2116800,
+        min_pixels=12544,
     )
     messages = [