ServiceNow
/

GroundNext-7B-V0

Image-Text-to-Text

vision-language

text-generation-inference

Model card Files Files and versions

feiziaarash committed on Oct 14, 2025

Commit

3d94ccf

·

verified ·

1 Parent(s): 738dfa1

Update README.md

Files changed (1) hide show

README.md +24 -7

README.md CHANGED Viewed

@@ -16,8 +16,14 @@ At runtime, you must:
 3. Parse <tool_call> tags in the model’s output to extract JSON tool calls.
 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
 GroundNext_GROUNDER_SYS_PROMPT = """You are a helpful assistant.
 # Tools
@@ -34,12 +40,24 @@ For each function call, return a json object with function name and arguments wi
 {{"name": <function-name>, "arguments": <args-json-object>}}
 </tool_call>"""
-model_id = "ServiceNow/GroundNext-7B-V0"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")
-image_path = "screenshot.png"
-instruction = "Locate the 'Save' icon"
 # inference
@@ -48,9 +66,8 @@ width, height = image.size
 resized_height, resized_width = smart_resize(
     height,
     width,
-    min_pixels=56 * 56,
     max_pixels=6_000_000,
-    # max_pixels=2_100_00,
 )
 image = image.resize((resized_width, resized_height))

 3. Parse <tool_call> tags in the model’s output to extract JSON tool calls.
 ```python
+import torch
+from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor
+from transformers.models.qwen2_vl.image_processing_qwen2_vl_fast import smart_resize
+from PIL import Image
+TEMP = 0.0
 GroundNext_GROUNDER_SYS_PROMPT = """You are a helpful assistant.
 # Tools
 {{"name": <function-name>, "arguments": <args-json-object>}}
 </tool_call>"""
+model_name = "ServiceNow/GroundNext-7B-V0"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+            model_name,
+            torch_dtype=torch.bfloat16,
+            attn_implementation="flash_attention_2",
+            device_map="auto",
+            trust_remote_code=True
+        ).eval()
+processor = AutoProcessor.from_pretrained(model_name)
+model.generation_config.temperature = TEMP
+model.generation_config.do_sample = False if TEMP == 0.0 else True
+model.generation_config.use_cache = True
+image_path = "./screenshot.png"
+instruction = "Click on the 'Save' icon"
 # inference
 resized_height, resized_width = smart_resize(
     height,
     width,
+    min_pixels=78_400,
     max_pixels=6_000_000,
 )
 image = image.resize((resized_width, resized_height))