Anthony Liang committed
Commit · 130aa46
1 Parent(s): 28efb30
pushing some code
trace_inference.py +53 −21
trace_inference.py CHANGED

@@ -327,7 +327,18 @@ IGNORE_INDEX = -100
 def preprocess_qwen_visual(
     sources,
     processor,
+    add_gen_prompt: bool = False,
 ) -> Dict:
+    """
+    Preprocess one sample for Qwen-VL.
+
+    Args:
+        sources: List of one dict with keys: image, conversations, data_path.
+        processor: Qwen-VL processor.
+        add_gen_prompt: If True, add generation prompt so the model generates the
+            assistant reply (use for inference). If False, full conversation is
+            tokenized and labels are built for training.
+    """
     if len(sources) != 1:
         raise ValueError(f"Expected 1 source, got {len(sources)}")
 
@@ -336,33 +347,39 @@ def preprocess_qwen_visual
     messages = _build_messages(source, base_path)
 
     full_result = processor.apply_chat_template(
-        messages,
+        messages,
+        tokenize=True,
+        return_dict=True,
+        return_tensors="pt",
+        add_generation_prompt=add_gen_prompt,
     )
 
     input_ids = full_result["input_ids"]
     if isinstance(input_ids, list):
         input_ids = torch.tensor(input_ids).unsqueeze(0)
 
-    labels = torch.full_like(input_ids, IGNORE_INDEX)
-
-    input_ids_flat = input_ids[0].tolist()
-    L = len(input_ids_flat)
-    pos = 0
-    while pos < L:
-        if input_ids_flat[pos] == 77091:
-            ans_start = pos + 2
-            ans_end = ans_start
-            while ans_end < L and input_ids_flat[ans_end] != 151645:
-                ans_end += 1
-            if ans_end < L:
-                labels[0, ans_start : ans_end + 2] = input_ids[
-                    0, ans_start : ans_end + 2
-                ]
-            pos = ans_end
-        pos += 1
-
-    full_result["labels"] = labels
     full_result["input_ids"] = input_ids
+
+    # Labels are only needed for training; skip for generation
+    if not add_gen_prompt:
+        labels = torch.full_like(input_ids, IGNORE_INDEX)
+        input_ids_flat = input_ids[0].tolist()
+        L = len(input_ids_flat)
+        pos = 0
+        while pos < L:
+            if input_ids_flat[pos] == 77091:
+                ans_start = pos + 2
+                ans_end = ans_start
+                while ans_end < L and input_ids_flat[ans_end] != 151645:
+                    ans_end += 1
+                if ans_end < L:
+                    labels[0, ans_start : ans_end + 2] = input_ids[
+                        0, ans_start : ans_end + 2
+                    ]
+                pos = ans_end
+            pos += 1
+        full_result["labels"] = labels
+
     return full_result
 
 def run_inference_qwenvl(
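The masking loop in this hunk keys off two Qwen2 tokenizer IDs: 77091 (the "assistant" role token) and 151645 (<|im_end|>). A minimal, self-contained sketch of the same rule follows; the helper name mask_labels and the toy token values are illustrative only, not part of trace_inference.py.

import torch

IGNORE_INDEX = -100

def mask_labels(input_ids: torch.Tensor) -> torch.Tensor:
    # Keep loss only on assistant replies: unmask from two positions after
    # each "assistant" marker (77091) through <|im_end|> (151645) plus the
    # trailing newline token; everything else stays IGNORE_INDEX.
    labels = torch.full_like(input_ids, IGNORE_INDEX)
    ids = input_ids[0].tolist()
    n = len(ids)
    pos = 0
    while pos < n:
        if ids[pos] == 77091:      # assistant role token
            start = pos + 2        # skip the role token and the newline after it
            end = start
            while end < n and ids[end] != 151645:
                end += 1           # scan forward to <|im_end|>
            if end < n:
                labels[0, start : end + 2] = input_ids[0, start : end + 2]
            pos = end
        pos += 1
    return labels

# Toy layout: <|im_start|> assistant \n  answer tokens  <|im_end|> \n
toy = torch.tensor([[151644, 77091, 198, 40, 2776, 151645, 198]])
print(mask_labels(toy))
# tensor([[-100, -100, -100, 40, 2776, 151645, 198]])

Only the answer span, the closing <|im_end|>, and the newline after it contribute to the loss; the prompt and role scaffolding stay masked.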
@@ -407,6 +424,13 @@ def run_inference_qwenvl(
         "data_path": data_path,
     }
 
+    print("prompt")
+    print(prompt)
+    print("image_path")
+    print(image_rel)
+    print("data_path")
+    print(data_path)
+
     try:
         import torch
         from trajectory_viz import extract_trajectory_from_text, visualize_trajectory_on_image
@@ -415,6 +439,8 @@
             [inference_sample], processor, add_gen_prompt=True
         )
 
+        print("inference_sample")
+        print(inference_sample)
         print("processed_data")
         print(processed_data)
 
@@ -437,7 +463,7 @@
         inputs["image_grid_thw"] = image_grid_thw
 
         with torch.no_grad():
-            generated_ids = model.generate(**inputs, max_new_tokens=
+            generated_ids = model.generate(**inputs, max_new_tokens=512)
         generated_ids_trimmed = [
             out_ids[len(in_ids) :] for in_ids, out_ids in zip(input_ids, generated_ids)
         ]
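One detail in this hunk is worth spelling out: generate() returns each prompt followed by its continuation, so slicing every output row at len(in_ids) keeps only the newly generated tokens for decoding. A tiny sketch with made-up token values:

# Sketch only: generate() echoes the prompt, so strip the prompt prefix.
input_ids = [[101, 102, 103]]               # prompt tokens (toy values)
generated_ids = [[101, 102, 103, 7, 8, 9]]  # prompt + newly generated tokens

generated_ids_trimmed = [
    out_ids[len(in_ids):] for in_ids, out_ids in zip(input_ids, generated_ids)
]
print(generated_ids_trimmed)  # [[7, 8, 9]]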
@@ -445,6 +471,9 @@
             generated_ids_trimmed[0], skip_special_tokens=True
         )
 
+        print("prediction")
+        print(prediction)
+
         # Format output like the example: "Trace: [[x,y], [x,y], ...]"
         trajectories = extract_trajectory_from_text(prediction)
         trace_value = f"Trace: {trajectories}" if trajectories else f"Trace: {prediction}"
@@ -459,6 +488,9 @@
 
         trace_points_text = format_trace_points(trajectories)
 
+        print("trace_points_text")
+        print(trace_points_text)
+
         overlay_path = None
         if trajectories and len(trajectories) >= 2:
             _, preprocessed_path = preprocess_image_for_trace(image_path)
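extract_trajectory_from_text is defined in trajectory_viz and is not part of this diff. Purely to illustrate the "Trace: [[x,y], [x,y], ...]" format being parsed here, a hypothetical regex-based extractor might look like the sketch below; extract_xy_pairs is an invented name, not the repo's API, and the real helper may behave differently.

import re

def extract_xy_pairs(text: str) -> list[tuple[float, float]]:
    # Pull every "[x, y]" pair out of free-form model output.
    pairs = re.findall(r"\[\s*(-?\d+(?:\.\d+)?)\s*,\s*(-?\d+(?:\.\d+)?)\s*\]", text)
    return [(float(x), float(y)) for x, y in pairs]

print(extract_xy_pairs("Trace: [[12, 34], [56.5, 78]]"))
# [(12.0, 34.0), (56.5, 78.0)]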