jiyatai commited on
Commit
e456723
·
verified ·
1 Parent(s): fd9d77f

Delete qwen25vl.py

Browse files
Files changed (1) hide show
  1. qwen25vl.py +0 -250
qwen25vl.py DELETED
@@ -1,250 +0,0 @@
1
- from datasets import load_dataset
2
- import json
3
- import random
4
- import io
5
- import ast
6
- from PIL import Image, ImageDraw, ImageFont
7
- from PIL import ImageColor
8
- from tqdm import tqdm
9
- import torch
10
- import os
11
- import torch.distributed as dist
12
- import xml.etree.ElementTree as ET
13
-
14
# All color names PIL knows about, used to extend the hand-picked palette
# when more boxes than base colors must be drawn.
additional_colors = list(ImageColor.colormap)
15
-
16
def decode_xml_points(text):
    """Parse an XML points tag like <points x1=".." y1=".." alt="..">phrase</points>.

    Returns a dict {"points": [[x, y], ...], "alt": ..., "phrase": ...}
    with coordinates kept as the raw attribute strings, or None if the
    text is not parseable XML (the exception is printed).
    """
    try:
        element = ET.fromstring(text)
        attrs = element.attrib
        # One attribute is 'alt'; the remainder arrive as (xN, yN) pairs.
        pair_count = (len(attrs) - 1) // 2
        coords = [
            [attrs.get(f'x{k}'), attrs.get(f'y{k}')]
            for k in range(1, pair_count + 1)
        ]
        label = element.text.strip() if element.text else None
        return {
            "points": coords,
            "alt": attrs.get('alt'),
            "phrase": label,
        }
    except Exception as err:
        print(err)
        return None
35
-
36
def plot_bounding_boxes(im, bounding_boxes, input_width, input_height):
    """
    Draw the model-predicted bounding boxes onto *im* (in place) and return
    the last box's absolute pixel coordinates.

    Args:
        im: PIL.Image that gets drawn on (mutated).
        bounding_boxes: raw model response text — a JSON-like list of dicts,
            each carrying a "bbox_2d" [x1, y1, x2, y2] in the model's input
            coordinate space, possibly wrapped in a ```json markdown fence.
        input_width: width of the image as the model saw it.
        input_height: height of the image as the model saw it.

    Returns:
        [abs_x1, abs_y1, abs_x2, abs_y2] of the LAST box in the list, rescaled
        to *im*'s actual size. NOTE(review): if the parsed list is empty this
        raises NameError (abs_x1 is never bound) — callers wrap this function
        in try/except and treat any failure as "skip".
    """

    # Load the image
    img = im
    width, height = img.size
    # print(img.size)
    # Create a drawing object
    draw = ImageDraw.Draw(img)

    # Define a list of colors (cycled, one per box), extended with every
    # named color PIL provides.
    colors = [
        'red',
        'green',
        'blue',
        'yellow',
        'orange',
        'pink',
        'purple',
        'brown',
        'gray',
        'beige',
        'turquoise',
        'cyan',
        'magenta',
        'lime',
        'navy',
        'maroon',
        'teal',
        'olive',
        'coral',
        'lavender',
        'violet',
        'gold',
        'silver',
    ] + additional_colors

    # Strip markdown fencing so ast.literal_eval sees bare JSON.
    bounding_boxes = parse_json(bounding_boxes)

    # font = ImageFont.truetype("NotoSansCJK-Regular.ttc", size=14)

    try:
        json_output = ast.literal_eval(bounding_boxes)
    except Exception as e:
        # Generation was likely truncated mid-list: cut at the last complete
        # '"}' and close the list so it parses as a (shorter) valid literal.
        end_idx = bounding_boxes.rfind('"}') + len('"}')
        truncated_text = bounding_boxes[:end_idx] + "]"
        json_output = ast.literal_eval(truncated_text)

    # Iterate over the bounding boxes
    for i, bounding_box in enumerate(json_output):
        # Select a color from the list
        color = colors[i % len(colors)]

        # Rescale from model-input coordinates to absolute pixel coordinates.
        abs_y1 = int(bounding_box["bbox_2d"][1]/input_height * height)
        abs_x1 = int(bounding_box["bbox_2d"][0]/input_width * width)
        abs_y2 = int(bounding_box["bbox_2d"][3]/input_height * height)
        abs_x2 = int(bounding_box["bbox_2d"][2]/input_width * width)

        # Normalize corner order so (abs_x1, abs_y1) is top-left.
        if abs_x1 > abs_x2:
            abs_x1, abs_x2 = abs_x2, abs_x1

        if abs_y1 > abs_y2:
            abs_y1, abs_y2 = abs_y2, abs_y1

        # Draw the bounding box
        draw.rectangle(
            ((abs_x1, abs_y1), (abs_x2, abs_y2)), outline=color, width=4
        )

        # # Draw the text
        # if "label" in bounding_box:
        #     draw.text((abs_x1 + 8, abs_y1 + 6), bounding_box["label"], fill=color, font=font)

    # Display the image
    # img.show()
    # img.save('output.png')
    return [abs_x1, abs_y1, abs_x2, abs_y2]
122
-
123
-
124
-
125
# Helper: strip markdown code fencing from a model response.
def parse_json(json_output):
    """Return the content of the first ```json ... ``` fence in *json_output*.

    If no line is exactly "```json", the input is returned unchanged.
    """
    content_lines = json_output.splitlines()
    for idx, current in enumerate(content_lines):
        if current != "```json":
            continue
        # Everything after the opening fence, up to (not including) the
        # closing ``` fence.
        fenced = "\n".join(content_lines[idx + 1:])
        return fenced.split("```")[0]
    return json_output
135
-
136
-
137
- import torch
138
- from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor
139
-
140
# One process per GPU. LOCAL_RANK is set by the distributed launcher
# (torchrun etc.) and defaults to 0 for a single-process run.
world_size = torch.cuda.device_count()
rank = int(os.environ.get("LOCAL_RANK", 0))
# Rendezvous address for init_method="env://" below.
os.environ['MASTER_ADDR'] = 'localhost'
os.environ['MASTER_PORT'] = '12355'

# Initialize the process group
dist.init_process_group(
    backend="nccl", # NCCL backend (for GPUs)
    init_method="env://",
    rank=rank,
    world_size=world_size
)
print(f"Rank {rank} initialized")
device = torch.device(f"cuda:{rank}")

# Load Qwen2.5-VL once per rank, pinned to this rank's GPU via device_map.
model_path = "/lustre/fsw/portfolios/nvr/users/yataij/pretrained/Qwen2.5-VL-7B-Instruct"
model = Qwen2_5_VLForConditionalGeneration.from_pretrained(model_path, torch_dtype=torch.bfloat16, attn_implementation="flash_attention_2",device_map={"": device},)
processor = AutoProcessor.from_pretrained(model_path)
158
-
159
def inference(image, prompt, system_prompt="You are a helpful assistant", max_new_tokens=1024):
    """Run one image+text generation with the module-level Qwen2.5-VL model.

    Args:
        image: PIL image handed to the processor (the pixels actually used).
        prompt: user text prompt.
        system_prompt: system message for the chat template.
        max_new_tokens: generation budget.

    Returns:
        (output_text, input_height, input_width) — the decoded response and
        the pixel resolution the vision encoder actually processed.
    """
    # The chat template only needs an image placeholder entry; the real image
    # is supplied via `images=[image]` below. NOTE(review): presumably the
    # dummy path is never opened when images are passed explicitly — confirm.
    img_url_dummy = "/lustre/fsw/portfolios/nvr/users/yataij/data/SPAR-7M-RGBD/example.png"
    messages = [
        {
            "role": "system",
            "content": system_prompt
        },
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": prompt
                },
                {
                    "image": img_url_dummy
                }
            ]
        }
    ]
    text = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # print("input:\n",text)
    inputs = processor(text=[text], images=[image], padding=True, return_tensors="pt").to(device)

    # Bug fix: previously hard-coded max_new_tokens=1024 here, silently
    # ignoring the function parameter.
    output_ids = model.generate(**inputs, max_new_tokens=max_new_tokens)
    # Drop the prompt tokens from each sequence, keeping only generated ones.
    generated_ids = [out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, output_ids)]
    output_text = processor.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    # print("output:\n",output_text[0])

    # image_grid_thw holds (t, h, w) counted in 14-pixel vision patches;
    # multiplying by the patch size recovers the resized pixel resolution.
    input_height = inputs['image_grid_thw'][0][1]*14
    input_width = inputs['image_grid_thw'][0][2]*14

    return output_text[0], input_height, input_width
193
-
194
- # prepare the model input
195
# ---- Build the evaluation subset: single-view, multiple-choice items ----
dataset = load_dataset('/lustre/fs12/portfolios/nvr/projects/nvr_lpr_nvgptvision/users/yataij/data/SPAR_Bench')
print(len(dataset['test']))
print(dataset['test'].features.keys())
data_list = []
for i,example in tqdm(enumerate(dataset['test'])):
    # Keep only single-image, "select"-format (multiple choice) questions.
    if example['img_type'] == 'single_view' and example['format_type'] == 'select': # select fill
        data_list.append(example)

print('test', len(data_list))
# Previously extracted visual-prompt descriptions. NOTE(review): assumed to be
# ordered one-to-one with data_list; the assert in the loop below checks ids.
visual_prompts = json.load(open('qwen3_visual_prompt_extract.json'))
print(len(visual_prompts))

# Shard work across ranks: each rank takes every world_size-th item, applying
# the same stride to both lists so they stay aligned.
data_list = data_list[rank::world_size]
visual_prompts = visual_prompts[rank::world_size]

res = []
for i in tqdm(range(len(data_list))):
    instance = data_list[i]
    visual_prompt = visual_prompts[i]
    # Both lists were sharded identically, so the ids must still match.
    assert instance['id'] == visual_prompt['id']

    image = instance['image'][0]
    width, height = image.size
    vp_bbox = {}  # visual-prompt key -> [x1, y1, x2, y2] in image pixels
    for vp,ins in visual_prompt['visual_prompt'].items():

        if 'point' in vp:
            # e.g. vp == "red point": ask the model to localize the marker.
            color = vp.split()[0]
            prompt = f"Locate the {color} round point, output its bbox coordinates using JSON format."
            response, input_height, input_width = inference(image, prompt)
            try:
                coord = plot_bounding_boxes(image,response,input_width,input_height)
            except:
                # Unparseable or empty model output — skip this visual prompt.
                print(i, vp)
                continue
            # Pad the point's box by 50 px per side, clamped to the image.
            coord = [coord[0]-50, coord[1]-50, coord[2]+50, coord[3]+50]
            if coord[0] < 0: coord[0] = 0
            if coord[1] < 0: coord[1] = 0
            if coord[2] > width: coord[2] = width
            if coord[3] > height: coord[3] = height
            vp_bbox[vp] = coord
        elif 'bbox' in vp:
            # Referring-expression grounding for a named object.
            anno = f"the {ins} in {vp}"
            prompt = f"Locate {anno}, output its bbox coordinates using JSON format."
            response, input_height, input_width = inference(image, prompt)
            try:
                coord = plot_bounding_boxes(image,response,input_width,input_height)
            except:
                print(i, vp)
                continue
            vp_bbox[vp] = coord

    visual_prompt['visual_prompt_bbox'] = vp_bbox
    res.append(visual_prompt)
    # Checkpoint: rewrite this rank's output file after every instance so
    # partial results survive interruption. NOTE(review): indentation was
    # reconstructed — confirm this write belongs inside the loop.
    with open(f'qwen25vl_sparbench_singleimg_select_bbox_rank{rank}.json', 'w') as f:
        json.dump(res, f, indent=4)