import numpy as np import random from xtuner.utils import DEFAULT_IMAGE_TOKEN import re PREFIX_REASONING_STR = [ 'This conclusion is based on several observations: ', 'Because: ', "This conclusion stems from several key factors: ", "The reasoning behind this conclusion includes: ", "Several observations lead to this conclusion: ", "The underlying reasons are: ", "The evidence supporting this conclusion includes: ", "This is justified by: ", ] CONTOUR_QUESTIONS = [ "Here are two images. In the second image, I have marked several "\ "visual objects with their contours in different colors, and each "\ "is identified by a white numeric ID against a background that "\ "matches the contour's color. Could you please tell me which of "\ "these marked objects is the same as the object marked with a {color} "\ "contour in the first image?", "Observe the two images provided. In the second image, several objects "\ "are outlined in various colors, each accompanied by a white numeric ID "\ "on a matching color background. Can you identify which object corresponds "\ "to the one outlined in {color} in the first image?", "You have two images in front of you. The second image contains multiple "\ "objects, each highlighted with a distinct color contour and labeled with "\ "a numeric ID. Please determine which object matches the one outlined in "\ "{color} in the first image?", "Examine the pair of images. In the second image, objects are marked with "\ "different colored contours, each paired with a white numeric ID on a "\ "corresponding colored background. Which object is identical to the one "\ "marked with a {color} contour in the first image?", "Here are two images for comparison. The second image features several "\ "objects, each enclosed in a uniquely colored contour and identified by "\ "a numeric ID. Can you select the object that matches the one outlined "\ "in {color} in the first image?", "Look at the two images provided. In the second image, objects are "\ "highlighted with various colored contours, each with a white numeric "\ "ID on a matching background. Which of these objects is the same as the "\ "one outlined in {color} in the first image?" ] CHOICE_STR = " Please make a choice from the following options: \n{choices}" def match_reasoning_preprocess(example): conversations = [] conversations.append({"from": "human", "value": random.choice(CONTOUR_QUESTIONS) + CHOICE_STR}) conversations.append({"from": 'gpt', "value": '{answer}'}) conversations.append({"from": 'human', "value": "Why?"}) conversations.append({"from": 'gpt', "value": random.choice(PREFIX_REASONING_STR) + example['description']}) for i, conversation in enumerate(conversations): if i == 0: role = conversation['from'] assert role == 'human' question = f"Image-1: {DEFAULT_IMAGE_TOKEN}\nImage-2: {DEFAULT_IMAGE_TOKEN}\n\n" question = question + conversation['value'] conversation['value'] = question example['conversations'] = conversations return example def match_reasoning_map_fn(example): example = match_reasoning_preprocess(example) return example BBOX_QUESTIONS = [ "Here are two images. In the second image, I have marked several "\ "visual objects with their bounding boxes in different colors, and each "\ "is identified by a white numeric ID against a background that "\ "matches the bounding box color. Could you please tell me which of "\ "these marked objects is the same as the object marked with a {color} "\ "bounding box in the first image?", "Observe the two images provided. In the second image, several objects "\ "are outlined in various colors, each accompanied by a white numeric ID "\ "on a matching color background. Can you identify which object corresponds "\ "to the one outlined in {color} in the first image?", "You have two images in front of you. The second image contains multiple "\ "objects, each highlighted with a distinct color bounding box and labeled with "\ "a numeric ID. Please determine which object matches the one outlined in "\ "{color} in the first image?", "Examine the pair of images. In the second image, objects are marked with "\ "different colored bounding boxes, each paired with a white numeric ID on a "\ "corresponding colored background. Which object is identical to the one "\ "marked with a {color} bounding box in the first image?", "Here are two images for comparison. The second image features several "\ "objects, each enclosed in a uniquely colored bounding box and identified by "\ "a numeric ID. Can you select the object that matches the one outlined "\ "in {color} in the first image?", "Look at the two images provided. In the second image, objects are "\ "highlighted with various colored bounding boxes, each with a white numeric "\ "ID on a matching background. Which of these objects is the same as the "\ "one outlined in {color} in the first image?" ] def match_choice_only_preprocess(example): conversations = [] if example['vprompt_type'] == "mask": conversations.append({"from": "human", "value": random.choice(CONTOUR_QUESTIONS) + CHOICE_STR}) elif example["vprompt_type"] == "bbox": conversations.append({"from": "human", "value": random.choice(BBOX_QUESTIONS) + CHOICE_STR}) else: raise NotImplementedError conversations.append({"from": 'gpt', "value": '{answer}'}) for i, conversation in enumerate(conversations): if i == 0: role = conversation['from'] assert role == 'human' question = f"Image-1: {DEFAULT_IMAGE_TOKEN}\nImage-2: {DEFAULT_IMAGE_TOKEN}\n\n" question = question + conversation['value'] conversation['value'] = question example['conversations'] = conversations return example def match_choice_only_map_fn(example): example = match_choice_only_preprocess(example) return example ROI_QUESTIONS = [ "Here are two images. In the first image, I have specified a query object, "\ "and in the second image, there are multiple candidate objects. Could you "\ "identify which candidate object is the same as the query object?", ] def match_reasoning_preprocess_roi(example): conversations = [] conversations.append({"from": "human", "value": random.choice(ROI_QUESTIONS) + CHOICE_STR}) conversations.append({"from": 'gpt', "value": '{answer}'}) conversations.append({"from": 'human', "value": "Why?"}) conversations.append({"from": 'gpt', "value": random.choice(PREFIX_REASONING_STR) + example['description']}) for i, conversation in enumerate(conversations): if i == 0: role = conversation['from'] assert role == 'human' question = f"Image-1: {DEFAULT_IMAGE_TOKEN}\nImage-2: {DEFAULT_IMAGE_TOKEN}\n\n" question = question + conversation['value'] conversation['value'] = question example['conversations'] = conversations return example def match_reasoning_map_fn_roi(example): example = match_reasoning_preprocess_roi(example) return example