File size: 7,435 Bytes

032e687

import numpy as np
import random
from xtuner.utils import DEFAULT_IMAGE_TOKEN
import re

PREFIX_REASONING_STR = [
    'This conclusion is based on several observations: ',
    'Because: ',
    "This conclusion stems from several key factors: ",
    "The reasoning behind this conclusion includes: ",
    "Several observations lead to this conclusion: ",
    "The underlying reasons are: ",
    "The evidence supporting this conclusion includes: ",
    "This is justified by: ",
]

CONTOUR_QUESTIONS = [
    "Here are two images. In the second image, I have marked several "\
    "visual objects with their contours in different colors, and each "\
    "is identified by a white numeric ID against a background that "\
    "matches the contour's color. Could you please tell me which of "\
    "these marked objects is the same as the object marked with a {color} "\
    "contour in the first image?",
    "Observe the two images provided. In the second image, several objects "\
    "are outlined in various colors, each accompanied by a white numeric ID "\
    "on a matching color background. Can you identify which object corresponds "\
    "to the one outlined in {color} in the first image?",
    "You have two images in front of you. The second image contains multiple "\
    "objects, each highlighted with a distinct color contour and labeled with "\
    "a numeric ID. Please determine which object matches the one outlined in "\
    "{color} in the first image?",
    "Examine the pair of images. In the second image, objects are marked with "\
    "different colored contours, each paired with a white numeric ID on a "\
    "corresponding colored background. Which object is identical to the one "\
    "marked with a {color} contour in the first image?",
    "Here are two images for comparison. The second image features several "\
    "objects, each enclosed in a uniquely colored contour and identified by "\
    "a numeric ID. Can you select the object that matches the one outlined "\
    "in {color} in the first image?",
    "Look at the two images provided. In the second image, objects are "\
    "highlighted with various colored contours, each with a white numeric "\
    "ID on a matching background. Which of these objects is the same as the "\
    "one outlined in {color} in the first image?"
]

CHOICE_STR = " Please make a choice from the following options: \n{choices}"

def match_reasoning_preprocess(example):
    conversations = []
    conversations.append({"from": "human", "value": random.choice(CONTOUR_QUESTIONS) + CHOICE_STR})
    conversations.append({"from": 'gpt', "value": '{answer}'})
    conversations.append({"from": 'human', "value": "Why?"})
    conversations.append({"from": 'gpt', "value": random.choice(PREFIX_REASONING_STR) + example['description']})
    
    for i, conversation in enumerate(conversations):
        if i == 0:
            role = conversation['from']
            assert role == 'human'
            question = f"Image-1: {DEFAULT_IMAGE_TOKEN}\nImage-2: {DEFAULT_IMAGE_TOKEN}\n<OBJECT_TOKENS>\n"
            question = question + conversation['value']
            conversation['value'] = question
    
    example['conversations'] = conversations
    return example

def match_reasoning_map_fn(example):
    example = match_reasoning_preprocess(example)

    return example


BBOX_QUESTIONS = [
    "Here are two images. In the second image, I have marked several "\
    "visual objects with their bounding boxes in different colors, and each "\
    "is identified by a white numeric ID against a background that "\
    "matches the bounding box color. Could you please tell me which of "\
    "these marked objects is the same as the object marked with a {color} "\
    "bounding box in the first image?",
    "Observe the two images provided. In the second image, several objects "\
    "are outlined in various colors, each accompanied by a white numeric ID "\
    "on a matching color background. Can you identify which object corresponds "\
    "to the one outlined in {color} in the first image?",
    "You have two images in front of you. The second image contains multiple "\
    "objects, each highlighted with a distinct color bounding box and labeled with "\
    "a numeric ID. Please determine which object matches the one outlined in "\
    "{color} in the first image?",
    "Examine the pair of images. In the second image, objects are marked with "\
    "different colored bounding boxes, each paired with a white numeric ID on a "\
    "corresponding colored background. Which object is identical to the one "\
    "marked with a {color} bounding box in the first image?",
    "Here are two images for comparison. The second image features several "\
    "objects, each enclosed in a uniquely colored bounding box and identified by "\
    "a numeric ID. Can you select the object that matches the one outlined "\
    "in {color} in the first image?",
    "Look at the two images provided. In the second image, objects are "\
    "highlighted with various colored bounding boxes, each with a white numeric "\
    "ID on a matching background. Which of these objects is the same as the "\
    "one outlined in {color} in the first image?"
]


def match_choice_only_preprocess(example):
    conversations = []
    if example['vprompt_type'] == "mask":
        conversations.append({"from": "human", "value": random.choice(CONTOUR_QUESTIONS) + CHOICE_STR})
    elif example["vprompt_type"] == "bbox":
        conversations.append({"from": "human", "value": random.choice(BBOX_QUESTIONS) + CHOICE_STR})
    else:
        raise NotImplementedError
    conversations.append({"from": 'gpt', "value": '{answer}'})
    
    for i, conversation in enumerate(conversations):
        if i == 0:
            role = conversation['from']
            assert role == 'human'
            question = f"Image-1: {DEFAULT_IMAGE_TOKEN}\nImage-2: {DEFAULT_IMAGE_TOKEN}\n<OBJECT_TOKENS>\n"
            question = question + conversation['value']
            conversation['value'] = question
    
    example['conversations'] = conversations
    return example

def match_choice_only_map_fn(example):
    example = match_choice_only_preprocess(example)

    return example


ROI_QUESTIONS = [
    "Here are two images. In the first image, I have specified a query object, "\
    "and in the second image, there are multiple candidate objects. Could you "\
    "identify which candidate object is the same as the query object?",
]

def match_reasoning_preprocess_roi(example):
    conversations = []
    conversations.append({"from": "human", "value": random.choice(ROI_QUESTIONS) + CHOICE_STR})
    conversations.append({"from": 'gpt', "value": '{answer}'})
    conversations.append({"from": 'human', "value": "Why?"})
    conversations.append({"from": 'gpt', "value": random.choice(PREFIX_REASONING_STR) + example['description']})
    
    for i, conversation in enumerate(conversations):
        if i == 0:
            role = conversation['from']
            assert role == 'human'
            question = f"Image-1: {DEFAULT_IMAGE_TOKEN}\nImage-2: {DEFAULT_IMAGE_TOKEN}\n<OBJECT_TOKENS>\n"
            question = question + conversation['value']
            conversation['value'] = question
    
    example['conversations'] = conversations
    return example

def match_reasoning_map_fn_roi(example):
    example = match_reasoning_preprocess_roi(example)

    return example