File size: 7,435 Bytes
032e687
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
import numpy as np
import random
from xtuner.utils import DEFAULT_IMAGE_TOKEN
import re

# Lead-in phrases; one is picked at random and placed directly in front of an
# example's ``description`` text to form the reasoning answer.
PREFIX_REASONING_STR = [
    "This conclusion is based on several observations: ",
    "Because: ",
    "This conclusion stems from several key factors: ",
    "The reasoning behind this conclusion includes: ",
    "Several observations lead to this conclusion: ",
    "The underlying reasons are: ",
    "The evidence supporting this conclusion includes: ",
    "This is justified by: ",
]

# Question templates for objects marked with coloured contours. Each template
# carries a single ``{color}`` placeholder, which is left unformatted here.
CONTOUR_QUESTIONS = [
    (
        "Here are two images. In the second image, I have marked several "
        "visual objects with their contours in different colors, and each "
        "is identified by a white numeric ID against a background that "
        "matches the contour's color. Could you please tell me which of "
        "these marked objects is the same as the object marked with a {color} "
        "contour in the first image?"
    ),
    (
        "Observe the two images provided. In the second image, several objects "
        "are outlined in various colors, each accompanied by a white numeric ID "
        "on a matching color background. Can you identify which object corresponds "
        "to the one outlined in {color} in the first image?"
    ),
    (
        "You have two images in front of you. The second image contains multiple "
        "objects, each highlighted with a distinct color contour and labeled with "
        "a numeric ID. Please determine which object matches the one outlined in "
        "{color} in the first image?"
    ),
    (
        "Examine the pair of images. In the second image, objects are marked with "
        "different colored contours, each paired with a white numeric ID on a "
        "corresponding colored background. Which object is identical to the one "
        "marked with a {color} contour in the first image?"
    ),
    (
        "Here are two images for comparison. The second image features several "
        "objects, each enclosed in a uniquely colored contour and identified by "
        "a numeric ID. Can you select the object that matches the one outlined "
        "in {color} in the first image?"
    ),
    (
        "Look at the two images provided. In the second image, objects are "
        "highlighted with various colored contours, each with a white numeric "
        "ID on a matching background. Which of these objects is the same as the "
        "one outlined in {color} in the first image?"
    ),
]

# Suffix appended to every question template. The ``{choices}`` placeholder is
# not filled in by the code visible in this file (presumably substituted
# downstream -- confirm against the consumer).
CHOICE_STR = (
    " Please make a choice from the following options: \n"
    "{choices}"
)

def match_reasoning_preprocess(example):
    """Attach a four-turn "match, then explain" dialogue to ``example``.

    The dialogue is stored under ``example['conversations']`` as a list of
    ``{"from": ..., "value": ...}`` dicts:

    1. human: two image tokens, the ``<OBJECT_TOKENS>`` marker, a randomly
       chosen contour question and the choice suffix;
    2. gpt: the literal ``'{answer}'`` placeholder;
    3. human: ``"Why?"``;
    4. gpt: a randomly chosen reasoning prefix followed by
       ``example['description']``.

    The ``{color}``, ``{choices}`` and ``{answer}`` placeholders are not
    formatted here.

    Args:
        example: Mapping that provides a ``'description'`` entry. It is
            modified in place.

    Returns:
        The same ``example`` object, now carrying ``'conversations'``.
    """
    # Draw the question before the reasoning prefix so the sequence of
    # random draws stays the same for a given seed.
    picked_question = random.choice(CONTOUR_QUESTIONS) + CHOICE_STR
    explanation = random.choice(PREFIX_REASONING_STR) + example['description']

    image_header = f"Image-1: {DEFAULT_IMAGE_TOKEN}\nImage-2: {DEFAULT_IMAGE_TOKEN}\n<OBJECT_TOKENS>\n"

    example['conversations'] = [
        {"from": "human", "value": image_header + picked_question},
        {"from": "gpt", "value": "{answer}"},
        {"from": "human", "value": "Why?"},
        {"from": "gpt", "value": explanation},
    ]
    return example

def match_reasoning_map_fn(example):
    """Return ``example`` after running ``match_reasoning_preprocess`` on it.

    Thin pass-through entry point (presumably registered as a dataset map
    function -- confirm against the caller).
    """
    return match_reasoning_preprocess(example)


# Question templates for objects marked with coloured bounding boxes. Each
# template carries a single ``{color}`` placeholder, left unformatted here.
BBOX_QUESTIONS = [
    (
        "Here are two images. In the second image, I have marked several "
        "visual objects with their bounding boxes in different colors, and each "
        "is identified by a white numeric ID against a background that "
        "matches the bounding box color. Could you please tell me which of "
        "these marked objects is the same as the object marked with a {color} "
        "bounding box in the first image?"
    ),
    (
        "Observe the two images provided. In the second image, several objects "
        "are outlined in various colors, each accompanied by a white numeric ID "
        "on a matching color background. Can you identify which object corresponds "
        "to the one outlined in {color} in the first image?"
    ),
    (
        "You have two images in front of you. The second image contains multiple "
        "objects, each highlighted with a distinct color bounding box and labeled with "
        "a numeric ID. Please determine which object matches the one outlined in "
        "{color} in the first image?"
    ),
    (
        "Examine the pair of images. In the second image, objects are marked with "
        "different colored bounding boxes, each paired with a white numeric ID on a "
        "corresponding colored background. Which object is identical to the one "
        "marked with a {color} bounding box in the first image?"
    ),
    (
        "Here are two images for comparison. The second image features several "
        "objects, each enclosed in a uniquely colored bounding box and identified by "
        "a numeric ID. Can you select the object that matches the one outlined "
        "in {color} in the first image?"
    ),
    (
        "Look at the two images provided. In the second image, objects are "
        "highlighted with various colored bounding boxes, each with a white numeric "
        "ID on a matching background. Which of these objects is the same as the "
        "one outlined in {color} in the first image?"
    ),
]


def match_choice_only_preprocess(example):
    """Attach a two-turn multiple-choice matching dialogue to ``example``.

    The question template pool is selected by ``example['vprompt_type']``:
    ``"mask"`` uses ``CONTOUR_QUESTIONS`` and ``"bbox"`` uses
    ``BBOX_QUESTIONS``. The resulting dialogue, stored under
    ``example['conversations']``, is:

    1. human: two image tokens, the ``<OBJECT_TOKENS>`` marker, a randomly
       chosen question and the choice suffix;
    2. gpt: the literal ``'{answer}'`` placeholder.

    The ``{color}``, ``{choices}`` and ``{answer}`` placeholders are not
    formatted here.

    Args:
        example: Mapping that provides a ``'vprompt_type'`` entry. It is
            modified in place.

    Returns:
        The same ``example`` object, now carrying ``'conversations'``.

    Raises:
        KeyError: If ``example`` has no ``'vprompt_type'`` entry.
        NotImplementedError: If ``vprompt_type`` is neither ``"mask"`` nor
            ``"bbox"``; the message names the offending value.
    """
    vprompt_type = example['vprompt_type']
    if vprompt_type == "mask":
        templates = CONTOUR_QUESTIONS
    elif vprompt_type == "bbox":
        templates = BBOX_QUESTIONS
    else:
        # Name the offending value so a bad dataset record can be diagnosed
        # from the traceback alone.
        raise NotImplementedError(
            f"Unsupported vprompt_type {vprompt_type!r}; "
            "expected 'mask' or 'bbox'."
        )

    question = random.choice(templates) + CHOICE_STR
    image_header = f"Image-1: {DEFAULT_IMAGE_TOKEN}\nImage-2: {DEFAULT_IMAGE_TOKEN}\n<OBJECT_TOKENS>\n"

    example['conversations'] = [
        {"from": "human", "value": image_header + question},
        {"from": "gpt", "value": "{answer}"},
    ]
    return example

def match_choice_only_map_fn(example):
    """Return ``example`` after running ``match_choice_only_preprocess`` on it.

    Thin pass-through entry point (presumably registered as a dataset map
    function -- confirm against the caller).
    """
    return match_choice_only_preprocess(example)


# Question templates for the query/candidate (ROI) setting. These contain no
# ``{color}`` placeholder.
ROI_QUESTIONS = [
    (
        "Here are two images. In the first image, I have specified a query object, "
        "and in the second image, there are multiple candidate objects. Could you "
        "identify which candidate object is the same as the query object?"
    ),
]

def match_reasoning_preprocess_roi(example):
    """Attach a four-turn "match, then explain" ROI dialogue to ``example``.

    Same layout as the contour variant, but the question is drawn from
    ``ROI_QUESTIONS``. The dialogue stored under ``example['conversations']``
    is:

    1. human: two image tokens, the ``<OBJECT_TOKENS>`` marker, a randomly
       chosen ROI question and the choice suffix;
    2. gpt: the literal ``'{answer}'`` placeholder;
    3. human: ``"Why?"``;
    4. gpt: a randomly chosen reasoning prefix followed by
       ``example['description']``.

    The ``{choices}`` and ``{answer}`` placeholders are not formatted here.

    Args:
        example: Mapping that provides a ``'description'`` entry. It is
            modified in place.

    Returns:
        The same ``example`` object, now carrying ``'conversations'``.
    """
    # Draw the question before the reasoning prefix so the sequence of
    # random draws stays the same for a given seed.
    picked_question = random.choice(ROI_QUESTIONS) + CHOICE_STR
    explanation = random.choice(PREFIX_REASONING_STR) + example['description']

    image_header = f"Image-1: {DEFAULT_IMAGE_TOKEN}\nImage-2: {DEFAULT_IMAGE_TOKEN}\n<OBJECT_TOKENS>\n"

    example['conversations'] = [
        {"from": "human", "value": image_header + picked_question},
        {"from": "gpt", "value": "{answer}"},
        {"from": "human", "value": "Why?"},
        {"from": "gpt", "value": explanation},
    ]
    return example

def match_reasoning_map_fn_roi(example):
    """Return ``example`` after running ``match_reasoning_preprocess_roi`` on it.

    Thin pass-through entry point (presumably registered as a dataset map
    function -- confirm against the caller).
    """
    return match_reasoning_preprocess_roi(example)