File size: 9,451 Bytes
f27a827
 
 
 
 
23ce4a9
6e7284c
 
a780b40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f27a827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05de806
f27a827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a780b40
f27a827
 
a780b40
f27a827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a780b40
 
 
 
 
 
 
f27a827
 
 
 
a780b40
 
 
 
 
 
 
 
 
 
f27a827
a780b40
 
23ce4a9
 
f27a827
 
 
 
 
a780b40
f27a827
 
 
 
 
 
 
 
 
a780b40
 
4259577
 
a780b40
 
f27a827
 
6e7284c
f27a827
a780b40
 
f27a827
a780b40
 
f27a827
a780b40
f27a827
 
4259577
f27a827
 
a780b40
f27a827
 
 
 
a780b40
f27a827
 
 
 
 
 
 
 
 
 
 
 
a780b40
 
 
f27a827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import os, time, threading
import gradio as gr

from display_model import *

# Default LLM preselected in the dropdown; earlier choices kept for reference.
# default_llm = 'gpt-3.5-turbo-0125'
# default_llm = 'gpt-4-0613'
default_llm = 'gpt-4o-2024-05-13'
# scan_id = "scene0132_00"
# Maps the human-readable scene type shown in the UI to its ScanNet scan id.
# NOTE(review): the trailing numeric comments look like alternative candidate
# scan numbers considered during development — confirm before removing.
scan_id_mapper={
    'Living Room':'scene0024_00', 
    'Bedroom':'scene0051_00',  #144
    'Kitchen':'scene0335_01',  #164 197
    'Bathroom':'scene0014_00',  #14 26
    'Office':'scene0114_00'
    # gym 428
}
# NOTE(review): "defualt" is a typo, but the name is referenced by the UI
# builder below — rename both usages together or not at all.
defualt_scene_type = 'Living Room'
# Mutable module-level state: updated in place by scene_type_dropdown_callback.
scan_id = scan_id_mapper[defualt_scene_type]

def get_path(scan_id):
    """Build the on-disk asset paths for one ScanNet scan.

    Args:
        scan_id: ScanNet scan identifier, e.g. "scene0024_00".

    Returns:
        Dict with keys "ply_file", "glb_file", "new_ply_file",
        "new_glb_file" (the *_AddBox variants carry the answer bounding box)
        and "objects_info_file" (per-object metadata .npy).
    """
    stem = f"{scan_id}_vh_clean_2_aligned"
    return {
        "ply_file": os.path.join("scenes", f"{stem}.ply"),
        "glb_file": os.path.join("scenes", f"{stem}.glb"),
        "new_ply_file": os.path.join("scenes", f"{stem}_AddBox.ply"),
        "new_glb_file": os.path.join("scenes", f"{stem}_AddBox.glb"),
        "objects_info_file": os.path.join("objects_info", f"objects_info_{scan_id}.npy"),
    }

def insert_user_none_between_assistant(messages):
    """Return a copy of *messages* with a ``{"role": "user", "content": None}``
    placeholder inserted between every pair of consecutive "assistant"
    messages, so the list alternates user/assistant for the chat widget.
    """
    padded = []
    # Pretend the message before the list was from "user", so a leading
    # "assistant" entry gets no placeholder in front of it.
    previous_role = "user"
    for message in messages:
        role = message["role"]
        # Two "assistant" messages in a row -> separate them with a blank user turn.
        if previous_role == "assistant" == role:
            padded.append({"role": "user", "content": None})
        padded.append(message)
        previous_role = role
    return padded

def timer_check_update(code_interpreter, update_interval, stop_event):
    """Poll ``code_interpreter.has_update`` until *stop_event* is set.

    Whenever the flag is True, run the UI-refresh hook (currently just a
    log line) and reset the flag to False.

    Args:
        code_interpreter: CodeInterpreter instance exposing a boolean
            ``has_update`` attribute.
        update_interval: seconds to sleep between polls.
        stop_event: threading.Event used to terminate the loop.
    """
    while not stop_event.is_set():
        if code_interpreter.has_update:
            print("Detected update, trigger UI refreshing...")
            # UI-refresh logic would go here.
            # Consume the flag so the same update is not reported twice.
            code_interpreter.has_update = False
        # Sleep until the next poll.
        time.sleep(update_interval)

def process_instruction_callback(inp_api_key, instruction, llm_name):
    """Gradio callback: run the Transcrib3D pipeline on one user instruction.

    Args:
        inp_api_key: OpenAI API key typed by the user.
        instruction: natural-language referring expression for an object.
        llm_name: model name mirrored from the LLM dropdown.

    Returns:
        (glb_path, chat_history): path to the scene .glb annotated with the
        answer bounding box, and the dialogue as [user, assistant] pairs
        for gr.Chatbot.
    """
    if not inp_api_key:
        print("Please input OpenAI API Key.")
        # BUGFIX: this callback is wired to two outputs [model3d, dialogue];
        # a bare `return` (None) breaks the Gradio update. Return the
        # un-annotated scene and an empty chat instead.
        return get_path(scan_id)['glb_file'], []

    os.environ["OPENAI_API_KEY"] = inp_api_key
    # Imported lazily so the key is set in the environment before these
    # modules initialize any OpenAI client state.
    from transcrib3d_main import gen_prompt, get_gpt_response, get_openai_config
    from code_interpreter import CodeInterpreter

    print("llm_name:", llm_name)

    # Generate the prompt for the currently selected scene.
    prompt = gen_prompt(instruction, scan_id)

    # Get the OpenAI config for the chosen model and inject the user's key.
    openai_config = get_openai_config(llm_name)
    openai_config['api_key'] = inp_api_key

    # Run the LLM tool-use loop; the transcript accumulates in `pretext`.
    code_interpreter = CodeInterpreter(**openai_config)
    get_gpt_response(prompt, code_interpreter)
    messages = code_interpreter.pretext

    # Draw the answer bounding box into a new .glb of the scene.
    generate_answer_glb(messages[-1]['content'])

    # Drop the system prompt, pad so roles alternate, then pair the turns
    # into the [user, assistant] format expected by gr.Chatbot.
    messages = insert_user_none_between_assistant(messages[1:])
    gradio_messages = []
    for idx in range(len(messages) // 2):
        gradio_messages.append(
            [messages[idx * 2]['content'], messages[idx * 2 + 1]['content']])

    return get_path(scan_id)['new_glb_file'], gradio_messages

def generate_answer_glb(answer_content):
    """Parse the answer object id from the LLM's final message and write a
    new scene .glb with that object's bounding box drawn in.

    Args:
        answer_content: text of the last assistant message; its final line
            is expected to contain the answer object id.

    Side effects: writes the *_AddBox.ply and *_AddBox.glb files for the
    current module-level ``scan_id``.
    """
    # Relies on get_path() returning keys in a fixed insertion order.
    ply_file, glb_file, new_ply_file, new_glb_file, objects_info_file = get_path(scan_id).values()
    from transcrib3d_main import extract_answer_id_from_last_line
    # Empty content would make splitlines()[-1] raise, hence the guard.
    last_line = answer_content.splitlines()[-1] if len(answer_content) > 0 else ''
    answer_id, _ = extract_answer_id_from_last_line(last_line)
    print("extracted answer id:", answer_id)

    # get the bounding box of the answer object
    # (np / add_1box_to_ply / ply_to_glb come from `from display_model import *`;
    # 'extension' presumably holds the axis-aligned box extents — TODO confirm)
    box = np.load(objects_info_file, allow_pickle=True)[answer_id]['extension']
    print("box extension:",box)

    # add the box to ply
    add_1box_to_ply(box, ply_file, new_ply_file)
    ply_to_glb(new_ply_file, new_glb_file)

def llm_dropdown_callback(llm_name):
    """Mirror the selected LLM name (coerced to str) into the hidden text box."""
    print("type in callback:", type(llm_name))
    selected = str(llm_name)
    print("llm_name in callback:", selected)
    return selected

def scene_type_dropdown_callback(scene_type):
    """Switch the active scene: update the module-level ``scan_id`` and
    return the matching .glb path for the 3D viewer."""
    global scan_id
    scan_id = scan_id_mapper[scene_type]
    print("scan_id inside scene_type_dropdown_callback:", scan_id)
    paths = get_path(scan_id)
    return paths['glb_file']

# Build the Gradio UI. Layout: left column = scene selection + 3D viewer,
# right column = API key, LLM choice, instruction input, and chat history.
with gr.Blocks() as demo:
    gr.Markdown("## Transcrib3D-Demo")
    with gr.Row():
        with gr.Column():
            # Left-1: scene-type selection
            scene_type_dropdown = gr.Dropdown(
                choices=['Living Room', 'Bedroom', 'Kitchen', 'Bathroom', 'Office'],
                label='Scene Type',
                type='value',
                value=defualt_scene_type  # module constant (name carries a historical typo)
            )
            scan_id_text = gr.Text(visible=False)

            # Left-2: 3D scene viewer
            model3d = gr.Model3D(
                value=f"scenes/{scan_id}_vh_clean_2_aligned.glb",
                label="Note: It takes ~10 seconds to load the 3D scene online. Please be patient.",
                camera_position=(90, 120, 8),
                zoom_speed=0.25,
                height=725
            )
            # Left-3: interaction tips
            html_content = """
                <div style='text-align: center;'>
                    🖱️🔼🔽: SCROLL to zoom in/out.&nbsp;&nbsp;&nbsp;🖱️🔁: DRAG to rotate.&nbsp;&nbsp;&nbsp;[CTRL]+🖱️🔁: Press CTRL and DRAG to pan.
                </div>
                """
            gr.HTML(value=html_content)

        with gr.Column():
            # Right-1: OpenAI API key input
            # BUGFIX: corrected grammar in the user-facing label
            # ("will not stored" -> "will not be stored", etc.).
            inp_api_key = gr.Textbox(
                label='OpenAI API Key (This will not be stored anywhere and you can revoke it after use. Generate a new key at [https://platform.openai.com/api-keys].)',
                placeholder='sk-xxxxxxxxx',
                lines=1)
            # Right-2: LLM selection
            llm_dropdown = gr.Dropdown(
                choices=['gpt-4o-2024-05-13', 'gpt-4-0613', 'gpt-4-0125-preview', 'gpt-4-1106-preview', 'gpt-3.5-turbo-0125'],
                label="LLM Selection",
                type='value',
                value=default_llm
            )
            # Hidden text box mirroring the dropdown value; the submit
            # callbacks read the LLM name from here.
            llm_name_text = gr.Text(value=default_llm, visible=False)

            # Right-3: user instruction input
            user_instruction_textbox = gr.Textbox(
                label="Instruction",
                placeholder="Describe an object in the scene with its attributes and its relation with other objects, e.g. 'The plant between the armchair and the piano.'",
            )
            # Right-4: submit button
            bt = gr.Button(value="Submit")
            # Right-5: dialogue history
            dialogue = gr.Chatbot(height=470)

    # Wire both the button click and textbox Enter to the same pipeline.
    # (Passing a component like model3d directly in an inputs list would hand
    # the callback a str, not the component — hence the explicit value inputs.)
    bt.click(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox, llm_name_text], outputs=[model3d, dialogue])
    user_instruction_textbox.submit(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox, llm_name_text], outputs=[model3d, dialogue])

    # Scene switch reloads the viewer; LLM switch updates the hidden mirror.
    scene_type_dropdown.select(fn=scene_type_dropdown_callback, inputs=scene_type_dropdown, outputs=model3d)
    llm_dropdown.select(fn=llm_dropdown_callback, inputs=llm_dropdown, outputs=llm_name_text)
    
# os.system('uname -a')  # 显示所有系统信息
# demo.launch()

    


if __name__ == "__main__":
    # demo.launch(share=True, server_name="0.0.0.0", server_port=7860)
    # Serve the Gradio demo on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)