File size: 9,451 Bytes
f27a827
 
 
 
 
23ce4a9
6e7284c
 
a780b40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f27a827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05de806
f27a827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a780b40
f27a827
 
a780b40
f27a827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a780b40
 
 
 
 
 
 
f27a827
 
 
 
a780b40
 
 
 
 
 
 
 
 
 
f27a827
a780b40
 
23ce4a9
 
f27a827
 
 
 
 
a780b40
f27a827
 
 
 
 
 
 
 
 
a780b40
 
4259577
 
a780b40
 
f27a827
 
6e7284c
f27a827
a780b40
 
f27a827
a780b40
 
f27a827
a780b40
f27a827
 
4259577
f27a827
 
a780b40
f27a827
 
 
 
a780b40
f27a827
 
 
 
 
 
 
 
 
 
 
 
a780b40
 
 
f27a827
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import os, time, threading
import gradio as gr

from display_model import *

# Default LLM preselected in the dropdown; earlier choices kept for reference.
# default_llm = 'gpt-3.5-turbo-0125'
# default_llm = 'gpt-4-0613'
default_llm = 'gpt-4o-2024-05-13'
# scan_id = "scene0132_00"
# Maps the human-readable scene type shown in the UI to its ScanNet scan id.
# NOTE(review): the trailing numeric comments look like alternative candidate
# scan numbers considered during development — confirm before removing.
scan_id_mapper={
    'Living Room':'scene0024_00', 
    'Bedroom':'scene0051_00',  #144
    'Kitchen':'scene0335_01',  #164 197
    'Bathroom':'scene0014_00',  #14 26
    'Office':'scene0114_00'
    # gym 428
}
# NOTE(review): "defualt" is a typo, but the name is referenced by the UI
# builder below — rename both usages together or not at all.
defualt_scene_type = 'Living Room'
# Mutable module-level state: updated in place by scene_type_dropdown_callback.
scan_id = scan_id_mapper[defualt_scene_type]

def get_path(scan_id):
    """Build the on-disk asset paths for one ScanNet scan.

    Args:
        scan_id: ScanNet scan identifier, e.g. "scene0024_00".

    Returns:
        Dict with keys "ply_file", "glb_file", "new_ply_file",
        "new_glb_file" (the *_AddBox variants carry the answer bounding box)
        and "objects_info_file" (per-object metadata .npy).
    """
    stem = f"{scan_id}_vh_clean_2_aligned"
    return {
        "ply_file": os.path.join("scenes", f"{stem}.ply"),
        "glb_file": os.path.join("scenes", f"{stem}.glb"),
        "new_ply_file": os.path.join("scenes", f"{stem}_AddBox.ply"),
        "new_glb_file": os.path.join("scenes", f"{stem}_AddBox.glb"),
        "objects_info_file": os.path.join("objects_info", f"objects_info_{scan_id}.npy"),
    }

def insert_user_none_between_assistant(messages):
    """Return a copy of *messages* with a ``{"role": "user", "content": None}``
    placeholder inserted between every pair of consecutive "assistant"
    messages, so the list alternates user/assistant for the chat widget.
    """
    padded = []
    # Pretend the message before the list was from "user", so a leading
    # "assistant" entry gets no placeholder in front of it.
    previous_role = "user"
    for message in messages:
        role = message["role"]
        # Two "assistant" messages in a row -> separate them with a blank user turn.
        if previous_role == "assistant" == role:
            padded.append({"role": "user", "content": None})
        padded.append(message)
        previous_role = role
    return padded

def timer_check_update(code_interpreter, update_interval, stop_event):
    """Poll ``code_interpreter.has_update`` until *stop_event* is set.

    Whenever the flag is True, run the UI-refresh hook (currently just a
    log line) and reset the flag to False.

    Args:
        code_interpreter: CodeInterpreter instance exposing a boolean
            ``has_update`` attribute.
        update_interval: seconds to sleep between polls.
        stop_event: threading.Event used to terminate the loop.
    """
    while not stop_event.is_set():
        if code_interpreter.has_update:
            print("Detected update, trigger UI refreshing...")
            # UI-refresh logic would go here.
            # Consume the flag so the same update is not reported twice.
            code_interpreter.has_update = False
        # Sleep until the next poll.
        time.sleep(update_interval)

def process_instruction_callback(inp_api_key, instruction, llm_name):
    """Gradio callback: run the Transcrib3D pipeline on one user instruction.

    Args:
        inp_api_key: OpenAI API key typed by the user.
        instruction: natural-language referring expression for an object.
        llm_name: model name mirrored from the LLM dropdown.

    Returns:
        (glb_path, chat_history): path to the scene .glb annotated with the
        answer bounding box, and the dialogue as [user, assistant] pairs
        for gr.Chatbot.
    """
    if not inp_api_key:
        print("Please input OpenAI API Key.")
        # BUGFIX: this callback is wired to two outputs [model3d, dialogue];
        # a bare `return` (None) breaks the Gradio update. Return the
        # un-annotated scene and an empty chat instead.
        return get_path(scan_id)['glb_file'], []

    os.environ["OPENAI_API_KEY"] = inp_api_key
    # Imported lazily so the key is set in the environment before these
    # modules initialize any OpenAI client state.
    from transcrib3d_main import gen_prompt, get_gpt_response, get_openai_config
    from code_interpreter import CodeInterpreter

    print("llm_name:", llm_name)

    # Generate the prompt for the currently selected scene.
    prompt = gen_prompt(instruction, scan_id)

    # Get the OpenAI config for the chosen model and inject the user's key.
    openai_config = get_openai_config(llm_name)
    openai_config['api_key'] = inp_api_key

    # Run the LLM tool-use loop; the transcript accumulates in `pretext`.
    code_interpreter = CodeInterpreter(**openai_config)
    get_gpt_response(prompt, code_interpreter)
    messages = code_interpreter.pretext

    # Draw the answer bounding box into a new .glb of the scene.
    generate_answer_glb(messages[-1]['content'])

    # Drop the system prompt, pad so roles alternate, then pair the turns
    # into the [user, assistant] format expected by gr.Chatbot.
    messages = insert_user_none_between_assistant(messages[1:])
    gradio_messages = []
    for idx in range(len(messages) // 2):
        gradio_messages.append(
            [messages[idx * 2]['content'], messages[idx * 2 + 1]['content']])

    return get_path(scan_id)['new_glb_file'], gradio_messages

def generate_answer_glb(answer_content):
    """Parse the answer object id from the LLM's final message and write a
    new scene .glb with that object's bounding box drawn in.

    Args:
        answer_content: text of the last assistant message; its final line
            is expected to contain the answer object id.

    Side effects: writes the *_AddBox.ply and *_AddBox.glb files for the
    current module-level ``scan_id``.
    """
    # Relies on get_path() returning keys in a fixed insertion order.
    ply_file, glb_file, new_ply_file, new_glb_file, objects_info_file = get_path(scan_id).values()
    from transcrib3d_main import extract_answer_id_from_last_line
    # Empty content would make splitlines()[-1] raise, hence the guard.
    last_line = answer_content.splitlines()[-1] if len(answer_content) > 0 else ''
    answer_id, _ = extract_answer_id_from_last_line(last_line)
    print("extracted answer id:", answer_id)

    # get the bounding box of the answer object
    # (np / add_1box_to_ply / ply_to_glb come from `from display_model import *`;
    # 'extension' presumably holds the axis-aligned box extents — TODO confirm)
    box = np.load(objects_info_file, allow_pickle=True)[answer_id]['extension']
    print("box extension:",box)

    # add the box to ply
    add_1box_to_ply(box, ply_file, new_ply_file)
    ply_to_glb(new_ply_file, new_glb_file)

def llm_dropdown_callback(llm_name):
    """Mirror the selected LLM name (coerced to str) into the hidden text box."""
    print("type in callback:", type(llm_name))
    selected = str(llm_name)
    print("llm_name in callback:", selected)
    return selected

def scene_type_dropdown_callback(scene_type):
    """Switch the active scene: update the module-level ``scan_id`` and
    return the matching .glb path for the 3D viewer."""
    global scan_id
    scan_id = scan_id_mapper[scene_type]
    print("scan_id inside scene_type_dropdown_callback:", scan_id)
    paths = get_path(scan_id)
    return paths['glb_file']

# Build the Gradio UI. Layout: left column = scene selection + 3D viewer,
# right column = API key, LLM choice, instruction input, and chat history.
with gr.Blocks() as demo:
    gr.Markdown("## Transcrib3D-Demo")
    with gr.Row():
        with gr.Column():
            # Left-1: scene-type selection
            scene_type_dropdown = gr.Dropdown(
                choices=['Living Room', 'Bedroom', 'Kitchen', 'Bathroom', 'Office'],
                label='Scene Type',
                type='value',
                value=defualt_scene_type  # module constant (name carries a historical typo)
            )
            scan_id_text = gr.Text(visible=False)

            # Left-2: 3D scene viewer
            model3d = gr.Model3D(
                value=f"scenes/{scan_id}_vh_clean_2_aligned.glb",
                label="Note: It takes ~10 seconds to load the 3D scene online. Please be patient.",
                camera_position=(90, 120, 8),
                zoom_speed=0.25,
                height=725
            )
            # Left-3: interaction tips
            html_content = """
                <div style='text-align: center;'>
                    🖱️🔼🔽: SCROLL to zoom in/out.&nbsp;&nbsp;&nbsp;🖱️🔁: DRAG to rotate.&nbsp;&nbsp;&nbsp;[CTRL]+🖱️🔁: Press CTRL and DRAG to pan.
                </div>
                """
            gr.HTML(value=html_content)

        with gr.Column():
            # Right-1: OpenAI API key input
            # BUGFIX: corrected grammar in the user-facing label
            # ("will not stored" -> "will not be stored", etc.).
            inp_api_key = gr.Textbox(
                label='OpenAI API Key (This will not be stored anywhere and you can revoke it after use. Generate a new key at [https://platform.openai.com/api-keys].)',
                placeholder='sk-xxxxxxxxx',
                lines=1)
            # Right-2: LLM selection
            llm_dropdown = gr.Dropdown(
                choices=['gpt-4o-2024-05-13', 'gpt-4-0613', 'gpt-4-0125-preview', 'gpt-4-1106-preview', 'gpt-3.5-turbo-0125'],
                label="LLM Selection",
                type='value',
                value=default_llm
            )
            # Hidden text box mirroring the dropdown value; the submit
            # callbacks read the LLM name from here.
            llm_name_text = gr.Text(value=default_llm, visible=False)

            # Right-3: user instruction input
            user_instruction_textbox = gr.Textbox(
                label="Instruction",
                placeholder="Describe an object in the scene with its attributes and its relation with other objects, e.g. 'The plant between the armchair and the piano.'",
            )
            # Right-4: submit button
            bt = gr.Button(value="Submit")
            # Right-5: dialogue history
            dialogue = gr.Chatbot(height=470)

    # Wire both the button click and textbox Enter to the same pipeline.
    # (Passing a component like model3d directly in an inputs list would hand
    # the callback a str, not the component — hence the explicit value inputs.)
    bt.click(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox, llm_name_text], outputs=[model3d, dialogue])
    user_instruction_textbox.submit(fn=process_instruction_callback, inputs=[inp_api_key, user_instruction_textbox, llm_name_text], outputs=[model3d, dialogue])

    # Scene switch reloads the viewer; LLM switch updates the hidden mirror.
    scene_type_dropdown.select(fn=scene_type_dropdown_callback, inputs=scene_type_dropdown, outputs=model3d)
    llm_dropdown.select(fn=llm_dropdown_callback, inputs=llm_dropdown, outputs=llm_name_text)
    
# os.system('uname -a')  # 显示所有系统信息
# demo.launch()

    


if __name__ == "__main__":
    # demo.launch(share=True, server_name="0.0.0.0", server_port=7860)
    # Serve the Gradio demo on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)