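"""Gradio demo app for LEO: task-oriented sequential grounding in 3D scenes.

Given a 3D scene mesh and a natural-language task description, the demo
predicts a step-by-step plan and, for each step, a mask mesh highlighting
the predicted target object (shown in red).
"""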
import gradio as gr
import os
import re

import spaces  # Hugging Face Spaces runtime helper

from leo.inference import inference

MESH_DIR = 'assets/mesh'
MESH_NAMES = sorted([os.path.splitext(fname)[0] for fname in os.listdir(MESH_DIR)])
STEP_COUNTS = 6  # maximum number of action steps exposed in the UI
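# Expected asset layout (inferred from the paths used below):
#   assets/mesh/<scan_id>.glb                            - scene meshes listed in the dropdown
#   assets/mask/scene0050_00/scene0050_00_obj_<id>.glb   - per-object mask meshes for predicted targets
#   assets/mask/scene0050_00/scene0050_00_obj_empty.glb  - placeholder mask for unused step slots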
def change_scene(dropdown_scene: str):
    # load the mesh of the newly selected scene into the 3D viewer
    return os.path.join(MESH_DIR, f'{dropdown_scene}.glb')
with gr.Blocks(title='LEO Demo') as demo:
    gr.HTML(value="<h1 align='center'>Task-oriented Sequential Grounding in 3D Scenes</h1>")

    with gr.Row():
        with gr.Column(scale=5):
            dropdown_scene = gr.Dropdown(
                choices=MESH_NAMES,
                value='scene0050_00',
                interactive=True,
                label='Select a 3D scene',
            )
            model_3d = gr.Model3D(
                value=os.path.join(MESH_DIR, 'scene0050_00.glb'),
                clear_color=[0.0, 0.0, 0.0, 0.0],
                label='3D Scene',
                camera_position=(80, 100, 6),
                height=659,
            )
            gr.HTML(
                """<center><strong>
                SCROLL and DRAG on the 3D Scene
                to zoom in/out and rotate. Press CTRL and DRAG to pan.
                </strong></center>
                """
            )

    dropdown_scene.change(
        fn=change_scene,
        inputs=[dropdown_scene],
        outputs=[model_3d],
        queue=False,
    )
    # LEO task-to-plan inference wrapper
    def leo_task_to_plan(task_description):
        task_input = {
            "task_description": task_description,
            "scan_id": "scene0050_00"
        }
        plan = inference("scene0050_00", task_input, predict_mode=True)
        plan = plan[0]['pred_plan_text']
        # parts = re.split(r'(\d+\.)', plan)[1:]
        # steps = [parts[i] + parts[i + 1].rstrip() for i in range(0, len(parts), 2)]
        return plan
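    # Illustrative call (the output below is hypothetical; the exact plan text
    # depends on the model, but it is returned as a single numbered string):
    #   leo_task_to_plan("Freshen up in the bathroom.")
    #   -> "1. Walk to the bathroom. 2. Turn on the faucet at the sink. ..."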
    # LEO grounding inference wrapper
    def leo_plan_to_masks(task_description, *action_steps):
        # drop empty step textboxes; target id and label are predicted, so mark them unknown
        formatted_action_steps = [
            {"action": step, "target_id": "unknown", "label": "unknown"}
            for step in action_steps if step != ""
        ]
        task_input = {
            "task_description": task_description,
            "action_steps": formatted_action_steps,
            "scan_id": "scene0050_00"
        }
        masks = inference("scene0050_00", task_input, predict_mode=False)
        masks = [tensor.item() for tensor in masks]  # predicted object ids as plain ints
        # one mask mesh per grounded step, padded with the empty mesh for unused slots
        mask_paths = [f"assets/mask/scene0050_00/scene0050_00_obj_{mask}.glb" for mask in masks]
        mask_paths += ["assets/mask/scene0050_00/scene0050_00_obj_empty.glb"] * (STEP_COUNTS - len(masks))
        return mask_paths
    # LEO task-to-plan and grounding inference wrapper
    def leo_task_to_plan_and_masks(task_description):
        task_input = {
            "task_description": task_description,
            "scan_id": "scene0050_00"
        }
        plan = inference("scene0050_00", task_input, predict_mode=True)
        plan_text = plan[0]['pred_plan_text']
        # split the numbered plan text ("1. ... 2. ...") back into individual steps
        parts = re.split(r'(\d+\.)', plan_text)[1:]
        steps = [parts[i] + parts[i + 1].rstrip() for i in range(0, len(parts), 2)]
        steps += ["### PLANNING HAS ENDED, SEE ABOVE FOR DETAILS ###"] * (STEP_COUNTS - len(steps))
        masks = plan[0]['predict_object_id']
        mask_paths = [f"assets/mask/scene0050_00/scene0050_00_obj_{mask}.glb" for mask in masks]
        mask_paths += ["assets/mask/scene0050_00/scene0050_00_obj_empty.glb"] * (STEP_COUNTS - len(masks))  # fill with empty mask
        # interleave step texts and mask paths to match the alternating output components
        output = []
        for i in range(STEP_COUNTS):
            output.append(steps[i])
            output.append(mask_paths[i])
        return output
    # with gr.Tab("LEO Task-to-Plan"):
    #     gr.Interface(
    #         fn=leo_task_to_plan,
    #         inputs=[gr.Textbox(label="Task Description")],
    #         outputs=["text"],
    #         examples=[
    #             ["Freshen up in the bathroom."]
    #         ],
    #         title="LEO Task-to-Plan: Input task, Output plan text"
    #     )
| with gr.Tab("LEO Plan-to-Masks"): | |
| gr.Interface( | |
| fn=leo_plan_to_masks, | |
| inputs=[gr.Textbox(label="Task Description")] + [gr.Textbox(label=f"Action Step {i+1}") for i in range(STEP_COUNTS)], | |
| outputs=[gr.Model3D( | |
| clear_color=[0.0, 0.0, 0.0, 0.0], camera_position=(80, 100, 6), label=f"3D Model for Step {i+1} (if the step exists)") for i in range(STEP_COUNTS)], | |
| examples=[ | |
| [ | |
| "Start Working at the desk.", | |
| "1. Walk to the desk.", | |
| "2. Sit on the brown leather sofa chair in front of the desk.", | |
| "3. Turn on the opened laptop in front of you on the desk.", | |
| "4. Grab the cup beside the laptop to drink." | |
| ] + [""] * (STEP_COUNTS - 4) | |
| ], | |
| title="LEO Plan-to-Masks: Input plan, Output 3D Masks for each step, Red denotes predicted target object", | |
| description="Please input a task description and action steps. Examples can be found at the bottom of the interface." | |
| ) | |
| with gr.Tab("LEO Task-to-Plan and Masks"): | |
| gr.Interface( | |
| fn=leo_task_to_plan_and_masks, | |
| inputs=[gr.Textbox(label="Task Description")], | |
| outputs=[ | |
| item for sublist in zip( | |
| [gr.Textbox(label=f"Action Step {i+1}") for i in range(STEP_COUNTS)], | |
| [gr.Model3D( | |
| clear_color=[0.0, 0.0, 0.0, 0.0], | |
| camera_position=(80, 100, 6), | |
| label=f"3D Model for Step {i+1} (if the step exists)" | |
| ) for i in range(STEP_COUNTS)] | |
| ) for item in sublist | |
| ], | |
| examples=[ | |
| ["Start Working at the desk."] | |
| ], | |
| title="LEO Task-to-Plan and Masks: Input task, Output plan text and 3D Masks for each step, Red denotes predicted target object", | |
| description="Please input a task description. Examples can be found at the bottom of the interface." | |
| # js=""" | |
| # function() { | |
| # const stepCounts = """ + str(STEP_COUNTS) + """; | |
| # const stepElems = document.querySelectorAll('.output_interface .textbox_output'); | |
| # const modelElems = document.querySelectorAll('.output_interface .model3d_output'); | |
| # for (let i = 0; i < stepCounts; i++) { | |
| # if (stepElems[i].value === '### PLANNING HAS ENDED, SEE ABOVE FOR DETAILS ###' || modelElems[i].src.includes('scene0050_00_obj_empty.glb')) { | |
| # stepElems[i].style.display = 'none'; | |
| # modelElems[i].style.display = 'none'; | |
| # } | |
| # } | |
| # } | |
| # """ | |
| ) | |
demo.queue().launch(share=True, allowed_paths=['assets'])