| [ | |
| { | |
| "image": [ | |
| "demo_image/nuscenes_CAM_FRONT_3896.webp" | |
| ], | |
| "question": "Question: Is white car in front of black motorcycle according to the camera coordination?", | |
| "final_answer": "Yes", | |
| "tool_result": [ | |
| { | |
| "name": "get_3d_loc_in_cam_info", | |
| "args": {"object_names":["black motorcycle"], "image_path":"demo_image/nuscenes_CAM_FRONT_3896.webp"}, | |
| "prompt": "\n" | |
| }, | |
| { | |
| "name": "get_3d_loc_in_cam_info", | |
| "args": {"object_names":["white car"], "image_path":"demo_image/nuscenes_CAM_FRONT_3896.webp"}, | |
| "prompt": "\n" | |
| } | |
| ], | |
| "system_prompts": "\n **A Language Agent for Autonomous Driving**\n Role: You are the brain of an autonomous vehicle (a.k.a. ego-vehicle).\n Task Description: In this task, you are required to determine whether one object is positioned in front of another based on their z-values. The z-value indicates the depth position of an object, where a smaller z-value means the object is closer to the front. If the coordinates of the objects are not be provided directly, you must infer or identify them yourself. \n" | |
| }, | |
| { | |
| "image": [ | |
| "demo_image/nuscenes_CAM_FRONT_3590.webp" | |
| ], | |
| "question": "Question: Which is further left in this 2D image, white car or white bus?", | |
| "final_answer": "Yes", | |
| "tool_result": [ | |
| { | |
| "name": "get_open_world_vocabulary_detection_info", | |
| "args": {"object_names":["white car"], "image_path":"demo_image/nuscenes_CAM_FRONT_3590.webp"}, | |
| "prompt": "\n" | |
| }, | |
| { | |
| "name": "get_open_world_vocabulary_detection_info", | |
| "args": {"object_names":["white bus"], "image_path":"demo_image/nuscenes_CAM_FRONT_3590.webp"}, | |
| "prompt": "\n" | |
| } | |
| ], | |
| "system_prompts": "\n **A Language Agent for Autonomous Driving**\n Role: You are the brain of an autonomous vehicle (a.k.a. ego-vehicle).\n Task Description: In this task, you are tasked with determining which of two specific objects is positioned further to the left based on their x-coordinates. Note that the smaller the x-coordinate, the closer the object is to the left. If the coordinates of the objects are not be provided directly; you must infer or identify them yourself. \n" | |
| }, | |
| { | |
| "image": [ | |
| "demo_image/nuscenes_CAM_FRONT_3757.webp" | |
| ], | |
| "question": "Question: Is pedestrian wearing black clothes in front of red and white bus(closer proximity to the camera/front)?", | |
| "final_answer": "Yes", | |
| "tool_result": [ | |
| { | |
| "name": "get_3d_loc_in_cam_info", | |
| "args": {"object_names":["pedestrian with black clothes"], "image_path":"demo_image/nuscenes_CAM_FRONT_3757.webp"}, | |
| "prompt": "\n" | |
| }, | |
| { | |
| "name": "get_3d_loc_in_cam_info", | |
| "args": {"object_names":["white and red bus"], "image_path":"demo_image/nuscenes_CAM_FRONT_3757.webp"}, | |
| "prompt": "\n" | |
| } | |
| ], | |
| "system_prompts": "\n **A Language Agent for Autonomous Driving**\n Role: You are the brain of an autonomous vehicle (a.k.a. ego-vehicle).\n Task Description: In this task, you are required to determine whether one object is positioned in front of another based on their z-values. The z-value indicates the depth position of an object, where a smaller z-value means the object is closer to the front. If the coordinates of the objects are not be provided directly, you must infer or identify them yourself. \n" | |
| } | |
| ] |