injected_thinking / Inference /inference_demo_data_drivemllm.json

Upload folder using huggingface_hub

7134ce7 verified about 2 months ago

3.79 kB

	[
	{
	"image": [
	"demo_image/nuscenes_CAM_FRONT_3896.webp"
	],
	"question": "Question: Is white car in front of black motorcycle according to the camera coordination?",
	"final_answer": "Yes",
	"tool_result": [
	{
	"name": "get_3d_loc_in_cam_info",
	"args": {"object_names":["black motorcycle"], "image_path":"demo_image/nuscenes_CAM_FRONT_3896.webp"},
	"prompt": "\n"
	},
	{
	"name": "get_3d_loc_in_cam_info",
	"args": {"object_names":["white car"], "image_path":"demo_image/nuscenes_CAM_FRONT_3896.webp"},
	"prompt": "\n"
	}
	],
	"system_prompts": "\n A Language Agent for Autonomous Driving\n Role: You are the brain of an autonomous vehicle (a.k.a. ego-vehicle).\n Task Description: In this task, you are required to determine whether one object is positioned in front of another based on their z-values. The z-value indicates the depth position of an object, where a smaller z-value means the object is closer to the front. If the coordinates of the objects are not be provided directly, you must infer or identify them yourself. \n"
	},
	{
	"image": [
	"demo_image/nuscenes_CAM_FRONT_3590.webp"
	],
	"question": "Question: Which is further left in this 2D image, white car or white bus?",
	"final_answer": "Yes",
	"tool_result": [
	{
	"name": "get_open_world_vocabulary_detection_info",
	"args": {"object_names":["white car"], "image_path":"demo_image/nuscenes_CAM_FRONT_3590.webp"},
	"prompt": "\n"
	},
	{
	"name": "get_open_world_vocabulary_detection_info",
	"args": {"object_names":["white bus"], "image_path":"demo_image/nuscenes_CAM_FRONT_3590.webp"},
	"prompt": "\n"
	}
	],
	"system_prompts": "\n A Language Agent for Autonomous Driving\n Role: You are the brain of an autonomous vehicle (a.k.a. ego-vehicle).\n Task Description: In this task, you are tasked with determining which of two specific objects is positioned further to the left based on their x-coordinates. Note that the smaller the x-coordinate, the closer the object is to the left. If the coordinates of the objects are not be provided directly; you must infer or identify them yourself. \n"
	},

	{
	"image": [
	"demo_image/nuscenes_CAM_FRONT_3757.webp"
	],
	"question": "Question: Is pedestrian wearing black clothes in front of red and white bus(closer proximity to the camera/front)?",
	"final_answer": "Yes",
	"tool_result": [
	{
	"name": "get_3d_loc_in_cam_info",
	"args": {"object_names":["pedestrian with black clothes"], "image_path":"demo_image/nuscenes_CAM_FRONT_3757.webp"},
	"prompt": "\n"
	},
	{
	"name": "get_3d_loc_in_cam_info",
	"args": {"object_names":["white and red bus"], "image_path":"demo_image/nuscenes_CAM_FRONT_3757.webp"},
	"prompt": "\n"
	}
	],
	"system_prompts": "\n A Language Agent for Autonomous Driving\n Role: You are the brain of an autonomous vehicle (a.k.a. ego-vehicle).\n Task Description: In this task, you are required to determine whether one object is positioned in front of another based on their z-values. The z-value indicates the depth position of an object, where a smaller z-value means the object is closer to the front. If the coordinates of the objects are not be provided directly, you must infer or identify them yourself. \n"
	}
	]