|
|
|
|
|
|
|
|
| from pathlib import Path
|
| import pickle
|
| import numpy as np
|
| import math
|
| import matplotlib.pyplot as plt
|
| from PIL import Image
|
| from skimage.draw import polygon
|
| from third_party.yoloworld_demo import get_2dbox_open_vocabulary_detector
|
| from third_party.depth_demo import get_3d_location
|
|
|
|
|
class FuncAgent:
    """Expose visual-perception helpers over one scene's camera images.

    The agent wraps the open-vocabulary 2D detector and the 3D location
    estimator imported at the top of this file, and publishes the
    function-calling schemas of its visual tools in ``visual_func_infos``.
    """

    def __init__(self, data_dict=None, json_data_dict=None) -> None:
        """Store the scene data and register the visual tool schemas.

        Args:
            data_dict: Dictionary containing scene data.
            json_data_dict: Dictionary containing JSON metadata; its
                ``'image'`` entry is read as the list of camera image paths.
        """
        self.data = data_dict
        self.json_data_dict = json_data_dict
        self.short_trajectory_description = False

        # These bare names resolve to the module-level schema dictionaries,
        # not to the methods of the same name: class attributes are not
        # visible as bare names inside a method body.
        self.visual_func_infos = [
            get_open_world_vocabulary_detection_info,
            get_3d_loc_in_cam_info,
            resize_image_info,
            crop_image_info,
        ]

    def get_open_world_vocabulary_detection(self, object_names: list, cam_type: str):
        """Run open-vocabulary detection on the image of one camera.

        Args:
            object_names: List of objects to detect.
            cam_type: Camera type to process. It is compared with the second
                ``/``-separated component of every path in
                ``self.json_data_dict['image']``.

        Returns:
            Tuple of prompts and detected bounding boxes.

        Raises:
            ValueError: If no image path belongs to ``cam_type``.
        """
        cam_path_info_list = self.json_data_dict['image']
        matching_paths = [
            cam_path_info
            for cam_path_info in cam_path_info_list
            if cam_type == cam_path_info.split('/')[1]
        ]
        if not matching_paths:
            # Fail with a message that names the camera instead of crashing
            # later on an unassigned local variable.
            raise ValueError(
                f"No image path found for camera type {cam_type!r}"
            )

        # When several paths share the camera type, the last one is used.
        choosed_image_path = matching_paths[-1]
        prompts, detected_2d_boxs = get_2dbox_open_vocabulary_detector(
            text=object_names,
            image_path=choosed_image_path,
        )

        return prompts, detected_2d_boxs

    def get_open_world_vocabulary_detection_info(self, object_names: list, image_path: str):
        """Run open-vocabulary detection on an explicitly given image.

        Args:
            object_names: List of objects to detect.
            image_path: Path to the image file.

        Returns:
            Tuple of prompts and detected bounding boxes.
        """
        prompts, detected_2d_boxs = get_2dbox_open_vocabulary_detector(
            text=object_names,
            image_path=image_path,
        )
        return prompts, detected_2d_boxs

    def get_3d_loc_in_cam_info(self, object_names: list, image_path: str):
        """Get 3D locations of objects in camera coordinates.

        Args:
            object_names: List of objects to locate.
            image_path: Path to the image file.

        Returns:
            Tuple of prompts and 3D locations.
        """
        prompts, detected_loc_3d = get_3d_location(
            text=object_names,
            image_path=image_path,
        )
        return prompts, detected_loc_3d

    def get_ego_states(self):
        """Get ego vehicle state information from ``self.data``."""
        # NOTE(review): get_ego_prompts is neither defined nor imported in the
        # visible part of this file - confirm it is provided elsewhere.
        return get_ego_prompts(self.data)
|
|
|
|
|
|
|
# Function-calling schema describing the ``resize_image`` tool.
resize_image_info = {
    "name": "resize_image",
    "description": "Resizes an image to specified dimensions with interpolation support",
    "parameters": {
        "type": "object",
        "properties": {
            "input_path": {
                "type": "string",
                "description": "Input image file path",
            },
            "output_path": {
                "type": "string",
                "description": "Output path for resized image",
            },
            # Exactly two integers: width first, then height.
            "target_size": {
                "type": "array",
                "items": {"type": "integer"},
                "minItems": 2,
                "maxItems": 2,
                "description": "Target dimensions [width, height]",
            },
            # Optional: not listed under "required" below.
            "interpolation": {
                "type": "integer",
                "description": "Interpolation method (e.g., Image.BILINEAR for bilinear interpolation)",
            },
        },
        "required": ["input_path", "output_path", "target_size"],
    },
}
|
|
|
|
|
def resize_image(input_path, output_path, target_size, interpolation=Image.BILINEAR):
    """
    Write a resized copy of an image to disk.

    Args:
        input_path: Path of the image file to read
        output_path: Path the resized image is saved to
        target_size: Requested dimensions as (width, height)
        interpolation: Resampling filter handed to ``Image.resize``
            (default: ``Image.BILINEAR``)
    """
    with Image.open(input_path) as source_image:
        # Resize and save while the source file is still open.
        source_image.resize(target_size, interpolation).save(output_path)
|
|
|
|
|
# Function-calling schema describing the ``crop_image`` tool.
crop_image_info = {
    "name": "crop_image",
    "description": "Crops a rectangular region from an image",
    "parameters": {
        "type": "object",
        "properties": {
            "input_path": {
                "type": "string",
                "description": "Input image file path",
            },
            "output_path": {
                "type": "string",
                "description": "Output path for cropped image",
            },
            # Exactly four integers, in the order given in the description.
            "box": {
                "type": "array",
                "items": {"type": "integer"},
                "minItems": 4,
                "maxItems": 4,
                "description": "Crop region coordinates [left, upper, right, lower]",
            },
        },
        "required": ["input_path", "output_path", "box"],
    },
}
|
|
|
|
|
def crop_image(input_path, output_path, box):
    """
    Write a rectangular cut-out of an image to disk.

    Args:
        input_path: Path of the image file to read
        output_path: Path the cropped image is saved to
        box: Region to keep, as (left, upper, right, lower)
    """
    with Image.open(input_path) as source_image:
        # Crop and save while the source file is still open.
        source_image.crop(box).save(output_path)
|
|
|
|
|
# Function-calling schema describing the ``rotate_image`` tool.
rotate_image_info = {
    "name": "rotate_image",
    "description": "Rotates an image by specified degrees with canvas expansion support",
    "parameters": {
        "type": "object",
        "properties": {
            "input_path": {"type": "string", "description": "Input image file path"},
            "output_path": {"type": "string", "description": "Output path for rotated image"},
            # rotate_image forwards the angle unchanged to Pillow's
            # Image.rotate, which turns the image counter-clockwise for
            # positive values, so the schema must advertise that direction.
            "degrees": {
                "type": "number",
                "description": "Rotation angle in degrees (counter-clockwise)",
            },
            "expand": {
                "type": "boolean",
                "description": "Whether to expand canvas to fit rotation (default: False)",
            },
            "fill_color": {
                "type": "array",
                "items": {"type": "integer"},
                "minItems": 3,
                "maxItems": 3,
                "description": "RGB fill color for expanded areas (default: [255,255,255])",
            },
        },
        "required": ["input_path", "output_path", "degrees"],
    },
}
|
|
|
|
|
def rotate_image(input_path, output_path, degrees, expand=False, fill_color=(255, 255, 255)):
    """
    Rotate an image by specified degrees and save the result

    Args:
        input_path: Path to input image file
        output_path: Path to save rotated image
        degrees: Rotation angle in degrees; passed unchanged to
            ``Image.rotate``, which rotates counter-clockwise for
            positive values
        expand: Whether to expand canvas to fit rotation
        fill_color: Fill color for the areas outside the rotated image;
            a list is accepted and converted to a tuple
    """
    # rotate_image_info declares fill_color as an array, so a JSON-decoded
    # tool call delivers a list. Pillow documents colours as tuples, so
    # normalise here rather than rely on list support in the installed version.
    if isinstance(fill_color, list):
        fill_color = tuple(fill_color)

    with Image.open(input_path) as img:
        rotated_img = img.rotate(degrees, expand=expand, fillcolor=fill_color)
        rotated_img.save(output_path)
|
|
|
|
|
# Function-calling schema describing the ``adjust_brightness`` tool.
adjust_brightness_info = {
    "name": "adjust_brightness",
    "description": "Adjusts image brightness using enhancement factor",
    "parameters": {
        "type": "object",
        "properties": {
            "input_path": {
                "type": "string",
                "description": "Input image file path",
            },
            "output_path": {
                "type": "string",
                "description": "Output path for adjusted image",
            },
            "factor": {
                "type": "number",
                "description": "Brightness multiplier (1.0=original, >1.0=brighter, <1.0=darker)",
            },
        },
        # All three parameters are mandatory.
        "required": ["input_path", "output_path", "factor"],
    },
}
|
|
|
|
|
def adjust_brightness(input_path, output_path, factor):
    """
    Adjust image brightness and save the result

    Args:
        input_path: Path to input image file
        output_path: Path to save adjusted image
        factor: Brightness multiplier (1.0=original, >1.0=brighter, <1.0=darker)
    """
    # The file's top-level imports only bring in ``Image`` from PIL, so
    # ImageEnhance is imported here to keep this function callable.
    from PIL import ImageEnhance

    with Image.open(input_path) as img:
        enhancer = ImageEnhance.Brightness(img)
        bright_img = enhancer.enhance(factor)
        bright_img.save(output_path)
|
|
|
|
|
# Function-calling schema describing the open-vocabulary detection tool.
# Parameter types use JSON Schema names ("array", "string"), matching the
# other tool schemas in this file; "list" and "str" are Python type names and
# are not valid JSON Schema types.
get_open_world_vocabulary_detection_info = {
    "name": "get_open_world_vocabulary_detection",
    "description": "Detects objects in an image using open vocabulary detection",
    "parameters": {
        "type": "object",
        "properties": {
            "text": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of objects to detect",
            },
            "image_path": {
                "type": "string",
                "description": "Path to the image file",
            },
        },
        "required": ["text", "image_path"],
    },
}
|
|
|
|
|
# Function-calling schema describing the 3D localisation tool.
# Parameter types use JSON Schema names ("array", "string"), matching the
# other tool schemas in this file; "list" and "str" are Python type names and
# are not valid JSON Schema types.
get_3d_loc_in_cam_info = {
    "name": "get_3d_loc_in_cam",
    "description": "Calculates 3D locations of objects in camera coordinates",
    "parameters": {
        "type": "object",
        "properties": {
            "text": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of objects to locate",
            },
            "image_path": {
                "type": "string",
                "description": "Path to the image file",
            },
        },
        "required": ["text", "image_path"],
    },
}