Spaces:
Runtime error
Runtime error
| from PIL import Image | |
| import io | |
| import json | |
def crop_image(image, x: float, y: float, width: float, height: float):
    """Crop the image to a box given in normalized coordinates.

    The box is expressed as fractions of the image size (0 = left/top edge,
    1 = right/bottom edge). Any part of the box that falls outside the image
    is clipped away. The crop is then round-tripped through an in-memory JPEG,
    so the returned object is a freshly opened JPEG image.
    This has the effect of zooming in on the image crop.

    Args:
        image (PIL.Image.Image): the input image
        x (float): the horizontal coordinate of the upper-left corner of the box
        y (float): the vertical coordinate of that corner
        width (float): the box width
        height (float): the box height

    Returns:
        cropped_img (PIL.Image.Image): the cropped image, re-opened from JPEG data

    Example:
        image = Image.open("sample_img.jpg")
        cropped_img = crop_image(image, 0.2, 0.3, 0.5, 0.4)
        display(cropped_img)
    """
    img_w, img_h = image.size

    # Compute the far corner from the *requested* origin before clamping.
    # Clamping the origin first would shift a box that starts outside the
    # image instead of clipping it (e.g. x=-0.1, width=0.3 must end at 0.2).
    x2 = min(max(0, x + width), 1)
    y2 = min(max(0, y + height), 1)
    x = min(max(0, x), 1)
    y = min(max(0, y), 1)

    cropped_img = image.crop((x * img_w, y * img_h, x2 * img_w, y2 * img_h))

    # The JPEG writer only handles a few pixel modes; images with alpha or a
    # palette (RGBA, LA, P, ...) must be converted or save() raises OSError.
    if cropped_img.mode not in ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr"):
        cropped_img = cropped_img.convert("RGB")

    buffer = io.BytesIO()
    cropped_img.save(buffer, format="JPEG")
    buffer.seek(0)  # Rewind so the image can be read back from the start

    # Load as a JpegImageFile
    return Image.open(buffer)
def zoom_in_image_by_bbox(image, box, padding=0.01):
    """Crop the image around a bounding box, with some padding on every side.

    A simple wrapper around crop_image: the box is grown by ``padding`` in
    each direction and the result is passed on unchanged otherwise.
    The padding cannot be too small. Minimum is 0.01.

    Args:
        image (PIL.Image.Image): the input image
        box (List[float]): the bounding box in the format of [x, y, w, h]
        padding (float, optional): The padding for the image crop, outside of
            the bounding box. Defaults to 0.01.

    Returns:
        cropped_img (PIL.Image.Image): the cropped image

    Raises:
        AssertionError: if padding is smaller than 0.01.

    Example:
        image = Image.open("sample_img.jpg")
        annotated_img, boxes = detection(image, "bus")
        cropped_img = zoom_in_image_by_bbox(image, boxes[0], padding=0.1)
        display(cropped_img)
    """
    # Raised explicitly instead of via `assert` so the check is not stripped
    # under `python -O`; the exception type and message are kept so existing
    # handlers keep working. `not >=` also rejects NaN, as the assert did.
    if not padding >= 0.01:
        raise AssertionError("The padding should be at least 0.01")

    x, y, w, h = box
    # Move the origin out by one padding and grow each side length by two.
    return crop_image(
        image,
        x - padding,
        y - padding,
        w + 2 * padding,
        h + 2 * padding,
    )
def parse_inch_string(inch_str: str) -> float:
    """Convert a string like '12.0 Inches' into a float (12.0).

    The unit suffix is optional and matched case-insensitively ('Inches',
    'inch', 'in'), with or without a space before it. A value that is already
    an int or float is returned as a float.

    Args:
        inch_str: the measurement text, e.g. '12.0 Inches'.

    Returns:
        The numeric part of the measurement.

    Raises:
        ValueError: if what remains after removing the unit is not a number.
    """
    if isinstance(inch_str, (int, float)):
        return float(inch_str)

    text = inch_str.strip()
    # Longest suffix first so 'inches' is not cut down to 'ches' by 'in'.
    for unit in ("inches", "inch", "in"):
        if text[-len(unit):].lower() == unit:
            text = text[:-len(unit)]
            break
    return float(text.strip())
def convert_pptx_bboxes_to_image_space(bbox_dict, slide_width_in, slide_height_in):
    """Map PPTX bounding boxes measured in inches to normalized image coords.

    Each entry of ``bbox_dict`` carries 'left', 'top', 'width' and 'height'
    as inch strings, for example:
        {
            'TitleAndAuthor': {
                'left': '12.0 Inches', 'top': '1.0 Inches',
                'width': '24.0 Inches', 'height': '2.0 Inches'
            },
            ...
        }
    Horizontal values are divided by the slide width and vertical values by
    the slide height.

    Returns a dictionary with the same keys, but values as
    [x_norm, y_norm, w_norm, h_norm].
    """
    def _normalize(box):
        # Parse all four fields first, then scale them.
        left, top, width, height = [
            parse_inch_string(box[field])
            for field in ('left', 'top', 'width', 'height')
        ]
        return [
            left / slide_width_in,
            top / slide_height_in,
            width / slide_width_in,
            height / slide_height_in,
        ]

    return {label: _normalize(box) for label, box in bbox_dict.items()}
def convert_pptx_bboxes_json_to_image_json(bbox_json_str, slide_width_in, slide_height_in):
    """Convert bounding boxes (in inches) to normalized image coords.

    Args:
        bbox_json_str (str | bytes | dict): JSON text of the bounding box
            dictionary, or the already-parsed dictionary itself.
            Example of the structure (in JSON):
            {
                "TitleAndAuthor": {
                    "left": "12.0 Inches",
                    "top": "1.0 Inches",
                    "width": "24.0 Inches",
                    "height": "2.0 Inches"
                },
                "Abstract-Section Title": { ... },
                ...
            }
        slide_width_in (float): The total slide width in inches.
        slide_height_in (float): The total slide height in inches.

    Returns:
        dict: each key maps to [x_norm, y_norm, w_norm, h_norm]. Despite the
        function name, the result is a Python dictionary, not JSON text; call
        json.dumps on it if a string is needed.
    """
    def parse_inch_string(inch_str) -> float:
        """Helper to parse '12.0 Inches' -> 12.0 (float)."""
        if isinstance(inch_str, (int, float)):
            return float(inch_str)
        text = inch_str.strip()
        # Unit is optional and case-insensitive; longest suffix first.
        for unit in ("inches", "inch", "in"):
            if text[-len(unit):].lower() == unit:
                text = text[:-len(unit)]
                break
        return float(text.strip())

    # 1) Parse the input if it is still JSON text; pass mappings through.
    if isinstance(bbox_json_str, (str, bytes, bytearray)):
        bbox_dict = json.loads(bbox_json_str)
    else:
        bbox_dict = bbox_json_str

    # 2) Convert each bounding box to normalized coordinates [x, y, w, h]
    normalized_bboxes = {}
    for label, box in bbox_dict.items():
        left_in = parse_inch_string(box['left'])
        top_in = parse_inch_string(box['top'])
        width_in = parse_inch_string(box['width'])
        height_in = parse_inch_string(box['height'])
        normalized_bboxes[label] = [
            left_in / slide_width_in,
            top_in / slide_height_in,
            width_in / slide_width_in,
            height_in / slide_height_in,
        ]

    # 3) Return the dictionary of normalized boxes
    return normalized_bboxes