| | """ |
| | Image Cropping Utilities |
| | |
| | Functions for extracting and managing region crops from document images. |
| | """ |
| |
|
| | import hashlib |
| | import logging |
| | from pathlib import Path |
| | from typing import Any, Dict, List, Optional, Tuple, Union |
| |
|
| | import numpy as np |
| | from PIL import Image |
| |
|
| | from ..chunks.models import BoundingBox, DocumentChunk |
| |
|
| | logger = logging.getLogger(__name__) |
| |
|
| |
|
def crop_region(
    image: Union[np.ndarray, Image.Image],
    bbox: BoundingBox,
    padding_percent: float = 0.02,
) -> np.ndarray:
    """
    Crop a region from an image.

    A normalized box is first converted to pixel coordinates. The box is
    then grown on every side by ``padding_percent`` of its own width and
    height, and finally clamped to the image bounds.

    Args:
        image: Source image (numpy array or PIL Image)
        bbox: Bounding box to crop (can be normalized or pixel)
        padding_percent: Padding to add around the crop, as a fraction of
            the box width/height (0-1)

    Returns:
        Cropped image as numpy array (a copy, not a view of ``image``).
        When the clamped region is empty, a warning is logged and a 1x1
        all-zero placeholder is returned that has the same channel layout
        and dtype as ``image``.
    """
    if isinstance(image, Image.Image):
        image = np.array(image)

    height, width = image.shape[:2]

    pixel_bbox = bbox.to_pixel(width, height) if bbox.normalized else bbox

    # Padding is relative to the box itself, not to the page.
    pad_x = int(pixel_bbox.width * padding_percent)
    pad_y = int(pixel_bbox.height * padding_percent)

    # Clamp the padded box so the slice below never leaves the image.
    x_min = max(0, int(pixel_bbox.x_min) - pad_x)
    y_min = max(0, int(pixel_bbox.y_min) - pad_y)
    x_max = min(width, int(pixel_bbox.x_max) + pad_x)
    y_max = min(height, int(pixel_bbox.y_max) + pad_y)

    if x_max <= x_min or y_max <= y_min:
        logger.warning(
            "Invalid crop region: (%s, %s, %s, %s)", x_min, y_min, x_max, y_max
        )
        # Keep the placeholder consistent with what a valid crop of this
        # image would look like (grayscale stays 2-D, RGBA keeps 4 channels).
        return np.zeros((1, 1) + image.shape[2:], dtype=image.dtype)

    return image[y_min:y_max, x_min:x_max].copy()
| |
|
| |
|
def crop_chunk(
    image: Union[np.ndarray, Image.Image],
    chunk: DocumentChunk,
    padding_percent: float = 0.02,
) -> np.ndarray:
    """
    Crop the page area covered by a document chunk.

    Convenience wrapper that forwards the chunk's bounding box to
    ``crop_region``.

    Args:
        image: Page image
        chunk: Document chunk whose ``bbox`` selects the area
        padding_percent: Padding around crop

    Returns:
        Cropped image
    """
    region = chunk.bbox
    cropped = crop_region(image, region, padding_percent)
    return cropped
| |
|
| |
|
def crop_multiple_regions(
    image: Union[np.ndarray, Image.Image],
    bboxes: List[BoundingBox],
    padding_percent: float = 0.02,
) -> List[np.ndarray]:
    """
    Crop several regions out of the same image.

    Each box is handed to ``crop_region`` with the same padding; results
    are returned in the order of ``bboxes``.

    Args:
        image: Source image
        bboxes: List of bounding boxes
        padding_percent: Padding around crops

    Returns:
        List of cropped images, one per bounding box
    """
    crops: List[np.ndarray] = []
    for region in bboxes:
        crops.append(crop_region(image, region, padding_percent))
    return crops
| |
|
| |
|
class CropManager:
    """
    Manages crop extraction and storage.

    Crops are written to ``<output_dir>/<doc_id>/`` and the resulting paths
    are remembered in an in-memory cache keyed by document, page and box,
    so a repeated request for the same region is not cropped and saved
    again.
    """

    # Extension spellings whose Pillow format name is not simply the
    # upper-cased extension.
    _PIL_FORMAT_ALIASES = {"jpg": "JPEG", "jpeg": "JPEG", "tif": "TIFF"}

    # Modes written to JPEG as-is; any other mode (e.g. RGBA) is converted
    # to RGB before saving.
    _JPEG_MODES = ("L", "RGB", "CMYK")

    def __init__(
        self,
        output_dir: Union[str, Path],
        format: str = "png",
        quality: int = 95,
    ):
        """
        Create a manager rooted at ``output_dir``.

        Args:
            output_dir: Directory that receives one sub-directory per
                document; created if missing
            format: Image file extension/format for saved crops
                (case-insensitive)
            quality: Quality setting passed to the encoder for JPEG output
        """
        self.output_dir = Path(output_dir)
        self.format = format.lower()
        self.quality = quality
        # Maps cache key -> path (as str) of the crop saved for it.
        self._cache: Dict[str, str] = {}

        self.output_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _cache_key(doc_id: str, page: int, bbox: BoundingBox) -> str:
        """Build the in-memory cache key for a (document, page, box) triple."""
        return f"{doc_id}_{page}_{bbox.xyxy}"

    def get_crop_path(
        self,
        doc_id: str,
        page: int,
        bbox: BoundingBox,
    ) -> Path:
        """
        Generate a path for a crop.

        The file name combines the document ID, the page number and the
        first 8 hex digits of an MD5 digest of the box coordinates
        (formatted to 4 decimals). MD5 is used only to derive a short,
        stable name.

        Returns:
            ``<output_dir>/<doc_id>/<doc_id>_p<page>_<hash>.<format>``
        """
        bbox_str = f"{bbox.x_min:.4f}_{bbox.y_min:.4f}_{bbox.x_max:.4f}_{bbox.y_max:.4f}"
        bbox_hash = hashlib.md5(bbox_str.encode()).hexdigest()[:8]

        filename = f"{doc_id}_p{page}_{bbox_hash}.{self.format}"
        return self.output_dir / doc_id / filename

    def save_crop(
        self,
        image: Union[np.ndarray, Image.Image],
        doc_id: str,
        page: int,
        bbox: BoundingBox,
        padding_percent: float = 0.02,
    ) -> str:
        """
        Crop and save a region.

        If the same (doc_id, page, bbox) was saved before by this manager,
        the remembered path is returned and nothing is written. Note that
        the cache key does not include ``padding_percent``, so a repeated
        call with a different padding returns the first crop's path.

        Args:
            image: Source page image
            doc_id: Document ID
            page: Page number
            bbox: Region to crop
            padding_percent: Padding around crop

        Returns:
            Path to saved crop
        """
        cache_key = self._cache_key(doc_id, page, bbox)
        cached = self._cache.get(cache_key)
        if cached is not None:
            return cached

        crop = crop_region(image, bbox, padding_percent)
        pil_crop = Image.fromarray(crop)

        crop_path = self.get_crop_path(doc_id, page, bbox)
        crop_path.parent.mkdir(parents=True, exist_ok=True)

        pil_format = self._PIL_FORMAT_ALIASES.get(self.format, self.format.upper())
        if pil_format == "JPEG":
            # JPEG has no alpha channel; convert unsupported modes instead
            # of letting the encoder reject the crop.
            if pil_crop.mode not in self._JPEG_MODES:
                pil_crop = pil_crop.convert("RGB")
            pil_crop.save(crop_path, format="JPEG", quality=self.quality)
        else:
            pil_crop.save(crop_path, format=pil_format)

        path_str = str(crop_path)
        self._cache[cache_key] = path_str

        return path_str

    def save_chunk_crop(
        self,
        image: Union[np.ndarray, Image.Image],
        chunk: DocumentChunk,
        padding_percent: float = 0.02,
    ) -> str:
        """
        Save crop for a document chunk.

        Uses the chunk's ``doc_id``, ``page`` and ``bbox`` and delegates to
        ``save_crop``.

        Args:
            image: Page image
            chunk: Chunk to crop
            padding_percent: Padding around crop

        Returns:
            Path to saved crop
        """
        return self.save_crop(
            image=image,
            doc_id=chunk.doc_id,
            page=chunk.page,
            bbox=chunk.bbox,
            padding_percent=padding_percent,
        )

    def get_cached_crop(
        self,
        doc_id: str,
        page: int,
        bbox: BoundingBox,
    ) -> Optional[str]:
        """
        Get the remembered path of a crop, if this manager has saved it.

        Only the in-memory cache is consulted; the file system is not
        checked.

        Returns:
            The cached path, or None when there is no cache entry
        """
        return self._cache.get(self._cache_key(doc_id, page, bbox))

    def load_crop(self, path: Union[str, Path]) -> Optional[np.ndarray]:
        """
        Load a crop from disk.

        Returns:
            The image as a numpy array, or None when the file does not
            exist or cannot be read (a warning is logged in that case)
        """
        path = Path(path)
        if not path.exists():
            return None

        try:
            # The context manager releases the file handle once the pixel
            # data has been copied into the array.
            with Image.open(path) as img:
                return np.array(img)
        except Exception as e:
            # Best effort: an unreadable crop is reported, not raised.
            logger.warning(f"Failed to load crop {path}: {e}")
            return None

    def clear_cache(self) -> None:
        """Clear the path cache (saved files are left on disk)."""
        self._cache.clear()

    def cleanup_doc(self, doc_id: str) -> int:
        """
        Remove all crops for a document.

        Deletes the files with this manager's format extension from the
        document's directory, removes the directory if it ends up empty,
        and forgets every cache entry whose crop lives in that directory.

        Returns number of files removed.
        """
        doc_dir = self.output_dir / doc_id

        count = 0
        if doc_dir.exists():
            for crop_file in doc_dir.glob(f"*.{self.format}"):
                try:
                    crop_file.unlink()
                except OSError as e:
                    # Keep going: cleanup is best effort per file.
                    logger.warning(f"Failed to remove crop {crop_file}: {e}")
                else:
                    count += 1

            try:
                doc_dir.rmdir()
            except OSError:
                # Directory still holds other files; leave it in place.
                pass

        # Match on the crop's directory rather than on a "<doc_id>_" key
        # prefix, which would also hit other documents such as
        # "<doc_id>_2". Done even when the directory is already gone so no
        # stale paths survive.
        self._cache = {
            key: saved_path
            for key, saved_path in self._cache.items()
            if Path(saved_path).parent != doc_dir
        }

        return count
| |
|
| |
|
def _load_label_font(font_size: int):
    """Return a font for box labels, preferring DejaVu Sans at ``font_size``."""
    from PIL import ImageFont

    try:
        return ImageFont.truetype(
            "/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", font_size
        )
    except Exception:
        # Deliberately broad: a missing font must never break annotation.
        try:
            # Newer Pillow releases can scale the built-in font, which
            # keeps the requested size on systems without DejaVu.
            return ImageFont.load_default(size=font_size)
        except Exception:
            # Older Pillow: fixed-size built-in font.
            return ImageFont.load_default()


def create_annotated_image(
    image: Union[np.ndarray, Image.Image],
    bboxes: List[BoundingBox],
    labels: Optional[List[str]] = None,
    colors: Optional[List[Tuple[int, int, int]]] = None,
    line_width: int = 2,
    font_size: int = 12,
) -> np.ndarray:
    """
    Create an annotated image with bounding boxes.

    The input is never modified; drawing happens on a copy. Images that
    are not RGB/RGBA are converted to RGB first so the RGB box colors can
    be rendered.

    Args:
        image: Source image
        bboxes: Bounding boxes to draw (normalized boxes are scaled by the
            image size)
        labels: Optional labels for each box; boxes beyond the end of the
            list are drawn without a label
        colors: Optional colors for each box (RGB tuples); boxes beyond the
            end of the list cycle through a built-in palette
        line_width: Line width for boxes
        font_size: Font size for labels

    Returns:
        Annotated image as numpy array
    """
    from PIL import ImageDraw

    if isinstance(image, np.ndarray):
        pil_image = Image.fromarray(image).copy()
    else:
        pil_image = image.copy()

    # Box and label colors are RGB tuples, so draw on an RGB(A) canvas.
    if pil_image.mode not in ("RGB", "RGBA"):
        pil_image = pil_image.convert("RGB")

    draw = ImageDraw.Draw(pil_image)
    width, height = pil_image.size

    default_colors = [
        (255, 0, 0),
        (0, 255, 0),
        (0, 0, 255),
        (255, 255, 0),
        (255, 0, 255),
        (0, 255, 255),
        (255, 128, 0),
        (128, 0, 255),
    ]

    font = _load_label_font(font_size)

    for i, bbox in enumerate(bboxes):
        if colors and i < len(colors):
            color = colors[i]
        else:
            color = default_colors[i % len(default_colors)]

        if bbox.normalized:
            x_min = int(bbox.x_min * width)
            y_min = int(bbox.y_min * height)
            x_max = int(bbox.x_max * width)
            y_max = int(bbox.y_max * height)
        else:
            x_min = int(bbox.x_min)
            y_min = int(bbox.y_min)
            x_max = int(bbox.x_max)
            y_max = int(bbox.y_max)

        # Recent Pillow versions raise on rectangles whose second corner
        # precedes the first, so order the corners before drawing.
        x_min, x_max = sorted((x_min, x_max))
        y_min, y_max = sorted((y_min, y_max))

        draw.rectangle(
            [(x_min, y_min), (x_max, y_max)],
            outline=color,
            width=line_width,
        )

        if labels and i < len(labels):
            label = labels[i]

            # Labels normally sit just above the box; when there is no
            # room above (box near the top edge) place them inside the box
            # so they are not clipped away.
            label_y = y_min - font_size - 4
            if label_y < 0:
                label_y = y_min + line_width
            label_origin = (x_min, label_y)

            # Filled background in the box color keeps the white text legible.
            text_bbox = draw.textbbox(label_origin, label, font=font)
            draw.rectangle(text_bbox, fill=color)

            draw.text(
                label_origin,
                label,
                fill=(255, 255, 255),
                font=font,
            )

    return np.array(pil_image)
| |
|
| |
|
def highlight_region(
    image: Union[np.ndarray, Image.Image],
    bbox: BoundingBox,
    highlight_color: Tuple[int, int, int] = (255, 255, 0),
    opacity: float = 0.3,
) -> np.ndarray:
    """
    Highlight a region in an image with semi-transparent overlay.

    The input is never modified; the blend is applied to a copy. Single
    channel (grayscale) input is expanded to three channels so the colored
    highlight can be shown; for images with more than three channels only
    the first three are blended and the rest (e.g. alpha) are kept.

    Args:
        image: Source image
        bbox: Region to highlight (normalized boxes are scaled by the
            image size)
        highlight_color: Color for highlight (RGB)
        opacity: Opacity of highlight (0-1)

    Returns:
        Image with highlighted region. If the box does not overlap the
        image, the copy is returned without any highlight.
    """
    if isinstance(image, Image.Image):
        img_array = np.array(image)
    else:
        img_array = image.copy()

    # A colored overlay needs color channels to blend into.
    if img_array.ndim == 2:
        img_array = np.stack([img_array] * 3, axis=-1)
    elif img_array.shape[2] == 1:
        img_array = np.repeat(img_array, 3, axis=2)

    height, width = img_array.shape[:2]

    if bbox.normalized:
        x_min = int(bbox.x_min * width)
        y_min = int(bbox.y_min * height)
        x_max = int(bbox.x_max * width)
        y_max = int(bbox.y_max * height)
    else:
        x_min = int(bbox.x_min)
        y_min = int(bbox.y_min)
        x_max = int(bbox.x_max)
        y_max = int(bbox.y_max)

    # Clamp to the image so the slices below stay in bounds.
    x_min = max(0, x_min)
    y_min = max(0, y_min)
    x_max = min(width, x_max)
    y_max = min(height, y_max)

    # Nothing to highlight when the clamped box is empty or inverted.
    if x_max <= x_min or y_max <= y_min:
        return img_array

    # Blend in floating point, then cast back to the image's own dtype.
    tint = np.asarray(highlight_color, dtype=np.float64)
    region = img_array[y_min:y_max, x_min:x_max, :3]
    blended = region * (1 - opacity) + tint * opacity
    img_array[y_min:y_max, x_min:x_max, :3] = blended.astype(img_array.dtype)

    return img_array
| |
|