"""
Image Processor Utilities.

This module handles image processing for structure diagrams and scanned documents.

Design Note:
    Image processing here is limited to:
    - Basic image validation
    - Path management
    - Potential OCR preparation (TODO: not implemented yet)

    Actual chemical structure recognition would require external services
    (e.g., OSRA, ChemDraw API) and is out of scope for this implementation.
"""

import base64
import logging
from pathlib import Path
from typing import Optional, Tuple


class ImageProcessor:
    """
    Image processor for structure diagrams and scanned documents.

    Primary responsibilities:
    - Validate image files
    - Prepare images for LLM vision APIs (if supported)
    - Encode images for embedding

    Pillow is an optional dependency: availability is probed once in
    ``__init__`` and every Pillow-backed method degrades gracefully
    (documented per method) when it is missing.
    """

    SUPPORTED_FORMATS = {'.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp'}

    # Single suffix -> MIME type table shared by get_data_uri() and
    # prepare_for_llm(), so every suffix accepted by SUPPORTED_FORMATS is
    # labelled with its real media type in both code paths.
    MIME_TYPES = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
        '.bmp': 'image/bmp',
    }

    # Media type reported for suffixes that are not in MIME_TYPES.
    DEFAULT_MIME_TYPE = 'image/png'

    _logger = logging.getLogger(__name__)

    def __init__(self):
        """Initialize the image processor and probe for Pillow once."""
        self._pil_available = self._check_pil()

    def _check_pil(self) -> bool:
        """Check if PIL/Pillow is available.

        Returns:
            True if ``PIL.Image`` can be imported, False otherwise.
        """
        try:
            from PIL import Image  # noqa: F401 - imported only as a probe
        except ImportError:
            return False
        return True

    def _mime_type_for(self, file_path: str) -> str:
        """Return the MIME type for ``file_path`` based on its suffix.

        The lookup is case-insensitive; unknown suffixes map to
        ``DEFAULT_MIME_TYPE``.
        """
        suffix = Path(file_path).suffix.lower()
        return self.MIME_TYPES.get(suffix, self.DEFAULT_MIME_TYPE)

    def validate_image(self, file_path: str) -> bool:
        """
        Validate that a file is a readable image.

        The suffix must be in ``SUPPORTED_FORMATS`` and the path must be an
        existing regular file. When Pillow is available the file content is
        additionally checked with ``Image.verify()``; without Pillow only
        the suffix and file checks are performed.

        Args:
            file_path: Path to the image file

        Returns:
            True if valid image, False otherwise
        """
        path = Path(file_path)

        if path.suffix.lower() not in self.SUPPORTED_FORMATS:
            return False

        # is_file() rather than exists(): a directory that happens to be
        # named "something.png" must not be reported as a valid image.
        if not path.is_file():
            return False

        if not self._pil_available:
            return True

        try:
            from PIL import Image

            with Image.open(file_path) as img:
                img.verify()
        except Exception:
            # Any failure to open or verify means "not a valid image".
            return False
        return True

    def get_image_info(self, file_path: str) -> Optional[dict]:
        """
        Get basic information about an image.

        Args:
            file_path: Path to the image file

        Returns:
            A dictionary that always contains ``"path"``. On success it also
            holds ``"format"``, ``"mode"``, ``"size"``, ``"width"`` and
            ``"height"``. If Pillow is unavailable it holds
            ``"status": "PIL not available"``; if the image cannot be read
            it holds ``"error"`` with the exception text.
        """
        if not self._pil_available:
            return {"path": file_path, "status": "PIL not available"}

        try:
            from PIL import Image

            with Image.open(file_path) as img:
                width, height = img.size
                return {
                    "path": file_path,
                    "format": img.format,
                    "mode": img.mode,
                    "size": img.size,
                    "width": width,
                    "height": height,
                }
        except Exception as e:
            return {"path": file_path, "error": str(e)}

    def encode_base64(self, file_path: str) -> Optional[str]:
        """
        Encode an image as base64 string.

        Useful for embedding in HTML or sending to vision APIs. The file is
        read as raw bytes; no image validation is performed here.

        Args:
            file_path: Path to the image file

        Returns:
            Base64 encoded string, or None if failed
        """
        try:
            with open(file_path, "rb") as f:
                image_data = f.read()
        except Exception as e:
            # Best-effort contract: report the failure and return None.
            self._logger.warning("Error encoding image: %s", e)
            return None
        return base64.b64encode(image_data).decode('utf-8')

    def get_data_uri(self, file_path: str) -> Optional[str]:
        """
        Get a data URI for embedding an image directly in HTML.

        Args:
            file_path: Path to the image file

        Returns:
            Data URI string, or None if the file could not be read or is
            empty
        """
        mime_type = self._mime_type_for(file_path)
        base64_data = self.encode_base64(file_path)

        if base64_data:
            return f"data:{mime_type};base64,{base64_data}"
        return None

    def resize_for_report(
        self,
        file_path: str,
        max_width: int = 400,
        max_height: int = 300
    ) -> Optional[str]:
        """
        Resize an image for report embedding.

        Creates a temporary resized copy suitable for report generation.
        The copy is created with ``delete=False``, so the caller owns the
        returned file and is responsible for removing it.

        Args:
            file_path: Path to the original image
            max_width: Maximum width in pixels
            max_height: Maximum height in pixels

        Returns:
            Path to resized image, or original path if resizing fails or
            Pillow is unavailable
        """
        if not self._pil_available:
            return file_path

        tmp_path = None
        try:
            from PIL import Image
            import tempfile

            # Image.Resampling only exists in newer Pillow releases; older
            # ones expose LANCZOS directly on the Image module.
            resample = getattr(Image, "Resampling", Image).LANCZOS

            with Image.open(file_path) as img:
                img.thumbnail((max_width, max_height), resample)

                suffix = Path(file_path).suffix
                with tempfile.NamedTemporaryFile(
                    suffix=suffix,
                    delete=False
                ) as tmp:
                    tmp_path = tmp.name

                # Save after the temp handle is closed; Pillow infers the
                # output format from the suffix of the path.
                img.save(tmp_path)
            return tmp_path

        except Exception as e:
            self._logger.warning("Error resizing image: %s", e)
            if tmp_path is not None:
                # Do not leak the delete=False temp file when saving failed.
                try:
                    Path(tmp_path).unlink()
                except OSError:
                    pass  # best-effort cleanup; the original path is returned
            return file_path

    def prepare_for_llm(self, file_path: str) -> Optional[dict]:
        """
        Prepare an image for LLM vision API submission.

        Returns a dictionary suitable for vision model APIs.

        Args:
            file_path: Path to the image file

        Returns:
            Dictionary of the form ``{"type": "image", "source": {"type":
            "base64", "media_type": ..., "data": ...}}``, or None if the
            image fails validation or cannot be encoded
        """
        if not self.validate_image(file_path):
            return None

        base64_data = self.encode_base64(file_path)
        if not base64_data:
            return None

        return {
            "type": "image",
            "source": {
                "type": "base64",
                "media_type": self._mime_type_for(file_path),
                "data": base64_data,
            }
        }