Spaces:
Sleeping
Sleeping
| import logging | |
| import io | |
| import os | |
| import torch | |
| from PIL import Image | |
| from transformers import pipeline | |
| from dotenv import load_dotenv | |
| from db.postgres import db_manager | |
| # Load the .env file into the process environment at import time, before the engine instance at the bottom of this module is constructed. | |
| load_dotenv() | |
| logger = logging.getLogger("VisionEngine") | |
class VisionEngine:
    """
    Forensic vision engine backed by a local Transformers pipeline (BLIP by default).

    The pipeline is pinned to the CPU because the service targets the
    Hugging Face Spaces free tier. If the model cannot be loaded,
    ``captioner`` is set to ``None`` and ``analyze_image`` reports that
    vision is offline instead of raising.
    """

    # Pipeline task used to load the model.
    TASK = "image-text-to-text"
    # Caption prefix handed to the pipeline. Per the Transformers docs the
    # image-text-to-text pipeline needs a text input alongside the image;
    # "A photo of" is the prompt used in its BLIP example.
    DEFAULT_PROMPT = "A photo of"
    # Upper bound (width, height) applied before inference to keep CPU time down.
    MAX_IMAGE_SIZE = (800, 800)

    def __init__(
        self,
        model_name="Salesforce/blip-image-captioning-base",
        prompt: str = DEFAULT_PROMPT,
    ):
        """
        Load the captioning pipeline on the CPU.

        Args:
            model_name: Hub identifier of the model to load.
            prompt: Text prefix passed to the pipeline with every image.

        Any exception raised while loading is logged and swallowed; the
        engine is then left with ``captioner = None``.
        """
        # CRITICAL: Force CPU mode (device = -1) for HF Spaces Free Tier.
        self.device = -1
        self.prompt = prompt
        logger.info("Initializing True Vision Pipeline: %s on CPU", model_name)
        try:
            self.captioner = pipeline(
                self.TASK,
                model=model_name,
                device=self.device,
            )
            logger.info("Vision pipeline weights loaded successfully.")
        except Exception as e:
            # Deliberate best-effort: a missing model must not crash the import
            # of this module; analyze_image() reports the outage instead.
            logger.exception("Failed to initialize Vision pipeline: %s", e)
            self.captioner = None

    def _optimize_image(self, binary_data: bytes) -> "Image.Image":
        """
        Decode raw image bytes into an RGB PIL image no larger than MAX_IMAGE_SIZE.

        The PIL image is returned as-is (not re-encoded) so it can be handed
        straight to the pipeline.

        Args:
            binary_data: Encoded image bytes as stored in the database.

        Returns:
            An RGB image. If downscaling fails, the full-size RGB image is
            returned and a warning is logged.

        Raises:
            Whatever Pillow raises when the bytes cannot be decoded; the
            caller is expected to handle it.
        """
        img = Image.open(io.BytesIO(binary_data))
        # Normalize every non-RGB mode (RGBA, P, L, LA, CMYK, ...), not just
        # RGBA/P, so the result mode never depends on the input format.
        if img.mode != "RGB":
            img = img.convert("RGB")
        try:
            # thumbnail() shrinks in place, preserves aspect ratio and never
            # enlarges an image that already fits.
            img.thumbnail(self.MAX_IMAGE_SIZE, Image.Resampling.LANCZOS)
        except Exception as e:
            # Resizing is only an optimization; fall back to the full-size image.
            logger.warning("Image optimization bypassed due to error: %s", e)
        return img

    def analyze_image(self, case_id: str, file_name: str) -> str:
        """
        Produce a textual report for one stored evidence image.

        Fetches the image bytes via ``db_manager.get_image_binary``, prepares
        them with ``_optimize_image`` and runs the local pipeline.

        Args:
            case_id: Identifier of the case the evidence belongs to.
            file_name: Name of the evidence file within that case.

        Returns:
            The formatted report, or a human-readable message when the model
            is offline, the image is missing, no caption was produced, or
            processing failed. Errors during decoding/inference are logged
            and reported in the returned string rather than raised.
        """
        if self.captioner is None:
            return "Vision capabilities are offline: Model failed to load."

        # Retrieve the raw media from the PostgreSQL persistence layer
        image_binary = db_manager.get_image_binary(case_id, file_name)
        if not image_binary:
            return f"Evidence item '{file_name}' could not be located in the repository."

        try:
            logger.info("Executing native forensic vision analysis for: %s", file_name)
            pil_image = self._optimize_image(image_binary)

            # The image-text-to-text pipeline requires text next to the image.
            result = self.captioner(pil_image, text=self.prompt)

            if isinstance(result, list) and result:
                raw_text = result[0].get("generated_text", "No description generated.")
                caption = raw_text.strip()
                if caption:
                    # Upper-case only the first character; str.capitalize()
                    # would lower-case the rest (acronyms, proper nouns).
                    observation = caption[:1].upper() + caption[1:]
                    return (
                        f"--- Forensic Visual Analysis of {file_name} ---\n\n"
                        f"OBSERVATION: {observation}.\n\n"
                        "Note: This analysis was generated natively on the Forensic Server."
                    )
            return f"Vision Engine was unable to generate a meaningful report for {file_name}."
        except Exception as e:
            logger.exception("Native vision inference failed for %s: %s", file_name, e)
            return f"Error during visual evidence processing: {e}"
| # Module-level shared engine instance; constructing it here means the model is loaded when this module is imported. | |
| vision_engine = VisionEngine() | |