# TraceIntel / analysis / vision_engine.py
# Siddhant Belkhede
# Deploying
# 2b13511
import logging
import io
import os
import torch
from PIL import Image
from transformers import pipeline
from dotenv import load_dotenv
from db.postgres import db_manager
# Load the .env file into the OS environment variables at import time, before
# any of the definitions below run.
load_dotenv()
# Module-level logger shared by everything in this file; it uses the fixed
# name "VisionEngine" rather than the module's __name__.
logger = logging.getLogger("VisionEngine")
class VisionEngine:
    """
    True Forensic Vision Engine utilizing local Transformers (BLIP).

    Configured explicitly for a Hugging Face Spaces CPU-only environment.
    The pipeline is loaded once in ``__init__``. If loading fails, the error
    is logged and ``captioner`` is set to ``None``; ``analyze_image`` then
    returns an "offline" message instead of raising.
    """

    # Bounding box (width, height) images are shrunk into before inference.
    MAX_IMAGE_SIZE = (800, 800)

    # Text used when the pipeline result carries no usable caption.
    _NO_DESCRIPTION = "No description generated."

    def __init__(self, model_name="Salesforce/blip-image-captioning-base"):
        """
        Load the captioning pipeline for ``model_name`` on CPU.

        Args:
            model_name: Hugging Face model identifier passed to ``pipeline``.
        """
        # CRITICAL: Force CPU mode (device = -1) for HF Spaces Free Tier.
        self.device = -1
        logger.info(f"Initializing True Vision Pipeline: {model_name} on CPU")
        try:
            # FIXED: Updated task name to 'image-text-to-text' to match latest Transformers version
            # NOTE(review): analyze_image() calls this pipeline with an image
            # only (no text prompt) -- confirm the installed Transformers
            # version accepts that for this task/model.
            self.captioner = pipeline(
                "image-text-to-text",
                model=model_name,
                device=self.device,
            )
            logger.info("Vision pipeline weights loaded successfully.")
        except Exception as e:
            # Deliberate best-effort: keep the module importable and let
            # analyze_image() report that vision is offline.
            logger.error(f"Failed to initialize Vision pipeline: {e}")
            self.captioner = None

    def _optimize_image(self, binary_data: bytes) -> "Image.Image":
        """
        Convert raw database binary data into an optimized PIL Image.

        Instead of re-encoding to bytes, the PIL Image is handed directly to
        the Transformers pipeline to save processing time on the CPU.

        Args:
            binary_data: Encoded image bytes as stored in the database.

        Returns:
            An RGB image no larger than ``MAX_IMAGE_SIZE``. If optimization
            fails, the image is re-opened and returned as RGB without
            resizing.

        Raises:
            Exception: Whatever PIL raises if the fallback re-open also fails
                (for example, when the bytes are not a decodable image).
        """
        try:
            img = Image.open(io.BytesIO(binary_data))
            # Normalize *every* non-RGB mode (RGBA, P, L, LA, CMYK, ...) so
            # this path yields the same mode as the fallback path below.
            if img.mode != "RGB":
                img = img.convert("RGB")
            # Constrain dimensions for speed; thumbnail() resizes in place and
            # never enlarges the image.
            img.thumbnail(self.MAX_IMAGE_SIZE, Image.Resampling.LANCZOS)
            return img
        except Exception as e:
            logger.warning(f"Image optimization bypassed due to error: {e}")
            # Return raw image if thumbnailing fails
            return Image.open(io.BytesIO(binary_data)).convert("RGB")

    @classmethod
    def _format_report(cls, file_name: str, analysis_text) -> str:
        """
        Build the user-facing report text for one caption.

        Only the first character is upper-cased (``str.capitalize`` would
        lower-case the rest and mangle acronyms and names), and a closing
        period is appended only when the caption does not already end with
        sentence punctuation.

        Args:
            file_name: Evidence file name shown in the report header.
            analysis_text: Caption from the pipeline; ``None``, empty, or
                non-string values are tolerated.

        Returns:
            The formatted multi-line report string.
        """
        caption = "" if analysis_text is None else str(analysis_text).strip()
        if not caption:
            caption = cls._NO_DESCRIPTION
        observation = caption[:1].upper() + caption[1:]
        if not observation.endswith((".", "!", "?")):
            observation += "."
        return (
            f"--- Forensic Visual Analysis of {file_name} ---\n\n"
            f"OBSERVATION: {observation}\n\n"
            "Note: This analysis was generated natively on the Forensic Server."
        )

    def analyze_image(self, case_id: str, file_name: str) -> str:
        """
        Orchestrate visual evidence analysis via the local model.

        Retrieves the binary from the DB, optimizes it, and runs inference.

        Args:
            case_id: Case identifier passed to ``db_manager.get_image_binary``.
            file_name: Evidence file name passed to the same lookup and used
                in the returned text.

        Returns:
            A human-readable report, or a message explaining why no report
            could be produced (model offline, evidence missing, inference
            error). Errors during optimization/inference are logged and
            returned as text rather than raised.
        """
        if not self.captioner:
            return "Vision capabilities are offline: Model failed to load."

        # Retrieve the raw media from the PostgreSQL persistence layer
        image_binary = db_manager.get_image_binary(case_id, file_name)
        if not image_binary:
            return f"Evidence item '{file_name}' could not be located in the repository."

        try:
            logger.info(f"Executing native forensic vision analysis for: {file_name}")
            # Optimize image directly into a PIL object
            pil_image = self._optimize_image(image_binary)
            # Query local HF Transformers Pipeline
            result = self.captioner(pil_image)
            if isinstance(result, list) and result:
                analysis_text = result[0].get("generated_text", self._NO_DESCRIPTION)
                return self._format_report(file_name, analysis_text)
            return f"Vision Engine was unable to generate a meaningful report for {file_name}."
        except Exception as e:
            logger.error(f"Native vision inference failed for {file_name}: {e}")
            return f"Error during visual evidence processing: {str(e)}"
# Singleton engine instance, created when this module is imported.
# Constructing it runs VisionEngine.__init__, which attempts to load the
# pipeline immediately; if that fails the instance still exists, with its
# `captioner` attribute set to None.
vision_engine = VisionEngine()