"""
ColPali Query Embedding Processor

Handles query embedding generation using ColSmol-500M model.
This is a standalone implementation for inference only (no PDF processing).
"""

import logging
from typing import Optional

import torch

logger = logging.getLogger(__name__)

# Check if colpali_engine is available. The import is optional at module load
# time so that importing this file never fails; the hard failure is deferred
# to ColPaliProcessor.__init__.
try:
    from colpali_engine.models import ColIdefics3, ColIdefics3Processor
    COLPALI_AVAILABLE = True
except ImportError:
    COLPALI_AVAILABLE = False
    logger.warning("colpali_engine not installed. Install with: pip install colpali-engine")


class ColPaliProcessor:
    """
    Processes queries using ColPali for visual document retrieval.

    This is a lightweight processor focused on query embedding generation.
    It loads a ``ColIdefics3`` model and its ``ColIdefics3Processor`` once in
    ``__init__`` and exposes ``embed_query`` for inference.
    """

    def __init__(
        self,
        model_name: str = "vidore/colSmol-500M",
        device: str = "cpu",
        torch_dtype: torch.dtype = torch.float32,
        batch_size: int = 4
    ):
        """
        Initialize ColPali processor.

        Args:
            model_name: HuggingFace model name for ColPali. If it contains no
                ``/``, the ``vidore/`` organization prefix is prepended.
            device: Device to use ("cuda", "cpu", "mps"); passed to
                ``from_pretrained`` as ``device_map``.
            torch_dtype: Data type for model weights
            batch_size: Batch size for processing (stored on the instance;
                not used by the methods in this class).

        Raises:
            ImportError: If ``colpali_engine`` is not installed.
            Exception: Any error raised while loading the model or processor
                is logged and re-raised unchanged.
        """
        if not COLPALI_AVAILABLE:
            raise ImportError(
                "colpali_engine not installed. Install with: "
                "pip install colpali-engine"
            )

        # Validate model name (must include organization prefix)
        if '/' not in model_name:
            logger.warning(
                "⚠️ Model name '%s' missing organization prefix, adding 'vidore/'",
                model_name,
            )
            model_name = f"vidore/{model_name}"

        self.model_name = model_name
        self.device = device
        self.torch_dtype = torch_dtype
        self.batch_size = batch_size

        logger.info("🤖 Loading ColPali model: %s", model_name)
        logger.info(" Device: %s, dtype: %s", device, torch_dtype)

        # Load model and processor
        try:
            attn_implementation = self._select_attn_implementation(device, torch_dtype)

            self.model = ColIdefics3.from_pretrained(
                model_name,
                dtype=torch_dtype,
                device_map=device,
                attn_implementation=attn_implementation
            ).eval()

            self.processor = ColIdefics3Processor.from_pretrained(model_name)

            logger.info("✅ ColPali model loaded successfully")
            logger.info(" Attention implementation: %s", attn_implementation)

        except Exception as e:
            # Log for operators, then propagate so callers see the real error.
            logger.error("❌ Failed to load ColPali model: %s", e)
            raise

    @staticmethod
    def _select_attn_implementation(device: str, torch_dtype: torch.dtype) -> str:
        """
        Choose the attention backend for the given device and weight dtype.

        FlashAttention-2 is only requested when all of the following hold:
        the device is not CPU/MPS, the weights are float16 or bfloat16, and
        the ``flash_attn`` package can be imported. Otherwise "eager" is used.

        Args:
            device: Target device string (e.g. "cpu", "cuda", "cuda:0", "mps").
            torch_dtype: Data type the model weights will be loaded in.

        Returns:
            "flash_attention_2" or "eager".
        """
        # Strip an optional device index ("cuda:0" -> "cuda").
        device_type = str(device).split(":", 1)[0]

        # FlashAttention-2 kernels are CUDA-only; never request them on CPU/MPS.
        if device_type in ("cpu", "mps"):
            return "eager"

        # FlashAttention-2 supports half-precision weights only; requesting it
        # with float32 (this class's default dtype) would break model loading
        # or inference.
        if torch_dtype not in (torch.float16, torch.bfloat16):
            logger.info(" FlashAttention2 requires float16/bfloat16, using eager attention")
            return "eager"

        try:
            # Imported only to probe availability.
            import flash_attn  # noqa: F401
        except ImportError:
            logger.info(" FlashAttention2 not available, using eager attention")
            return "eager"

        logger.info(" Using FlashAttention2 for faster inference")
        return "flash_attention_2"

    def embed_query(self, query_text: str) -> torch.Tensor:
        """
        Generate embedding for a text query.

        The query is wrapped in a single-element list, run through the
        processor's ``process_queries``, moved to the model's device and fed
        to the model with gradients disabled.

        Args:
            query_text: Natural language query string

        Returns:
            The raw model output for the one-query batch. The batch dimension
            is not removed here.
            NOTE(review): presumably shaped [1, num_tokens, embedding_dim] -
            confirm against the model's forward output.
        """
        with torch.no_grad():
            # Process query using ColPali's query processing
            processed_query = self.processor.process_queries([query_text]).to(self.model.device)
            query_embedding = self.model(**processed_query)

        return query_embedding

    @property
    def embedding_dim(self) -> int:
        """
        Get the embedding dimension of the model.

        Returns ``self.model.config.hidden_size``.
        NOTE(review): this is the config's hidden size, which may differ from
        the width of the vectors returned by ``embed_query`` if the model
        applies an output projection - confirm against the model class.
        """
        return self.model.config.hidden_size

    @property
    def image_token_id(self) -> int:
        """Get the image token ID from the processor."""
        return self.processor.image_token_id