amaye15
committed on
Commit
·
8d41aec
1
Parent(s):
aea7238
Docstring added
Browse files- handler.py +35 -12
handler.py
CHANGED
|
@@ -66,50 +66,74 @@ from io import BytesIO
|
|
| 66 |
|
| 67 |
|
| 68 |
class EndpointHandler:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
def __init__(self, path: str = "", default_batch_size: int = 4):
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
from colpali_engine.models import ColQwen2, ColQwen2Processor
|
| 72 |
|
| 73 |
-
# Load the model and processor
|
| 74 |
self.model = ColQwen2.from_pretrained(
|
| 75 |
path,
|
| 76 |
torch_dtype=torch.bfloat16,
|
| 77 |
).eval()
|
| 78 |
self.processor = ColQwen2Processor.from_pretrained(path)
|
| 79 |
|
| 80 |
-
# Determine the device
|
| 81 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 82 |
self.model.to(self.device)
|
| 83 |
-
|
| 84 |
-
# Set default batch size
|
| 85 |
self.default_batch_size = default_batch_size
|
| 86 |
|
| 87 |
def _process_batch(self, images: List[Image.Image]) -> List[List[float]]:
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
batch_images = self.processor.process_images(images)
|
| 90 |
batch_images = {k: v.to(self.device) for k, v in batch_images.items()}
|
| 91 |
|
| 92 |
-
# Generate embeddings
|
| 93 |
with torch.no_grad():
|
| 94 |
image_embeddings = self.model(**batch_images)
|
| 95 |
|
| 96 |
-
# Convert embeddings to list format
|
| 97 |
return image_embeddings.cpu().tolist()
|
| 98 |
|
| 99 |
def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
images_data = data.get("inputs", [])
|
| 102 |
batch_size = data.get("batch_size", self.default_batch_size)
|
| 103 |
|
| 104 |
if not images_data:
|
| 105 |
return {"error": "No images provided in 'inputs'."}
|
| 106 |
|
| 107 |
-
# Decode and validate images
|
| 108 |
images = []
|
| 109 |
for img_data in images_data:
|
| 110 |
if isinstance(img_data, str):
|
| 111 |
try:
|
| 112 |
-
# Assume base64-encoded image
|
| 113 |
image_bytes = base64.b64decode(img_data)
|
| 114 |
image = Image.open(BytesIO(image_bytes)).convert("RGB")
|
| 115 |
images.append(image)
|
|
@@ -118,7 +142,6 @@ class EndpointHandler:
|
|
| 118 |
else:
|
| 119 |
return {"error": "Images should be base64-encoded strings."}
|
| 120 |
|
| 121 |
-
# Process in batches with the specified or default batch size
|
| 122 |
embeddings = []
|
| 123 |
for i in range(0, len(images), batch_size):
|
| 124 |
batch_images = images[i : i + batch_size]
|
|
|
|
| 66 |
|
| 67 |
|
| 68 |
class EndpointHandler:
|
| 69 |
+
"""
|
| 70 |
+
A handler class for processing image data, generating embeddings using a specified model and processor.
|
| 71 |
+
|
| 72 |
+
Attributes:
|
| 73 |
+
model: The pre-trained model used for generating embeddings.
|
| 74 |
+
processor: The pre-trained processor used to process images before model inference.
|
| 75 |
+
device: The device (CPU or CUDA) used to run model inference.
|
| 76 |
+
default_batch_size: The default batch size for processing images in batches.
|
| 77 |
+
"""
|
| 78 |
+
|
| 79 |
def __init__(self, path: str = "", default_batch_size: int = 4):
|
| 80 |
+
"""
|
| 81 |
+
Initializes the EndpointHandler with a specified model path and default batch size.
|
| 82 |
+
|
| 83 |
+
Args:
|
| 84 |
+
path (str): Path to the pre-trained model and processor.
|
| 85 |
+
default_batch_size (int): Default batch size for image processing.
|
| 86 |
+
"""
|
| 87 |
from colpali_engine.models import ColQwen2, ColQwen2Processor
|
| 88 |
|
|
|
|
| 89 |
self.model = ColQwen2.from_pretrained(
|
| 90 |
path,
|
| 91 |
torch_dtype=torch.bfloat16,
|
| 92 |
).eval()
|
| 93 |
self.processor = ColQwen2Processor.from_pretrained(path)
|
| 94 |
|
|
|
|
| 95 |
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 96 |
self.model.to(self.device)
|
|
|
|
|
|
|
| 97 |
self.default_batch_size = default_batch_size
|
| 98 |
|
| 99 |
def _process_batch(self, images: List[Image.Image]) -> List[List[float]]:
|
| 100 |
+
"""
|
| 101 |
+
Processes a batch of images and generates embeddings.
|
| 102 |
+
|
| 103 |
+
Args:
|
| 104 |
+
images (List[Image.Image]): List of images to process.
|
| 105 |
+
|
| 106 |
+
Returns:
|
| 107 |
+
List[List[float]]: List of embeddings for each image.
|
| 108 |
+
"""
|
| 109 |
batch_images = self.processor.process_images(images)
|
| 110 |
batch_images = {k: v.to(self.device) for k, v in batch_images.items()}
|
| 111 |
|
|
|
|
| 112 |
with torch.no_grad():
|
| 113 |
image_embeddings = self.model(**batch_images)
|
| 114 |
|
|
|
|
| 115 |
return image_embeddings.cpu().tolist()
|
| 116 |
|
| 117 |
def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
|
| 118 |
+
"""
|
| 119 |
+
Processes input data containing base64-encoded images, decodes them, and generates embeddings.
|
| 120 |
+
|
| 121 |
+
Args:
|
| 122 |
+
data (Dict[str, Any]): Dictionary containing input images and optional batch size.
|
| 123 |
+
|
| 124 |
+
Returns:
|
| 125 |
+
Dict[str, Any]: Dictionary containing generated embeddings or error messages.
|
| 126 |
+
"""
|
| 127 |
images_data = data.get("inputs", [])
|
| 128 |
batch_size = data.get("batch_size", self.default_batch_size)
|
| 129 |
|
| 130 |
if not images_data:
|
| 131 |
return {"error": "No images provided in 'inputs'."}
|
| 132 |
|
|
|
|
| 133 |
images = []
|
| 134 |
for img_data in images_data:
|
| 135 |
if isinstance(img_data, str):
|
| 136 |
try:
|
|
|
|
| 137 |
image_bytes = base64.b64decode(img_data)
|
| 138 |
image = Image.open(BytesIO(image_bytes)).convert("RGB")
|
| 139 |
images.append(image)
|
|
|
|
| 142 |
else:
|
| 143 |
return {"error": "Images should be base64-encoded strings."}
|
| 144 |
|
|
|
|
| 145 |
embeddings = []
|
| 146 |
for i in range(0, len(images), batch_size):
|
| 147 |
batch_images = images[i : i + batch_size]
|