| | from colpali_engine.models import ColPali |
| | from colpali_engine.models.paligemma.colpali.processing_colpali import ColPaliProcessor |
| | from colpali_engine.utils.processing_utils import BaseVisualRetrieverProcessor |
| | from colpali_engine.utils.torch_utils import ListDataset, get_torch_device |
| | from torch.utils.data import DataLoader |
| | import torch |
| | from typing import List, cast |
| |
|
| | |
| |
|
| | from tqdm import tqdm |
| | from PIL import Image |
| | import os |
| |
|
| | import spaces |
| |
|
| |
|
| | |
# Release any cached GPU memory left over from a previous run before we load weights.
torch.cuda.empty_cache()

import dotenv

# Pull environment variables (notably the 'colpali' checkpoint name) from a .env
# file when one exists; find_dotenv() returns "" if none is found, which
# load_dotenv handles gracefully.
dotenv_file = dotenv.find_dotenv()
dotenv.load_dotenv(dotenv_file)

# Checkpoint name comes from the environment; a missing 'colpali' key raises
# KeyError immediately, which is the desired fail-fast behavior.
model_name = os.environ['colpali']
device = get_torch_device("cuda")

# Local cache locations (under the current working directory) for the model
# weights and the matching processor.
current_working_directory = os.getcwd()
save_directory = os.path.join(current_working_directory, model_name)

processor_directory = os.path.join(current_working_directory, model_name + '_processor')
| |
|
| |
|
| |
|
# First run: download model + processor from the hub, then cache them locally.
# Subsequent runs: reload from the local cache directories.
if not os.path.exists(save_directory):

    model = ColPali.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,
        device_map=device,
    ).eval()
    processor = cast(ColPaliProcessor, ColPaliProcessor.from_pretrained(model_name))
    # exist_ok=True guards against the race where the directory appears between
    # the os.path.exists() check above and this makedirs call.
    os.makedirs(save_directory, exist_ok=True)
    print(f"Directory '{save_directory}' created.")
    model.save_pretrained(save_directory)
    os.makedirs(processor_directory, exist_ok=True)
    processor.save_pretrained(processor_directory)

else:

    # BUG FIX: the cached-reload path previously called from_pretrained with no
    # torch_dtype / device_map and no .eval(), so a reloaded model ran in fp32
    # on the default device in train mode — inconsistent with the first-run
    # branch. Load with the same settings so both paths yield the same model.
    model = ColPali.from_pretrained(
        save_directory,
        torch_dtype=torch.bfloat16,
        device_map=device,
    ).eval()
    processor = cast(ColPaliProcessor, ColPaliProcessor.from_pretrained(processor_directory, use_fast=True))
| |
|
| |
|
class ColpaliManager:
    """Embeds images and query texts with the module-level ColPali model.

    NOTE(review): every method operates on the module-level ``model`` /
    ``processor`` / ``device`` globals, not on per-instance state — the
    ``device`` and ``model_name`` arguments to ``__init__`` are only logged.
    Kept as-is to preserve the existing interface; consider storing them on
    ``self`` and loading per-instance models in a follow-up.
    """

    def __init__(self, device="cuda", model_name=model_name):
        # No state is stored; construction is effectively a log statement.
        print(f"Initializing ColpaliManager with device {device} and model {model_name}")

    @spaces.GPU
    def get_images(self, paths: list[str]) -> List[Image.Image]:
        """Open each file path as a PIL image.

        Side effect: moves the global model to CUDA (presumably so the
        surrounding ``spaces.GPU`` allocation is actually used — TODO confirm).
        """
        model.to("cuda")
        return [Image.open(path) for path in paths]

    @spaces.GPU
    def process_images(self, image_paths: list[str], batch_size=5):
        """Embed the images at ``image_paths``.

        Returns a list of float32 numpy arrays, one embedding tensor per image.
        """
        model.to("cuda")
        print(f"Processing {len(image_paths)} image_paths")

        images = self.get_images(image_paths)

        dataloader = DataLoader(
            # BUG FIX (annotation only, no runtime effect): the dataset holds
            # PIL images, not strings — the old ``ListDataset[str]`` was wrong.
            dataset=ListDataset[Image.Image](images),
            batch_size=batch_size,
            shuffle=False,
            collate_fn=lambda x: processor.process_images(x),
        )

        ds: List[torch.Tensor] = []
        for batch_doc in tqdm(dataloader):
            with torch.no_grad():
                batch_doc = {k: v.to(model.device) for k, v in batch_doc.items()}
                embeddings_doc = model(**batch_doc)
            # Split the batch back into per-image embedding tensors.
            ds.extend(list(torch.unbind(embeddings_doc.to(device))))

        # float32 numpy on CPU for storage / downstream similarity search.
        ds_np = [d.float().cpu().numpy() for d in ds]

        return ds_np

    @spaces.GPU
    def process_text(self, texts: list[str]):
        """Embed the query ``texts``; returns a list of float32 numpy arrays.

        NOTE(review): unlike ``process_images``, this moves the model back to
        CPU when done — looks like a deliberate GPU-memory release between
        ``spaces.GPU`` calls; confirm whether ``process_images`` should match.
        """
        model.to("cuda")
        print(f"Processing {len(texts)} texts")

        dataloader = DataLoader(
            dataset=ListDataset[str](texts),
            batch_size=5,
            shuffle=False,
            collate_fn=lambda x: processor.process_queries(x),
        )

        qs: List[torch.Tensor] = []
        for batch_query in dataloader:
            with torch.no_grad():
                batch_query = {k: v.to(model.device) for k, v in batch_query.items()}
                embeddings_query = model(**batch_query)
            qs.extend(list(torch.unbind(embeddings_query.to(device))))

        qs_np = [q.float().cpu().numpy() for q in qs]
        model.to("cpu")

        return qs_np
| | |
| |
|
| |
|
| |
|