Added batch support for images
Browse files — hf_model.py: +33 −9
hf_model.py
CHANGED
|
@@ -186,16 +186,40 @@ class Triad(nn.Module):
|
|
| 186 |
if text_list is not None:
|
| 187 |
assert isinstance(text_list, list) and len(text_list) == 1, "Text list must be a list of strings of length 1"
|
| 188 |
if image is not None:
|
| 189 |
-
image = Image.open(image).convert('RGB')
|
| 190 |
-
transform = transforms.Compose([
|
| 191 |
-
transforms.Resize((224, 224)),
|
| 192 |
-
transforms.ToTensor(),
|
| 193 |
-
transforms.Normalize(mean=[0.485, 0.456, 0.406],
|
| 194 |
-
std=[0.229, 0.224, 0.225])
|
| 195 |
-
])
|
| 196 |
-
image = transform(image)
|
| 197 |
device = next(self.parameters()).device
|
| 198 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
embeddings = {}
|
| 200 |
if image is not None:
|
| 201 |
embeddings['visual_feats'] = self.visual_embedder(image)
|
|
|
|
if text_list is not None:
    # NOTE(review): batch support below applies to images only; text input is
    # still restricted to exactly one string — confirm whether text batching
    # is planned.
    assert isinstance(text_list, list) and len(text_list) == 1, "Text list must be a list of strings of length 1"
if image is not None:
    device = next(self.parameters()).device

    # Standard 224x224 resize + ImageNet mean/std normalization.
    # Built ONCE here and shared by both file-path branches — the previous
    # version rebuilt this Compose pipeline on every loop iteration and
    # duplicated it verbatim in the single-path branch.
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    if isinstance(image, list):
        # Batch of image file paths -> stacked tensor [B, 3, 224, 224].
        image = torch.stack(
            [transform(Image.open(p).convert('RGB')).to(device) for p in image],
            dim=0,
        )
    elif isinstance(image, str):
        # Single file path -> add batch dimension: [1, 3, 224, 224].
        image = transform(Image.open(image).convert('RGB')).to(device).unsqueeze(0)
    elif isinstance(image, torch.Tensor):
        # Assumed already preprocessed; only normalize the batch dimension
        # and make sure the tensor lives on the model's device.
        if image.dim() == 3:
            image = image.unsqueeze(0)
        image = image.to(device)
    else:
        # Previously an unsupported type fell through untransformed into the
        # visual embedder and failed with a confusing downstream error;
        # fail fast with a clear message instead.
        raise TypeError(
            f"image must be a file path, list of file paths, or torch.Tensor, got {type(image).__name__}"
        )

embeddings = {}
if image is not None:
    # Visual features for the (possibly batched) image tensor.
    embeddings['visual_feats'] = self.visual_embedder(image)
|