kumararvindibs
/

ibs_imgToTextGeneration

Image-Text-to-Text

Model card Files Files and versions

kumararvindibs committed on Mar 8, 2024

Commit

043d595

·

verified ·

1 Parent(s): ed105db

Update handler.py

Files changed (1) hide show

handler.py +6 -4

handler.py CHANGED Viewed

@@ -15,12 +15,15 @@ class EndpointHandler():
         self.model.eval()
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         input_data = data.get("inputs", {})
         encoded_images = input_data.get("images")
         if not encoded_images:
             return {"captions": [], "error": "No images provided"}
-        texts = input_data.get("texts", ["a photography of"] * len(encoded_images))
         try:
             byteImgIO = io.BytesIO()
@@ -39,12 +42,10 @@ class EndpointHandler():
             if not raw_images:
                 print("No valid images found.")
             processed_inputs = [
-                self.processor(image, text, return_tensors="pt") for image, text in zip(raw_images, texts)
             ]
             processed_inputs = {
                 "pixel_values": torch.cat([inp["pixel_values"] for inp in processed_inputs], dim=0).to(device),
-                "input_ids": torch.cat([inp["input_ids"] for inp in processed_inputs], dim=0).to(device),
-                "attention_mask": torch.cat([inp["attention_mask"] for inp in processed_inputs], dim=0).to(device),
                 "max_new_tokens":40
             }
@@ -52,6 +53,7 @@ class EndpointHandler():
                 out = self.model.generate(**processed_inputs)
             captions = self.processor.batch_decode(out, skip_special_tokens=True)
             return {"captions": captions}
         except Exception as e:
             print(f"Error during processing: {str(e)}")

         self.model.eval()
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        print("input data is here------------",data)
         input_data = data.get("inputs", {})
+        print("input data is here-2-----------",input_data)
         encoded_images = input_data.get("images")
+        print("input encoded_images is here------------",encoded_images)
         if not encoded_images:
             return {"captions": [], "error": "No images provided"}
+        #texts = input_data.get("texts", ["a photography of"] * len(encoded_images))
         try:
             byteImgIO = io.BytesIO()
             if not raw_images:
                 print("No valid images found.")
             processed_inputs = [
+                self.processor(image, return_tensors="pt") for image in zip(raw_images)
             ]
             processed_inputs = {
                 "pixel_values": torch.cat([inp["pixel_values"] for inp in processed_inputs], dim=0).to(device),
                 "max_new_tokens":40
             }
                 out = self.model.generate(**processed_inputs)
             captions = self.processor.batch_decode(out, skip_special_tokens=True)
+            print("caption is here-------",captions)
             return {"captions": captions}
         except Exception as e:
             print(f"Error during processing: {str(e)}")