Vidensogende
/

image-captioning-with-blip

@@ -1,59 +1,3 @@
-# import requests
-# from PIL import Image
-# from transformers import BlipProcessor, BlipForConditionalGeneration
-# import torch
-# from typing import Dict, List, Any
-# class EndpointHandler():
-#     def __init__(self, path=""):
-#         self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
-#         self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-large")
-#         self.device = "cuda" if torch.cuda.is_available() else "cpu"
-#         self.model.to(self.device)
-#     def process_single_image(self, img_url, text=None):
-#         # Loading and processing the image
-#         raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
-#         if text:
-#             # Conditional image captioning
-#             inputs = self.processor(raw_image, text, return_tensors="pt").to(self.device)
-#         else:
-#             # Unconditional image captioning
-#             inputs = self.processor(raw_image, return_tensors="pt").to(self.device)
-#         out = self.model.generate(**inputs)
-#         return self.processor.decode(out[0], skip_special_tokens=True)
-#     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
-#         try:
-#             print(f"Received data: {data}")
-#             if not data or "image_urls" not in data:
-#                 return [{"error": "No image URLs provided in the request."}]
-#             img_urls = data.get("image_urls")
-#             texts = data.get("texts", [None] * len(img_urls))  # Texts are optional for conditional captioning
-#             # Check if inputs are for single or multiple images
-#             if isinstance(img_urls, str):
-#                 img_urls = [img_urls]
-#                 texts = [texts]
-#             captions = []
-#             for img_url, text in zip(img_urls, texts):
-#                 caption = self.process_single_image(img_url, text)
-#                 captions.append({"image_url": img_url, "caption": caption})
-#             return captions
-#         except Exception as e:
-#             print(f"Error processing data: {e}")
-#             return [{"error": str(e)}]
-# # You may need to add a function to load this handler if the inference toolkit expects it
-# def get_pipeline(model_dir, task):
-#     return EndpointHandler(model_dir)
 import requests
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration
@@ -69,7 +13,6 @@ class EndpointHandler():
         self.model.to(self.device)
     def process_single_image(self, img_url, text=None):
-        # Loading and processing the image
         raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
         if text:
             inputs = self.processor(raw_image, text, return_tensors="pt").to(self.device)

 import requests
 from PIL import Image
 from transformers import BlipProcessor, BlipForConditionalGeneration
         self.model.to(self.device)
     def process_single_image(self, img_url, text=None):
         raw_image = Image.open(requests.get(img_url, stream=True).raw).convert('RGB')
         if text:
             inputs = self.processor(raw_image, text, return_tensors="pt").to(self.device)