import io

import requests
import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration


class EndpointHandler:
    """Callable handler that downloads an image by URL and runs a BLIP model on it.

    The processor and model are loaded once in ``__init__``; each call to the
    instance handles a single request payload.
    """

    # Upper bound (seconds) on the image download so that an unresponsive
    # remote host cannot block the handler forever.
    REQUEST_TIMEOUT = 10

    def __init__(self, model_dir):
        """Load the BLIP processor and model from *model_dir*.

        The model is switched to eval mode and moved to CUDA when
        ``torch.cuda.is_available()`` reports a device, otherwise to the CPU.
        """
        self.processor = BlipProcessor.from_pretrained(model_dir)
        self.model = BlipForConditionalGeneration.from_pretrained(model_dir)
        self.model.eval()

        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model.to(self.device)

    def __call__(self, data):
        """Fetch the image named in the payload and run generation on it.

        Args:
            data: Request payload. Either a dict carrying ``"image_url"``
                directly, or a dict wrapping such a dict under ``"inputs"``.

        Returns:
            ``{"error": <message>}`` when the payload has no usable
            ``"image_url"`` or when the image cannot be downloaded or decoded.
        """
        # Unwrap Hugging Face "inputs" key if present
        if isinstance(data, dict) and "inputs" in data:
            data = data["inputs"]

        # A non-dict payload (e.g. a bare string or list) has no "image_url";
        # report it the same way as a missing key instead of raising
        # AttributeError on `.get`.
        image_url = data.get("image_url") if isinstance(data, dict) else None
        if not image_url:
            return {"error": "Please provide an 'image_url' in the JSON payload."}

        # NOTE(review): the URL comes straight from the request payload and is
        # fetched as-is; if this handler is reachable by untrusted clients,
        # consider restricting which hosts may be contacted.
        try:
            response = requests.get(image_url, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            image = Image.open(io.BytesIO(response.content)).convert("RGB")
        except Exception as e:
            # Boundary handler: any download or decode failure is reported to
            # the caller as an error payload rather than propagated.
            return {"error": f"Failed to load image: {str(e)}"}

        inputs = self.processor(images=image, return_tensors="pt").to(self.device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            out = self.model.generate(**inputs)
        # NOTE(review): the code visible here stops after generation; `out` is
        # neither decoded nor returned. Confirm whether a decode/return step
        # follows or is missing.