import base64
import json
from io import BytesIO
from typing import Any, Dict, List

import torch
from PIL import Image
from transformers import BlipForQuestionAnswering, BlipProcessor
class EndpointHandler():
    """Inference-endpoint handler for visual question answering with BLIP.

    Loads ``Salesforce/blip-vqa-base`` once at startup and answers free-form
    text questions about a base64-encoded image.
    """

    def __init__(self, path=""):
        # `path` is part of the Inference Endpoints handler contract; the
        # model is pulled from the Hub rather than loaded from `path`.
        # Fall back to CPU so the handler also runs on machines without a
        # GPU (the original hard-coded "cuda" and crashed elsewhere).
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
        self.model = BlipForQuestionAnswering.from_pretrained(
            "Salesforce/blip-vqa-base"
        ).to(self.device)

    def __call__(self, data):
        """Answer a question about an image.

        Args:
            data: dict (or JSON string, which is what the ``__main__`` smoke
                test sends) of the form
                ``{"inputs": {"image": <base64 str>, "text": <question>}}``.
                The key ``"question"`` is accepted as an alias for ``"text"``.

        Returns:
            ``{"text": <generated answer string>}``

        Raises:
            KeyError: if ``"inputs"`` or ``"image"`` is missing from the
                payload (clearer than the original's fallback of feeding the
                whole payload dict to ``b64decode``).
        """
        # Accept a raw JSON string as well as an already-parsed dict.
        if isinstance(data, (str, bytes)):
            data = json.loads(data)
        inputs = data["inputs"]
        raw_image = base64.b64decode(inputs["image"])
        image = Image.open(BytesIO(raw_image))
        # The original popped only "text" while the test payload used
        # "question" — accept either key.
        question = inputs.get("text", inputs.get("question"))
        encoding = self.processor(image, question, return_tensors="pt").to(self.device)
        out = self.model.generate(**encoding)
        return {"text": self.processor.decode(out[0], skip_special_tokens=True)}
if __name__ == "__main__":
    # Smoke test: ask the handler a question about a local image.
    my_handler = EndpointHandler(path='.')
    with open("/home/ubuntu/guoling/1.png", 'rb') as img:
        image_bytes = img.read()
    image_base64 = base64.b64encode(image_bytes).decode('utf-8')
    question = "are there any people in the picture?"
    # Pass the payload as a dict under the "text" key the handler expects.
    # (The original json.dumps'd it into a string — which the handler then
    # indexed as a dict, a TypeError — and used the key "question", which
    # the handler never read.)
    test_payload = {"inputs": {'image': image_base64, 'text': question}}
    test_result = my_handler(test_payload)
    print(test_result)