# import requests # import base64 # import os # hf_token = os.environ.get("HUGGINGFACE_API_TOKEN") # API_URL = "https://api-inference.huggingface.co/models/nlpconnect/vit-gpt2-image-captioning" # headers = { # "Authorization": f"Bearer {hf_token}" # } # def generateCaption(image_path): # with open(image_path, "rb") as image_file: # image_bytes = image_file.read() # response = requests.post(API_URL, headers=headers, files={"file": image_bytes}) # if response.status_code == 200: # result = response.json() # return result[0]['generated_text'] # else: # return f"Error generating caption: {response.text}" from PIL import Image from transformers import BlipProcessor , BlipForConditionalGeneration import torch processor = BlipProcessor.from_pretrained("src/models/Caption") model = BlipForConditionalGeneration.from_pretrained("src/models/Caption") def generateCaption(image_path): image = Image.open(image_path).convert("RGB") inputs = processor(images = image , return_tensors="pt") output = model.generate(**inputs) caption = processor.decode(output[0], skip_special_tokens = True) return caption