"""Print a generated caption for an image using a BLIP captioning model.

Usage:
    python <this script> IMAGE_PATH
"""
import sys
from pathlib import Path

# Checkpoint identifier passed to both the processor and the model loader.
MODEL_NAME = "norwoodsystems/image-caption"


def describe_image(image_path):
    """Return a caption for the image stored at *image_path*.

    The caption is produced with sampling enabled (``do_sample=True``,
    ``top_p=0.9``, ``temperature=1.0``), so repeated calls on the same image
    may return different text.

    Args:
        image_path: Path (``str`` or ``os.PathLike``) of the image to caption.

    Returns:
        The decoded caption string, with special tokens removed.

    Raises:
        OSError: If the image cannot be opened or decoded by Pillow.
    """
    # Heavy third-party imports are deferred so that usage and path errors in
    # ``main`` are reported without first paying the import cost.
    import torch
    from PIL import Image
    from transformers import BlipForConditionalGeneration, BlipProcessor

    processor = BlipProcessor.from_pretrained(MODEL_NAME)
    model = BlipForConditionalGeneration.from_pretrained(
        MODEL_NAME, use_safetensors=True
    )

    # ``convert`` returns a new in-memory image, so the underlying file can be
    # closed as soon as the conversion is done instead of leaking the handle.
    with Image.open(image_path) as source:
        raw_image = source.convert("RGB")

    inputs = processor(images=raw_image, return_tensors="pt")

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        generated_ids = model.generate(
            **inputs, do_sample=True, top_p=0.9, temperature=1.0
        )

    return processor.decode(generated_ids[0], skip_special_tokens=True)


def main(argv=None):
    """Run the command-line interface.

    Args:
        argv: Command-line arguments excluding the program name. Defaults to
            ``sys.argv[1:]``. Only the first argument (the image path) is
            used; any further arguments are ignored.

    Returns:
        Process exit status: ``0`` on success, ``1`` if the image file does
        not exist, ``2`` if no image path was given.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print(f"usage: {Path(sys.argv[0]).name} IMAGE_PATH", file=sys.stderr)
        return 2

    image_path = Path(args[0])
    # Checked up front (rather than relying on Image.open to fail) so a bad
    # path is reported before the model is loaded.
    if not image_path.is_file():
        print(f"error: image file not found: {image_path}", file=sys.stderr)
        return 1

    print("Description:", describe_image(image_path))
    return 0


if __name__ == "__main__":
    sys.exit(main())