"""Caption an image with a BLIP model.

Usage: python <script> <image_path>

Loads the ``norwoodsystems/image-caption`` BLIP checkpoint, runs sampled
generation on the image given on the command line, and prints the caption.
"""
import sys

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration


def main() -> None:
    """Load the model, caption the image from ``sys.argv[1]``, print the result."""
    # Fail with a usage message instead of a bare IndexError on missing argv.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: python {sys.argv[0]} <image_path>")
    image_path = sys.argv[1]

    processor = BlipProcessor.from_pretrained("norwoodsystems/image-caption")
    model = BlipForConditionalGeneration.from_pretrained(
        "norwoodsystems/image-caption", use_safetensors=True
    )
    model.eval()  # inference only — disable dropout/training-mode layers

    # Context manager closes the file handle; convert('RGB') so grayscale/RGBA
    # inputs are accepted by the processor.
    with Image.open(image_path) as img:
        raw_image = img.convert("RGB")

    inputs = processor(images=raw_image, return_tensors="pt")
    with torch.no_grad():
        # NOTE(review): do_sample=True with top_p=0.9 makes captions
        # non-deterministic across runs; temperature=1.0 is the default.
        # Kept as-is — presumably intentional; set do_sample=False for
        # reproducible output.
        generated_ids = model.generate(
            **inputs, do_sample=True, top_p=0.9, temperature=1.0
        )
    description = processor.decode(generated_ids[0], skip_special_tokens=True)
    print("Description:", description)


if __name__ == "__main__":
    main()