"""Zero-shot image tagging with CLIP: score an image against a fixed tag vocabulary."""
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
import torch
# Load model + processor
# NOTE(review): runs at import time and downloads weights on first use — presumably
# acceptable for a script; confirm if this module is imported by a server process.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
# Candidate tags
# Closed vocabulary: CLIP scores the image against exactly these strings.
# Edit this list to change the tag space; no retraining needed (zero-shot).
CANDIDATE_TAGS = [
"portrait", "landscape", "abstract", "surreal", "dark", "bright",
"melancholy", "joyful", "blue tones", "warm colors", "minimalist", "detailed"
]
def generate_tags(image_path, top_k=5):
    """Return the highest-scoring candidate tags for an image.

    Scores the image against every string in CANDIDATE_TAGS using CLIP's
    zero-shot image/text similarity, then returns the best matches.

    Args:
        image_path: Path to an image file readable by PIL.
        top_k: Number of tags to return (default 5, the original behavior).

    Returns:
        list[str]: Up to ``top_k`` tags from CANDIDATE_TAGS, best first.
    """
    image = Image.open(image_path).convert("RGB")
    inputs = processor(text=CANDIDATE_TAGS, images=image, return_tensors="pt", padding=True)
    # Inference only: no_grad avoids building an autograd graph (saves memory and time).
    with torch.no_grad():
        outputs = model(**inputs)
    # logits_per_image: one similarity score per (image, tag) pair.
    probs = outputs.logits_per_image.softmax(dim=1)
    # Never ask topk for more entries than there are candidates.
    k = min(top_k, len(CANDIDATE_TAGS))
    top_probs, indices = probs.topk(k)
    return [CANDIDATE_TAGS[i] for i in indices[0]]
def generate_caption(image_path):
    """Return a caption for the image at *image_path*.

    Currently a stub: the argument is accepted but ignored, and a fixed
    placeholder string is returned until real captioning logic is wired in.
    """
    # Placeholder caption - replace this with real captioning logic
    return "This is a placeholder caption."