# app.py — demo: text-to-image generation followed by CLIP image-text matching.
# (Hugging Face Space by dincali, commit d8999d7 "Create app.py".)
from transformers import AutoModelForImageGeneration, AutoTokenizer
import torch
from PIL import Image
# --- Stage 1: generate an image from a text prompt ---
# NOTE(review): `AutoModelForImageGeneration` is not an auto-class exported by
# the transformers library, and "openai/clip-dall-e-4x" is not a published Hub
# checkpoint — this script is expected to fail at the import/load above.
# Text-to-image models (e.g. Stable Diffusion) live in the `diffusers`
# package instead. TODO: replace with a real pipeline.
# Load DALL-E model and tokenizer
model = AutoModelForImageGeneration.from_pretrained("openai/clip-dall-e-4x")
tokenizer = AutoTokenizer.from_pretrained("openai/clip-dall-e-4x")
# Generate an image from text
text = "A red apple on a white background"
input_ids = tokenizer(text, return_tensors="pt").input_ids
# NOTE(review): `.generate()` on a transformers model returns token ids, not
# pixel data — even if the model loaded, `image[0]` would not be image pixels.
image = model.generate(input_ids)
# Save the generated image
# NOTE(review): Image.fromarray requires a uint8 HxWxC numpy array; a raw
# generate() tensor would not satisfy that — confirm the intended model/API.
image = Image.fromarray(image[0].numpy())
image.save("generated_image.png")
from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image

# --- Stage 2: describe the generated image with CLIP ---
# CLIP is a contrastive image-text model: it scores how well each candidate
# caption matches an image. It does NOT generate free-form captions, so we
# rank a list of candidate descriptions and report the best match.

# Load the CLIP model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")

# Load the image as a PIL object — the processor expects images, not paths.
image_path = "generated_image.png"
image = Image.open(image_path)

# Candidate descriptions to score against the image.
candidate_texts = [
    "A red apple on a white background",
    "A green apple on a white background",
    "A banana on a white background",
    "An empty white background",
]

# Tokenize the texts and preprocess the image in one call.
inputs = processor(
    text=candidate_texts, images=image, return_tensors="pt", padding=True
)
with torch.no_grad():
    outputs = model(**inputs)

# logits_per_image: (1, num_texts) similarity scores; softmax -> probabilities.
probs = outputs.logits_per_image.softmax(dim=-1)
image_text = candidate_texts[probs.argmax(dim=-1).item()]
print("Generated text from the image:", image_text)