"""Gradio demo: caption an uploaded image with BLIP, then generate a similar
image from that caption with Stable Diffusion 3."""

from functools import lru_cache

import torch
import gradio as gr
from PIL import Image
from transformers import pipeline
from diffusers import StableDiffusion3Pipeline

# Local snapshot of the BLIP captioning model. To fetch it from the Hugging
# Face Hub instead, pass model="Salesforce/blip-image-captioning-base".
model_path = "../Models/models--Salesforce--blip-image-captioning-base/snapshots/82a37760796d32b1411fe092ab5d4e227313294b"

device = "cuda" if torch.cuda.is_available() else "cpu"

# Image -> caption pipeline, built once at import and reused for every request.
caption_image = pipeline("image-to-text", model=model_path, device=device)


@lru_cache(maxsize=1)
def _load_sd3_pipeline() -> StableDiffusion3Pipeline:
    """Load the Stable Diffusion 3 pipeline once and reuse it across requests.

    Loading the weights is by far the most expensive step, so the result is
    cached instead of being rebuilt on every call.
    """
    sd3 = StableDiffusion3Pipeline.from_pretrained(
        "stabilityai/stable-diffusion-3-medium-diffusers",
        torch_dtype=torch.float32,
        # The third text encoder/tokenizer are passed as None so they are not
        # loaded (see the diffusers SD3 documentation).
        text_encoder_3=None,
        tokenizer_3=None,
    )
    # Generation is pinned to the CPU here; on a GPU machine,
    # sd3.enable_model_cpu_offload() is an alternative to this call.
    sd3.to('cpu')
    return sd3


def image_generation(prompt: str) -> Image.Image:
    """Generate a 192x192 image from *prompt* with Stable Diffusion 3.

    Args:
        prompt: Text description of the image to generate.

    Returns:
        The first image produced by the pipeline.
    """
    sd3 = _load_sd3_pipeline()
    result = sd3(
        prompt=prompt,
        negative_prompt="blurred, ugly, watermark, low resolution, blurry",
        num_inference_steps=15,
        height=192,
        width=192,
        guidance_scale=7.0,
    )
    return result.images[0]


def caption_my_image(pil_image: Image.Image) -> Image.Image:
    """Caption the uploaded image and generate a new image from that caption.

    Args:
        pil_image: The image supplied by the Gradio input component.

    Returns:
        A newly generated image based on the caption of *pil_image*.

    Raises:
        gr.Error: If no image was uploaded.
    """
    # Gradio passes None when the user submits without selecting an image.
    if pil_image is None:
        raise gr.Error("Please upload an image first.")

    semantics = caption_image(images=pil_image)[0]['generated_text']
    return image_generation(semantics)


gr.close_all()

demo = gr.Interface(
    fn=caption_my_image,
    inputs=[gr.Image(label="Select Image", type="pil")],
    outputs=[gr.Image(label="New Generated Image using SD3", type="pil")],
    title="@GenAILearniverse Project 10: Generate Similar image",
    description="THIS APPLICATION WILL BE USED TO GENERATE SIMILAR IMAGE BASED ON IMAGE UPLOADED.",
)

if __name__ == "__main__":
    demo.launch()