Spaces:
Runtime error
Runtime error
| import spaces | |
| import gradio as gr | |
| import torch | |
| from diffusers import StableDiffusion3Pipeline | |
| from huggingface_hub import snapshot_download,login | |
| from transformers import pipeline | |
| from PIL import Image | |
| import os | |
# Read the Hugging Face API token from the environment. A falsy check is used
# (rather than `is None`) so that an empty HUGGINGFACE_TOKEN is rejected here
# instead of being passed on to login().
huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
if not huggingface_token:
    raise ValueError("HUGGINGFACE_TOKEN environment variable is not set.")

# Log in to Hugging Face with the token read above.
login(token=huggingface_token)
# Select the compute device once and report the choice: "cuda" when PyTorch
# can see a CUDA device, otherwise "cpu".
if torch.cuda.is_available():
    device = "cuda"
    print("CUDA is available. Using GPU.")
else:
    device = "cpu"
    print("CUDA is not available. Using CPU.")
# Download the Stable Diffusion 3 Medium files (pinned to revision
# "refs/pr/26") into a local directory, skipping markdown and .gitattributes
# files, and keep the resulting path for loading the pipeline.
model_path = snapshot_download(
    repo_id="stabilityai/stable-diffusion-3-medium",
    revision="refs/pr/26",
    repo_type="model",
    ignore_patterns=["*.md", "*.gitattributes"],
    local_dir="stable-diffusion-3-medium",
    token=huggingface_token,
)

# Half precision is only appropriate on the GPU; on CPU fall back to float32
# so the pipeline is not loaded in a dtype it cannot reliably run with.
pipeline_dtype = torch.float16 if device == "cuda" else torch.float32

# text_encoder_3 / tokenizer_3 are passed as None, so the pipeline is loaded
# without its third text encoder and tokenizer.
image_gen = StableDiffusion3Pipeline.from_pretrained(
    model_path,
    text_encoder_3=None,
    tokenizer_3=None,
    torch_dtype=pipeline_dtype,
)
image_gen = image_gen.to(device)

# Load the image-to-text (captioning) pipeline on the same device.
caption_image = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-large",
    device=device,
)
def generate_image_from_caption(image, num_inference_steps=50, guidance_scale=7.5):
    """Caption ``image`` and generate a new 512x512 image from that caption.

    Args:
        image: Input image handed to the ``caption_image`` pipeline
            (presumably a PIL image coming from the Gradio input).
        num_inference_steps: Requested number of inference steps. UI sliders
            may deliver a float, so the value is rounded and coerced to an
            integer of at least 1 before being passed to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline as a float.

    Returns:
        The first image of the ``image_gen`` pipeline result.

    Raises:
        ValueError: If ``image`` is None (nothing was uploaded).
    """
    # Fail early with a clear message instead of erroring inside the captioner.
    if image is None:
        raise ValueError("No input image provided. Please upload an image.")

    # The step count must be a whole number; a fractional slider value
    # would otherwise reach the pipeline unchanged.
    steps = max(1, int(round(float(num_inference_steps))))
    guidance = float(guidance_scale)

    # Generate the caption that will be used as the prompt.
    caption = caption_image(image)[0]["generated_text"]
    print("Generated Caption:", caption)

    # Generate the image from the caption.
    result = image_gen(
        prompt=caption,
        num_inference_steps=steps,
        guidance_scale=guidance,
        negative_prompt="blurred, ugly, watermark, low resolution, blurry",
        height=512,
        width=512,
    )
    return result.images[0]
# Build the Gradio interface: one image upload plus two sliders feed
# generate_image_from_caption, whose result is shown as an image.
iface = gr.Interface(
    fn=generate_image_from_caption,
    inputs=[
        gr.Image(type="pil", label="Upload an image"),
        # step=1 keeps the slider on whole numbers; the number of inference
        # steps must be an integer.
        gr.Slider(
            label="Number of inference steps",
            minimum=1,
            maximum=100,
            value=50,
            step=1,
        ),
        gr.Slider(label="Guidance scale", minimum=1.0, maximum=20.0, value=7.5),
    ],
    outputs=gr.Image(label="Generated Image"),
    title="Image-to-Image Generator using Caption",
    description="Upload an image to generate a caption, and then use the caption as a prompt to generate a new image using Stable Diffusion.",
)

# Launch the Gradio app.
iface.launch()