Spaces:
Sleeping
Sleeping
File size: 2,480 Bytes
import os
import io
from PIL import Image
import base64
import torch
import spaces
from transformers import pipeline
from diffusers import EulerDiscreteScheduler
from diffusers import StableDiffusionPipeline
import gradio as gr
# Select the compute device: CUDA GPU when available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Hugging Face access token read from the environment (needed for gated models).
# NOTE(review): os.environ.get returns None when HF_API_KEY is unset — the
# download then proceeds unauthenticated; confirm the model is publicly readable.
hf_api_key = os.environ.get('HF_API_KEY')
# Checkpoint id of the Stable Diffusion model to load from the Hub.
model_id = "sd-legacy/stable-diffusion-v1-5"
# Use the Euler scheduler here instead of the checkpoint's default scheduler.
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
# Image-to-text pipeline with the BLIP captioning model (downloads weights on first run).
get_completion = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
# Load the Stable Diffusion text-to-image pipeline with the scheduler above.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # Use float16 on GPU, float32 on CPU
    scheduler=scheduler,
    use_auth_token=hf_api_key  # Required for gated model; NOTE(review): `use_auth_token` is deprecated in recent diffusers in favor of `token` — confirm installed version
)
pipe = pipe.to(device)  # Move model weights onto the selected device
# Caption generation function
@spaces.GPU(duration=120)  # No-op outside ZeroGPU environments, so this stays portable across setups.
def captioner(image):
    """Return a text caption for *image* via the BLIP image-to-text pipeline.

    The pipeline accepts a PIL image directly and returns a list of
    candidate dicts; the text of the first candidate is returned.
    """
    candidates = get_completion(image)
    return candidates[0]["generated_text"]
# Image generation function
@spaces.GPU(duration=120)  # Designed to be effect-free in non-ZeroGPU environments, ensuring compatibility across different setups.
def generate(prompt, steps=25):
    """Generate an image from *prompt* with Stable Diffusion.

    Args:
        prompt: Text prompt describing the desired image.
        steps: Number of denoising steps. BUG FIX: this argument was
            previously accepted but ignored (num_inference_steps was
            hard-coded to 25); it is now honored. The default of 25
            preserves the old behavior for callers that omit it.

    Returns:
        The first generated image (PIL format).
    """
    output = pipe(
        prompt,
        negative_prompt=None,  # Handle empty negative prompt
        num_inference_steps=int(steps),  # int() guards against float values from UI sliders
    )
    return output.images[0]  # Return the first generated image (PIL format)
# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game 🖍️")
    image_upload = gr.Image(label="Your first image", type="pil")
    btn_caption = gr.Button("Generate caption")
    caption = gr.Textbox(label="Generated caption")
    # BUG FIX: `generate` takes (prompt, steps) but only the caption was
    # wired in, so clicking "Generate image" raised a missing-argument
    # error. Expose the step count as a slider and pass it through.
    steps_slider = gr.Slider(minimum=1, maximum=100, value=25, step=1, label="Inference steps")
    btn_image = gr.Button("Generate image")
    image_output = gr.Image(label="Generated Image")
    btn_caption.click(fn=captioner, inputs=[image_upload], outputs=[caption])
    btn_image.click(fn=generate, inputs=[caption, steps_slider], outputs=[image_output])
# Launch the app
demo.launch(
    share=True,
    # server_port=int(os.environ['PORT4'])
)