File size: 2,480 Bytes
8f61bf1
 
 
 
 
 
1ef3684
6b6ca07
8f61bf1
1975373
 
 
 
8f61bf1
 
5810306
 
 
8f61bf1
 
 
5d02f57
 
 
 
 
 
8f61bf1
34f1bac
5d02f57
 
 
 
 
 
 
 
 
 
bd84758
34f1bac
8f61bf1
 
34f1bac
8f61bf1
 
 
bd84758
34f1bac
5d02f57
34f1bac
5d02f57
 
 
 
 
34f1bac
5d02f57
 
8f61bf1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import io
from PIL import Image
import base64 

import torch

import spaces
from transformers import pipeline

from diffusers import EulerDiscreteScheduler
from diffusers import StableDiffusionPipeline

import gradio as gr

# Select the compute device: CUDA if a GPU is available, otherwise CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Hugging Face access token (needed for gated models); None when the env var is unset.
hf_api_key = os.environ.get('HF_API_KEY')

# Stable Diffusion checkpoint to load.
model_id = "sd-legacy/stable-diffusion-v1-5"

# Use the Euler scheduler here instead of the checkpoint's default.
# Pass the token here too so the (potentially gated) repo is readable consistently.
scheduler = EulerDiscreteScheduler.from_pretrained(
    model_id,
    subfolder="scheduler",
    token=hf_api_key,
)

# Load the image-to-text pipeline with the BLIP captioning model.
get_completion = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")

# Load the Stable Diffusion pipeline.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id,
    # float16 halves memory on GPU; CPU kernels require float32.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    scheduler=scheduler,
    token=hf_api_key,  # `use_auth_token` is deprecated in recent diffusers; `token` replaces it
)
pipe = pipe.to(device)

# Caption generate function
@spaces.GPU(duration=120)  # No-op outside ZeroGPU environments, keeping the app portable.
def captioner(image):
    """Return a text caption for *image* (a PIL image) via the BLIP pipeline."""
    candidates = get_completion(image)
    # The pipeline yields a list of dicts; take the text of the first candidate.
    return candidates[0]['generated_text']

# Image generate function
@spaces.GPU(duration=120) # Designed to be effect-free in non-ZeroGPU environments, ensuring compatibility across different setups.
def generate(prompt, steps=25):
    """Generate an image from *prompt* with Stable Diffusion.

    Args:
        prompt: Text prompt describing the desired image.
        steps: Number of denoising steps (default 25). The default also makes
            the function callable with a single argument, matching the Gradio
            wiring that only supplies the prompt.

    Returns:
        The first generated image (PIL format).
    """
    output = pipe(
        prompt,
        negative_prompt=None,  # Handle empty negative prompt
        num_inference_steps=steps,  # bug fix: was hard-coded to 25, silently ignoring `steps`
    )

    return output.images[0]


# Assemble the Gradio UI: upload an image, caption it, then regenerate from the caption.
with gr.Blocks() as demo:
    gr.Markdown("# Describe-and-Generate game 🖍️")

    uploaded_image = gr.Image(label="Your first image", type="pil")

    caption_button = gr.Button("Generate caption")
    caption_box = gr.Textbox(label="Generated caption")

    image_button = gr.Button("Generate image")
    generated_image = gr.Image(label="Generated Image")

    # Wire the buttons: captioner fills the textbox; its text then seeds generate().
    caption_button.click(fn=captioner, inputs=[uploaded_image], outputs=[caption_box])
    image_button.click(fn=generate, inputs=[caption_box], outputs=[generated_image])


# Launch the app
demo.launch(
    share=True,
    #server_port=int(os.environ['PORT4'])
)