File size: 4,577 Bytes
563652b
 
 
 
 
e363ea3
9fedccd
 
18d2393
9fedccd
 
e17cfd4
18d2393
37a666c
 
3d7854c
563652b
 
e363ea3
 
563652b
e17cfd4
 
 
9fedccd
 
 
 
 
 
 
 
 
 
e363ea3
9fedccd
 
d01c487
9fedccd
d01c487
 
 
b36b7af
243df67
9fedccd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243df67
e363ea3
243df67
 
e363ea3
 
3d7854c
b36b7af
 
 
d01c487
 
 
 
 
 
9fedccd
 
 
 
b36b7af
243df67
37a666c
243df67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9fedccd
e363ea3
d5037dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243df67
 
b36b7af
e363ea3
b36b7af
 
d5037dc
b36b7af
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import io
from PIL import Image
import base64 

import torch

from diffusers import EulerDiscreteScheduler
from diffusers import StableDiffusionXLPipeline
from diffusers import StableDiffusion3Pipeline
from diffusers import StableDiffusionPipeline
from diffusers import DiffusionPipeline

import spaces

#from transformers import pipeline
import gradio as gr

# Set Hugging Face API (needed for gated models)
# os.environ.get returns None when HF_API_KEY is unset; public checkpoints
# still load, but gated repos will fail to download without a token.
hf_api_key = os.environ.get('HF_API_KEY')

# Load the Stable Diffusion pipeline
# sd-legacy/stable-diffusion-v1-5 is the community re-upload of SD v1.5.
model_id = "sd-legacy/stable-diffusion-v1-5"

# Use the Euler scheduler here instead
# (overrides the checkpoint's default scheduler; config is read from the
# "scheduler" subfolder of the same model repo)
scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")

# Download (or load from cache) the full text-to-image pipeline.
# NOTE(review): `use_auth_token` is deprecated in recent diffusers releases in
# favour of `token` — confirm against the pinned diffusers version.
pipe = StableDiffusionPipeline.from_pretrained(
    model_id, 
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # Use float16 on GPU, float32 on CPU
    scheduler=scheduler,
    use_auth_token=hf_api_key  # Required for gated model
)

# Load the Stable Diffusion pipeline
#model_id = "stabilityai/stable-diffusion-3.5-medium"
#pipe = SD3Transformer2DModel.from_pretrained(
#    model_id,
#    subfolder="transformer",
#    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # Use float16 on GPU, float32 on CPU
#    use_auth_token=hf_api_key  # Required for gated model
#)

# Load the Stable Diffusion XL pipeline
#model_id = "stabilityai/stable-diffusion-xl-base-1.0"
#pipe = StableDiffusionXLPipeline.from_pretrained(
#    model_id,
#    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # Use float16 on GPU, float32 on CPU
#    use_auth_token=hf_api_key  # Required for gated model
#)

# Load the Stable Diffusion pipeline
#model_id = "stabilityai/stable-diffusion-3-medium"
#model_id = "stabilityai/stable-diffusion-3-medium-diffusers"
#pipe = StableDiffusion3Pipeline.from_pretrained(
#    model_id,
#    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,  # Use float16 on GPU, float32 on CPU,
#    scheduler=scheduler,
#    use_auth_token=hf_api_key  # Required for gated model
#)


# Move pipeline to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipe.to(device)

# Text-to-image endpoint
#get_completion = pipeline("text-to-image", model="stabilityai/stable-diffusion-xl-base-1.0")

def base64_to_pil(img_base64):
    """Decode a base64-encoded image string into a PIL Image.

    Args:
        img_base64: base64-encoded bytes of an image file (PNG, JPEG, ...).

    Returns:
        The decoded ``PIL.Image.Image`` object.
    """
    raw_bytes = base64.b64decode(img_base64)
    return Image.open(io.BytesIO(raw_bytes))

#def generate(prompt):
#    output = get_completion(prompt)
#    result_image = base64_to_pil(output)
#    return result_image

# Generate function
@spaces.GPU(duration=120) # Designed to be effect-free in non-ZeroGPU environments, ensuring compatibility across different setups.
def generate(prompt, negative_prompt, steps, guidance, width, height):
    """Run the Stable Diffusion pipeline and return one generated image.

    Args:
        prompt: text prompt describing the desired image.
        negative_prompt: text to steer away from; an empty string disables it.
        steps: number of denoising steps (coerced to int).
        guidance: classifier-free guidance scale (coerced to float).
        width, height: requested dimensions; rounded down to multiples of 8.

    Returns:
        The first generated image as a PIL Image.
    """
    # Stable Diffusion requires dimensions divisible by 8 — round down.
    w = int(width)
    h = int(height)
    w -= w % 8
    h -= h % 8

    result = pipe(
        prompt,
        negative_prompt=negative_prompt if negative_prompt else None,  # empty string -> no negative prompt
        num_inference_steps=int(steps),
        guidance_scale=float(guidance),
        width=w,
        height=h,
    )
    # pipe(...) returns a pipeline output whose .images is a list of PIL images.
    return result.images[0]


# Create Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Image Generation with Stable Diffusion")
    prompt = gr.Textbox(label="Your prompt")

    with gr.Row():
        with gr.Column():
            negative_prompt = gr.Textbox(label="Negative prompt")
            # FIX: original info text was garbled ("In many steps will the
            # denoiser denoise the image?") — missing the word "how".
            steps = gr.Slider(label="Inference Steps", minimum=1, maximum=100, value=25,
                      info="In how many steps will the denoiser denoise the image?")
            guidance = gr.Slider(label="Guidance Scale", minimum=1, maximum=20, value=7,
                      info="Controls how much the text prompt influences the result")
            # step=64 keeps both dimensions divisible by 8, as the pipeline
            # requires; 512 max matches the caps chosen for this model.
            width = gr.Slider(label="Width", minimum=64, maximum=512, step=64, value=512)
            height = gr.Slider(label="Height", minimum=64, maximum=512, step=64, value=512)
            btn = gr.Button("Submit")
        with gr.Column():
            output = gr.Image(label="Result")

    # Wire the button to the generation function; input order must match
    # generate()'s parameter order.
    btn.click(fn=generate, inputs=[prompt, negative_prompt, steps, guidance, width, height], outputs=[output])


# Launch the app
demo.launch(
    share=True,
    #server_port=int(os.environ['PORT3'])
)