Spaces:
Runtime error
Runtime error
ImageStudio Maintainer Claude Opus 4.8 (1M context) committed on
Commit ·
d6b6688
1
Parent(s): ad4a177
Fix RUNTIME_ERROR (kernels/transformers import) and add NoobAI-XL model selector
Browse files
- Root cause: transformers hub_kernels builds LayerRepository() which the
installed kernels version rejects ('Either a revision or a version must be
specified'), crashing 'from transformers import PreTrainedModel' and all of
diffusers at import time. kernels was only needed for the commented-out
FA3/AoTI block, so it is removed from requirements.
- Add a Model selector: Z-Image-Turbo (guidance-free, ~9 steps) and
NoobAI-XL v1.1 (SDXL + sdxl-vae-fp16-fix + Compel prompt weighting).
- Per-model defaults, negative-prompt fields shown only for NoobAI-XL.
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
- app.py +431 -153
- requirements.txt +5 -3
app.py
CHANGED
|
@@ -1,31 +1,313 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import torch
|
| 2 |
import spaces
|
| 3 |
import gradio as gr
|
| 4 |
-
from diffusers import DiffusionPipeline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
#
|
|
|
|
|
|
|
|
|
|
| 7 |
print("Loading Z-Image-Turbo pipeline...")
|
| 8 |
-
|
| 9 |
"Tongyi-MAI/Z-Image-Turbo",
|
| 10 |
torch_dtype=torch.bfloat16,
|
| 11 |
low_cpu_mem_usage=False,
|
| 12 |
)
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
# spaces.aoti_blocks_load(pipe.transformer.layers, "zerogpu-aoti/Z-Image", variant="fa3")
|
| 18 |
|
| 19 |
-
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
@spaces.GPU
|
| 22 |
-
def generate_image(
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
if randomize_seed:
|
| 25 |
-
seed =
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
prompt=prompt,
|
| 30 |
height=int(height),
|
| 31 |
width=int(width),
|
|
@@ -33,15 +315,37 @@ def generate_image(prompt, height, width, num_inference_steps, seed, randomize_s
|
|
| 33 |
guidance_scale=0.0,
|
| 34 |
generator=generator,
|
| 35 |
).images[0]
|
| 36 |
-
|
| 37 |
return image, seed
|
| 38 |
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
examples = [
|
| 41 |
["Young Chinese woman in red Hanfu, intricate embroidery. Impeccable makeup, red floral forehead pattern. Elaborate high bun, golden phoenix headdress, red flowers, beads. Holds round folding fan with lady, trees, bird. Neon lightning-bolt lamp, bright yellow glow, above extended left palm. Soft-lit outdoor night background, silhouetted tiered pagoda, blurred colorful distant lights."],
|
| 42 |
["A majestic dragon soaring through clouds at sunset, scales shimmering with iridescent colors, detailed fantasy art style"],
|
| 43 |
["Cozy coffee shop interior, warm lighting, rain on windows, plants on shelves, vintage aesthetic, photorealistic"],
|
| 44 |
-
["
|
| 45 |
["Portrait of a wise old wizard with a long white beard, holding a glowing crystal staff, magical forest background"],
|
| 46 |
]
|
| 47 |
|
|
@@ -60,20 +364,64 @@ custom_theme = gr.themes.Soft(
|
|
| 60 |
block_title_text_weight="600",
|
| 61 |
)
|
| 62 |
|
| 63 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
with gr.Blocks(fill_height=True) as demo:
|
| 65 |
-
# Header
|
| 66 |
gr.Markdown(
|
| 67 |
"""
|
| 68 |
-
# π¨
|
| 69 |
-
**Ultra-fast AI image generation** β’
|
| 70 |
""",
|
| 71 |
elem_classes="header-text"
|
| 72 |
)
|
| 73 |
-
|
| 74 |
with gr.Row(equal_height=False):
|
| 75 |
# Left column - Input controls
|
| 76 |
with gr.Column(scale=1, min_width=320):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
prompt = gr.Textbox(
|
| 78 |
label="β¨ Your Prompt",
|
| 79 |
placeholder="Describe the image you want to create...",
|
|
@@ -81,71 +429,59 @@ with gr.Blocks(fill_height=True) as demo:
|
|
| 81 |
max_lines=10,
|
| 82 |
autofocus=True,
|
| 83 |
)
|
| 84 |
-
|
| 85 |
with gr.Accordion("βοΈ Advanced Settings", open=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
with gr.Row():
|
| 87 |
height = gr.Slider(
|
| 88 |
-
minimum=512,
|
| 89 |
-
|
| 90 |
-
value=1024,
|
| 91 |
-
step=64,
|
| 92 |
-
label="Height",
|
| 93 |
-
info="Image height in pixels"
|
| 94 |
)
|
| 95 |
width = gr.Slider(
|
| 96 |
-
minimum=512,
|
| 97 |
-
|
| 98 |
-
value=1024,
|
| 99 |
-
step=64,
|
| 100 |
-
label="Width",
|
| 101 |
-
info="Image width in pixels"
|
| 102 |
)
|
| 103 |
-
|
| 104 |
num_inference_steps = gr.Slider(
|
| 105 |
-
minimum=1,
|
| 106 |
-
maximum=20,
|
| 107 |
-
value=9,
|
| 108 |
-
step=1,
|
| 109 |
label="Inference Steps",
|
| 110 |
-
info="9
|
| 111 |
)
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
seed = gr.Number(
|
| 119 |
-
label="Seed",
|
| 120 |
-
value=42,
|
| 121 |
-
precision=0,
|
| 122 |
-
visible=False,
|
| 123 |
-
)
|
| 124 |
-
|
| 125 |
-
def toggle_seed(randomize):
|
| 126 |
-
return gr.Number(visible=not randomize)
|
| 127 |
-
|
| 128 |
-
randomize_seed.change(
|
| 129 |
-
toggle_seed,
|
| 130 |
-
inputs=[randomize_seed],
|
| 131 |
-
outputs=[seed]
|
| 132 |
)
|
| 133 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
generate_btn = gr.Button(
|
| 135 |
-
"π Generate Image",
|
| 136 |
-
variant="primary",
|
| 137 |
-
size="lg",
|
| 138 |
-
scale=1
|
| 139 |
)
|
| 140 |
-
|
| 141 |
-
# Example prompts
|
| 142 |
gr.Examples(
|
| 143 |
examples=examples,
|
| 144 |
inputs=[prompt],
|
| 145 |
label="π‘ Try these prompts",
|
| 146 |
examples_per_page=5,
|
| 147 |
)
|
| 148 |
-
|
| 149 |
# Right column - Output
|
| 150 |
with gr.Column(scale=1, min_width=320):
|
| 151 |
output_image = gr.Image(
|
|
@@ -156,105 +492,47 @@ with gr.Blocks(fill_height=True) as demo:
|
|
| 156 |
height=600,
|
| 157 |
buttons=["download", "share"],
|
| 158 |
)
|
| 159 |
-
|
| 160 |
used_seed = gr.Number(
|
| 161 |
-
label="π² Seed Used",
|
| 162 |
-
interactive=False,
|
| 163 |
-
container=True,
|
| 164 |
)
|
| 165 |
-
|
| 166 |
-
# Footer credits
|
| 167 |
gr.Markdown(
|
| 168 |
"""
|
| 169 |
---
|
| 170 |
<div style="text-align: center; opacity: 0.7; font-size: 0.9em; margin-top: 1rem;">
|
| 171 |
-
<strong>
|
| 172 |
-
<
|
| 173 |
-
<
|
| 174 |
-
<strong>
|
| 175 |
</div>
|
| 176 |
""",
|
| 177 |
elem_classes="footer-text"
|
| 178 |
)
|
| 179 |
-
|
| 180 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 181 |
generate_btn.click(
|
| 182 |
-
fn=generate_image,
|
| 183 |
-
inputs=[prompt, height, width, num_inference_steps, seed, randomize_seed],
|
| 184 |
-
outputs=[output_image, used_seed],
|
| 185 |
)
|
| 186 |
-
|
| 187 |
-
# Also allow generating by pressing Enter in the prompt box
|
| 188 |
prompt.submit(
|
| 189 |
-
fn=generate_image,
|
| 190 |
-
inputs=[prompt, height, width, num_inference_steps, seed, randomize_seed],
|
| 191 |
-
outputs=[output_image, used_seed],
|
| 192 |
)
|
| 193 |
|
| 194 |
if __name__ == "__main__":
|
| 195 |
demo.launch(
|
| 196 |
theme=custom_theme,
|
| 197 |
-
css=
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
margin-bottom: 0.5rem !important;
|
| 202 |
-
background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
|
| 203 |
-
-webkit-background-clip: text;
|
| 204 |
-
-webkit-text-fill-color: transparent;
|
| 205 |
-
background-clip: text;
|
| 206 |
-
}
|
| 207 |
-
|
| 208 |
-
.header-text p {
|
| 209 |
-
font-size: 1.1rem !important;
|
| 210 |
-
color: #64748b !important;
|
| 211 |
-
margin-top: 0 !important;
|
| 212 |
-
}
|
| 213 |
-
|
| 214 |
-
.footer-text {
|
| 215 |
-
padding: 1rem 0;
|
| 216 |
-
}
|
| 217 |
-
|
| 218 |
-
.footer-text a {
|
| 219 |
-
color: #f59e0b !important;
|
| 220 |
-
text-decoration: none !important;
|
| 221 |
-
font-weight: 500;
|
| 222 |
-
}
|
| 223 |
-
|
| 224 |
-
.footer-text a:hover {
|
| 225 |
-
text-decoration: underline !important;
|
| 226 |
-
}
|
| 227 |
-
|
| 228 |
-
/* Mobile optimizations */
|
| 229 |
-
@media (max-width: 768px) {
|
| 230 |
-
.header-text h1 {
|
| 231 |
-
font-size: 1.8rem !important;
|
| 232 |
-
}
|
| 233 |
-
|
| 234 |
-
.header-text p {
|
| 235 |
-
font-size: 1rem !important;
|
| 236 |
-
}
|
| 237 |
-
}
|
| 238 |
-
|
| 239 |
-
/* Smooth transitions */
|
| 240 |
-
button, .gr-button {
|
| 241 |
-
transition: all 0.2s ease !important;
|
| 242 |
-
}
|
| 243 |
-
|
| 244 |
-
button:hover, .gr-button:hover {
|
| 245 |
-
transform: translateY(-1px);
|
| 246 |
-
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15) !important;
|
| 247 |
-
}
|
| 248 |
-
|
| 249 |
-
/* Better spacing */
|
| 250 |
-
.gradio-container {
|
| 251 |
-
max-width: 1400px !important;
|
| 252 |
-
margin: 0 auto !important;
|
| 253 |
-
}
|
| 254 |
-
""",
|
| 255 |
-
footer_links=[
|
| 256 |
-
"api",
|
| 257 |
-
"gradio"
|
| 258 |
-
],
|
| 259 |
-
mcp_server=True
|
| 260 |
-
)
|
|
|
|
| 1 |
+
import random
|
| 2 |
+
import re
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
import torch
|
| 6 |
import spaces
|
| 7 |
import gradio as gr
|
| 8 |
+
from diffusers import DiffusionPipeline, AutoPipelineForText2Image, AutoencoderKL
|
| 9 |
+
from compel import Compel, ReturnedEmbeddingsType
|
| 10 |
+
|
| 11 |
+
# =============================================================================
|
| 12 |
+
# Model registry
|
| 13 |
+
# =============================================================================
|
| 14 |
+
# Two pipelines are exposed through a single UI via the selector below:
|
| 15 |
+
# * Z-Image-Turbo -> fast, guidance-free distilled model (8 DiT forwards)
|
| 16 |
+
# * NoobAI-XL v1.1 -> SDXL anime model with Compel prompt weighting
|
| 17 |
+
MODEL_ZIMAGE = "Z-Image-Turbo"
|
| 18 |
+
MODEL_NOOBXL = "NoobAI-XL v1.1"
|
| 19 |
+
MODEL_CHOICES = [MODEL_ZIMAGE, MODEL_NOOBXL]
|
| 20 |
+
|
| 21 |
+
MAX_SEED = np.iinfo(np.int32).max
|
| 22 |
+
|
| 23 |
+
NOOBXL_NEGATIVE = (
|
| 24 |
+
"lowres, {bad}, error, fewer, extra, missing, worst quality, jpeg artifacts, "
|
| 25 |
+
"bad quality, watermark, unfinished, displeasing, chromatic aberration, signature, "
|
| 26 |
+
"extra digits, artistic error, username, scan, [abstract]"
|
| 27 |
+
)
|
| 28 |
|
| 29 |
+
# -----------------------------------------------------------------------------
|
| 30 |
+
# Load both pipelines once at startup. On ZeroGPU the `.to("cuda")` calls are
|
| 31 |
+
# captured by the runtime and the work happens inside the @spaces.GPU function.
|
| 32 |
+
# -----------------------------------------------------------------------------
|
| 33 |
print("Loading Z-Image-Turbo pipeline...")
|
| 34 |
+
zimage_pipe = DiffusionPipeline.from_pretrained(
|
| 35 |
"Tongyi-MAI/Z-Image-Turbo",
|
| 36 |
torch_dtype=torch.bfloat16,
|
| 37 |
low_cpu_mem_usage=False,
|
| 38 |
)
|
| 39 |
+
zimage_pipe.to("cuda")
|
| 40 |
+
|
| 41 |
+
print("Loading NoobAI-XL v1.1 pipeline...")
|
| 42 |
+
noobxl_vae = AutoencoderKL.from_pretrained(
|
| 43 |
+
"madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
|
| 44 |
+
)
|
| 45 |
+
noobxl_pipe = AutoPipelineForText2Image.from_pretrained(
|
| 46 |
+
"Menyu/noobaiXLNAIXL_epsilonPred11Version",
|
| 47 |
+
vae=noobxl_vae,
|
| 48 |
+
torch_dtype=torch.float16,
|
| 49 |
+
use_safetensors=True,
|
| 50 |
+
add_watermarker=False,
|
| 51 |
+
)
|
| 52 |
+
noobxl_pipe.to("cuda")
|
| 53 |
+
|
| 54 |
+
print("Pipelines loaded!")
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
# =============================================================================
|
| 58 |
+
# Compel-based prompt weighting helpers (ported from the NoobXL11 reference
|
| 59 |
+
# space so long prompts and (weight:1.2) syntax work for the SDXL model).
|
| 60 |
+
# =============================================================================
|
| 61 |
+
# Tokenizer for the attention syntax handled by parse_prompt_attention().
# The explicit-weight alternative only accepts well-formed decimal numbers
# ("1", "1.", "1.5", ".5", optionally signed) so that float() can never fail
# on malformed input such as "(x:1.2.3)" or "(x:.)"; such input is instead
# tokenized as literal text followed by a plain closing bracket.
_RE_ATTENTION = re.compile(r"""
\\\(|
\\\)|
\\\[|
\\]|
\\\\|
\\|
\(|
\[|
:([+-]?(?:\d+\.?\d*|\.\d+))\)|
\)|
]|
[^\\()\[\]:]+|
:
""", re.X)

# Splits a literal text fragment on the standalone keyword BREAK.
_RE_BREAK = re.compile(r"\s*\bBREAK\b\s*", re.S)


def parse_prompt_attention(text):
    """Parse a prompt with attention markup into ``[text, weight]`` pairs.

    Recognised syntax:
      * ``(abc)``      multiplies the weight of ``abc`` by 1.1
      * ``[abc]``      multiplies the weight of ``abc`` by 1 / 1.1
      * ``(abc:1.5)``  multiplies the weight of ``abc`` by 1.5
      * ``\\(`` ``\\)`` ``\\[`` ``\\]`` ``\\\\``  literal characters
      * ``BREAK``      emitted as the marker pair ``["BREAK", -1]``

    Brackets nest multiplicatively; brackets left unclosed at the end of the
    prompt are applied as if closed there. Adjacent fragments that end up
    with the same weight are merged.

    Args:
        text: The raw prompt string.

    Returns:
        A list of two-element lists ``[fragment, weight]``. An input that
        yields no fragments returns ``[["", 1.0]]``.
    """
    res = []
    round_brackets = []
    square_brackets = []

    round_bracket_multiplier = 1.1
    square_bracket_multiplier = 1 / 1.1

    def multiply_range(start_position, multiplier):
        # Scale every fragment emitted since the matching opening bracket.
        for p in range(start_position, len(res)):
            res[p][1] *= multiplier

    for m in _RE_ATTENTION.finditer(text):
        # A separate name is used so the ``text`` parameter is not shadowed.
        token = m.group(0)
        weight = m.group(1)

        if token.startswith('\\'):
            # Escaped character: keep it literally, without the backslash.
            res.append([token[1:], 1.0])
        elif token == '(':
            round_brackets.append(len(res))
        elif token == '[':
            square_brackets.append(len(res))
        elif weight is not None and round_brackets:
            multiply_range(round_brackets.pop(), float(weight))
        elif token == ')' and round_brackets:
            multiply_range(round_brackets.pop(), round_bracket_multiplier)
        elif token == ']' and square_brackets:
            multiply_range(square_brackets.pop(), square_bracket_multiplier)
        else:
            # Plain text (or unmatched bracket syntax kept as literal text).
            for i, part in enumerate(_RE_BREAK.split(token)):
                if i > 0:
                    res.append(["BREAK", -1])
                res.append([part, 1.0])

    # Apply brackets that were never closed.
    for pos in round_brackets:
        multiply_range(pos, round_bracket_multiplier)

    for pos in square_brackets:
        multiply_range(pos, square_bracket_multiplier)

    if not res:
        res = [["", 1.0]]

    # Merge runs of neighbouring fragments that share the same weight.
    i = 0
    while i + 1 < len(res):
        if res[i][1] == res[i + 1][1]:
            res[i][0] += res[i + 1][0]
            res.pop(i + 1)
        else:
            i += 1

    return res
|
| 130 |
+
|
| 131 |
+
|
| 132 |
+
def prompt_attention_to_invoke_prompt(attention):
    """Render ``(text, weight)`` pairs as a single weighted prompt string.

    Each weight is rounded to two decimals and then rendered as follows:
      * exactly 1.0        -> the text unchanged
      * below 0.8          -> ``(text)<weight>``
      * 0.8 up to 1.0      -> ``(text)-`` plus one extra ``-`` per
                              ``int((1.0 - weight) * 10)``
      * above 1.0, < 1.3   -> ``(text)`` plus one ``+`` per
                              ``int((weight - 1.0) * 10)``
      * 1.3 and above      -> ``(text)<weight>``

    The ``int(...)`` conversions truncate, so the number of ``+``/``-``
    signs is a coarse quantization of the weight.

    Args:
        attention: Iterable of two-item sequences ``(text, weight)``.

    Returns:
        The rendered fragments concatenated without a separator.
    """

    def render(fragment, raw_weight):
        level = round(raw_weight, 2)
        if level == 1.0:
            return fragment
        if level < 1.0:
            if level < 0.8:
                return f"({fragment}){level}"
            return f"({fragment})-" + "-" * int((1.0 - level) * 10)
        if level < 1.3:
            return f"({fragment})" + "+" * int((level - 1.0) * 10)
        return f"({fragment}){level}"

    return "".join(render(fragment, raw_weight) for fragment, raw_weight in attention)
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
def merge_embeds(prompt_chanks, compel):
    """Blend the embeddings of several prompt chunks into one embedding.

    The chunks are embedded in one ``compel(prompt_chanks)`` call, split
    along dimension 0 into single-row pieces, scaled and summed. With ``n``
    chunks and ``T = n * (n + 1) // 2``, the last piece is scaled by
    ``1 / T``, the one before it by ``2 / T``, and so on, so the first chunk
    carries the largest share.

    Args:
        prompt_chanks: List of prompt strings; may be empty.
        compel: Callable producing embeddings. Assumed to return a tensor
            with one row per chunk when given a list -- TODO confirm for
            Compel instances configured to also return pooled embeddings.

    Returns:
        The weighted sum of the per-chunk embeddings, or ``compel('')`` when
        ``prompt_chanks`` is empty.
    """
    count = len(prompt_chanks)
    if count == 0:
        return compel('')

    unit = 1 / (count * (count + 1) // 2)
    pieces = list(torch.split(compel(prompt_chanks), 1, dim=0))
    # Walk from the end of the list: the rank-th piece from the end gets
    # rank * unit as its scale factor.
    for rank in range(1, count + 1):
        pieces[-rank] = pieces[-rank] * (rank * unit)
    return torch.stack(pieces, dim=0).sum(dim=0)
|
| 163 |
+
|
| 164 |
+
|
| 165 |
+
def detokenize(chunk, actual_prompt):
    """Rebuild the text of a token chunk and strip it from the prompt.

    Tokens carry the end-of-word marker ``</w>``. Each marker is turned back
    into a space only when the character at the same position in
    ``actual_prompt`` is a space; otherwise the marker is dropped.

    Args:
        chunk: List of token strings. It is not modified.
        actual_prompt: The not-yet-consumed prompt text that ``chunk`` was
            tokenized from.

    Returns:
        A tuple ``(chunk_text, remaining_prompt)``, both stripped of
        surrounding whitespace. An empty ``chunk`` yields
        ``('', actual_prompt.strip())``.
    """
    if not chunk:
        return '', actual_prompt.strip()

    # Work on a copy so the caller's token list is left untouched.
    tokens = list(chunk)
    tokens[-1] = tokens[-1].replace('</w>', '')
    chanked_prompt = ''.join(tokens).strip()

    while '</w>' in chanked_prompt:
        marker_at = chanked_prompt.find('</w>')
        # Guard the lookup: the rebuilt text can run past the prompt when
        # the two have drifted apart, which previously raised IndexError.
        followed_by_space = (
            marker_at < len(actual_prompt) and actual_prompt[marker_at] == ' '
        )
        chanked_prompt = chanked_prompt.replace(
            '</w>', ' ' if followed_by_space else '', 1
        )

    # Remove only the first occurrence: later repeats of the same text belong
    # to chunks that have not been processed yet.
    actual_prompt = actual_prompt.replace(chanked_prompt, '', 1)
    return chanked_prompt.strip(), actual_prompt.strip()
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def tokenize_line(line, tokenizer):
    """Split a prompt line into text chunks that fit the tokenizer window.

    The lower-cased, stripped line is tokenized and tokens are collected
    into a window of ``tokenizer.model_max_length - 2`` tokens. Whenever the
    window fills up, it is cut just after the last comma token it contains
    (or flushed whole when it has no comma); the tokens after the cut carry
    over into the next window. Each cut piece is converted back to text with
    ``detokenize``.

    Args:
        line: The prompt text to split.
        tokenizer: Object providing ``tokenize(str)`` and
            ``model_max_length``.

    Returns:
        List of chunk strings in prompt order.
    """
    remaining = line.lower().strip()
    tokens = tokenizer.tokenize(remaining)
    limit = tokenizer.model_max_length - 2
    comma = tokenizer.tokenize(',')[0]

    pieces = []
    window = []
    for token in tokens:
        window.append(token)
        if len(window) != limit:
            continue

        # Search backwards for the last comma; a comma in the final slot
        # gives cut == limit, i.e. the whole window is flushed.
        cut = limit
        for pos in range(limit - 1, -1, -1):
            if window[pos] == comma:
                cut = pos + 1
                break

        text, remaining = detokenize(window[:cut], remaining)
        pieces.append(text)
        window = window[cut:]

    if window:
        text, _ = detokenize(window, remaining)
        pieces.append(text)

    return pieces
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def get_embed_new(prompt, pipeline, compel, only_convert_string=False, compel_process_sd=False):
    """Convert an attention-weighted prompt into prompt strings or embeddings.

    With ``compel_process_sd`` set, the prompt is only split by
    ``tokenize_line`` and the chunks are passed to ``merge_embeds``.

    Otherwise the prompt is normalised (``((`` / ``))`` collapsed to single
    brackets, each backslash tripled), parsed by ``parse_prompt_attention``,
    broken into comma-terminated fragments, and the fragments are packed into
    groups whose summed token count does not exceed
    ``pipeline.tokenizer.model_max_length - 2``. Within a group, consecutive
    fragments with the same weight are joined with a space. Every group is
    rendered with ``prompt_attention_to_invoke_prompt``.

    Args:
        prompt: Raw prompt text.
        pipeline: Object exposing a ``tokenizer`` attribute.
        compel: Embedding callable forwarded to ``merge_embeds``.
        only_convert_string: When true, return the rendered groups joined by
            a single space instead of embeddings.
        compel_process_sd: When true, skip attention parsing entirely.

    Returns:
        A string when ``only_convert_string`` is true (and
        ``compel_process_sd`` is false); otherwise whatever ``merge_embeds``
        returns.
    """
    tokenizer = pipeline.tokenizer

    if compel_process_sd:
        return merge_embeds(tokenize_line(prompt, tokenizer), compel)

    prompt = prompt.replace("((", "(").replace("))", ")").replace("\\", "\\\\\\")

    # One (fragment_text, weight, token_count) triple per comma-terminated
    # fragment, in prompt order.
    fragments = []
    for segment_text, segment_weight in parse_prompt_attention(prompt):
        for clause in segment_text.split(','):
            for piece in tokenize_line(clause, tokenizer):
                fragment = f'{piece},'
                fragments.append(
                    (fragment, round(segment_weight, 2), len(tokenizer.tokenize(fragment)))
                )

    budget = tokenizer.model_max_length - 2
    groups = []
    group = []
    used = 0
    for fragment, weight, size in fragments:
        if used + size > budget:
            # Current group is full: close it and start a new one.
            groups.append(group)
            group = [[fragment, weight]]
            used = size
            continue
        if group and weight == group[-1][1]:
            group[-1][0] += f" {fragment}"
        else:
            group.append([fragment, weight])
        used += size
    if group:
        groups.append(group)

    rendered = [prompt_attention_to_invoke_prompt(entry) for entry in groups]
    if only_convert_string:
        return ' '.join(rendered)

    return merge_embeds(rendered, compel)
|
| 255 |
|
| 256 |
+
|
| 257 |
+
# =============================================================================
|
| 258 |
+
# Generation
|
| 259 |
+
# =============================================================================
|
| 260 |
@spaces.GPU
|
| 261 |
+
def generate_image(
|
| 262 |
+
model_name,
|
| 263 |
+
prompt,
|
| 264 |
+
negative_prompt,
|
| 265 |
+
use_negative_prompt,
|
| 266 |
+
height,
|
| 267 |
+
width,
|
| 268 |
+
num_inference_steps,
|
| 269 |
+
guidance_scale,
|
| 270 |
+
seed,
|
| 271 |
+
randomize_seed,
|
| 272 |
+
progress=gr.Progress(track_tqdm=True),
|
| 273 |
+
):
|
| 274 |
+
"""Generate an image from the given prompt using the selected model."""
|
| 275 |
if randomize_seed:
|
| 276 |
+
seed = random.randint(0, MAX_SEED)
|
| 277 |
+
seed = int(seed)
|
| 278 |
+
|
| 279 |
+
if model_name == MODEL_NOOBXL:
|
| 280 |
+
generator = torch.Generator().manual_seed(seed)
|
| 281 |
+
compel = Compel(
|
| 282 |
+
tokenizer=[noobxl_pipe.tokenizer, noobxl_pipe.tokenizer_2],
|
| 283 |
+
text_encoder=[noobxl_pipe.text_encoder, noobxl_pipe.text_encoder_2],
|
| 284 |
+
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
|
| 285 |
+
requires_pooled=[False, True],
|
| 286 |
+
truncate_long_prompts=False,
|
| 287 |
+
)
|
| 288 |
+
if not use_negative_prompt:
|
| 289 |
+
negative_prompt = ""
|
| 290 |
+
conv_prompt = get_embed_new(prompt, noobxl_pipe, compel, only_convert_string=True)
|
| 291 |
+
conv_negative = get_embed_new(negative_prompt, noobxl_pipe, compel, only_convert_string=True)
|
| 292 |
+
conditioning, pooled = compel([conv_prompt, conv_negative])
|
| 293 |
+
|
| 294 |
+
image = noobxl_pipe(
|
| 295 |
+
prompt_embeds=conditioning[0:1],
|
| 296 |
+
pooled_prompt_embeds=pooled[0:1],
|
| 297 |
+
negative_prompt_embeds=conditioning[1:2],
|
| 298 |
+
negative_pooled_prompt_embeds=pooled[1:2],
|
| 299 |
+
width=int(width),
|
| 300 |
+
height=int(height),
|
| 301 |
+
guidance_scale=float(guidance_scale),
|
| 302 |
+
num_inference_steps=int(num_inference_steps),
|
| 303 |
+
generator=generator,
|
| 304 |
+
use_resolution_binning=True,
|
| 305 |
+
).images[0]
|
| 306 |
+
return image, seed
|
| 307 |
+
|
| 308 |
+
# Default: Z-Image-Turbo (guidance-free distilled model)
|
| 309 |
+
generator = torch.Generator("cuda").manual_seed(seed)
|
| 310 |
+
image = zimage_pipe(
|
| 311 |
prompt=prompt,
|
| 312 |
height=int(height),
|
| 313 |
width=int(width),
|
|
|
|
| 315 |
guidance_scale=0.0,
|
| 316 |
generator=generator,
|
| 317 |
).images[0]
|
|
|
|
| 318 |
return image, seed
|
| 319 |
|
| 320 |
+
|
| 321 |
+
# Recommended defaults per model: (steps, guidance, height, width)
|
| 322 |
+
MODEL_DEFAULTS = {
|
| 323 |
+
MODEL_ZIMAGE: dict(steps=9, guidance=0.0, height=1024, width=1024),
|
| 324 |
+
MODEL_NOOBXL: dict(steps=28, guidance=5.0, height=1536, width=1024),
|
| 325 |
+
}
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def apply_model_defaults(model_name):
    """Build the component updates to apply when the selected model changes.

    Looks up ``model_name`` in ``MODEL_DEFAULTS`` and returns six
    ``gr.update`` objects, in this order: steps value, guidance value (made
    interactive only for ``MODEL_NOOBXL``, with a matching info text),
    height value, width value, and two visibility updates that are visible
    only for ``MODEL_NOOBXL``.

    Args:
        model_name: A key of ``MODEL_DEFAULTS``.

    Returns:
        A 6-tuple of ``gr.update`` results.

    Raises:
        KeyError: If ``model_name`` is not in ``MODEL_DEFAULTS``.
    """
    defaults = MODEL_DEFAULTS[model_name]
    noob_selected = model_name == MODEL_NOOBXL

    if noob_selected:
        guidance_info = "Classifier-free guidance"
    else:
        guidance_info = "Z-Image-Turbo is guidance-free (forced to 0)"

    steps_update = gr.update(value=defaults["steps"])
    guidance_update = gr.update(
        value=defaults["guidance"],
        interactive=noob_selected,
        info=guidance_info,
    )
    height_update = gr.update(value=defaults["height"])
    width_update = gr.update(value=defaults["width"])
    first_visibility = gr.update(visible=noob_selected)
    second_visibility = gr.update(visible=noob_selected)

    return (
        steps_update,
        guidance_update,
        height_update,
        width_update,
        first_visibility,
        second_visibility,
    )
|
| 341 |
+
|
| 342 |
+
|
| 343 |
+
# Example prompts (work well across both models)
|
| 344 |
examples = [
|
| 345 |
["Young Chinese woman in red Hanfu, intricate embroidery. Impeccable makeup, red floral forehead pattern. Elaborate high bun, golden phoenix headdress, red flowers, beads. Holds round folding fan with lady, trees, bird. Neon lightning-bolt lamp, bright yellow glow, above extended left palm. Soft-lit outdoor night background, silhouetted tiered pagoda, blurred colorful distant lights."],
|
| 346 |
["A majestic dragon soaring through clouds at sunset, scales shimmering with iridescent colors, detailed fantasy art style"],
|
| 347 |
["Cozy coffee shop interior, warm lighting, rain on windows, plants on shelves, vintage aesthetic, photorealistic"],
|
| 348 |
+
["1girl, nahida (genshin impact), white dress, green hair, looking at viewer, masterpiece, best quality, very aesthetic"],
|
| 349 |
["Portrait of a wise old wizard with a long white beard, holding a glowing crystal staff, magical forest background"],
|
| 350 |
]
|
| 351 |
|
|
|
|
| 364 |
block_title_text_weight="600",
|
| 365 |
)
|
| 366 |
|
| 367 |
+
APP_CSS = """
|
| 368 |
+
.header-text h1 {
|
| 369 |
+
font-size: 2.5rem !important;
|
| 370 |
+
font-weight: 700 !important;
|
| 371 |
+
margin-bottom: 0.5rem !important;
|
| 372 |
+
background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
|
| 373 |
+
-webkit-background-clip: text;
|
| 374 |
+
-webkit-text-fill-color: transparent;
|
| 375 |
+
background-clip: text;
|
| 376 |
+
}
|
| 377 |
+
.header-text p {
|
| 378 |
+
font-size: 1.1rem !important;
|
| 379 |
+
color: #64748b !important;
|
| 380 |
+
margin-top: 0 !important;
|
| 381 |
+
}
|
| 382 |
+
.footer-text { padding: 1rem 0; }
|
| 383 |
+
.footer-text a {
|
| 384 |
+
color: #f59e0b !important;
|
| 385 |
+
text-decoration: none !important;
|
| 386 |
+
font-weight: 500;
|
| 387 |
+
}
|
| 388 |
+
.footer-text a:hover { text-decoration: underline !important; }
|
| 389 |
+
@media (max-width: 768px) {
|
| 390 |
+
.header-text h1 { font-size: 1.8rem !important; }
|
| 391 |
+
.header-text p { font-size: 1rem !important; }
|
| 392 |
+
}
|
| 393 |
+
button, .gr-button { transition: all 0.2s ease !important; }
|
| 394 |
+
button:hover, .gr-button:hover {
|
| 395 |
+
transform: translateY(-1px);
|
| 396 |
+
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15) !important;
|
| 397 |
+
}
|
| 398 |
+
.gradio-container {
|
| 399 |
+
max-width: 1400px !important;
|
| 400 |
+
margin: 0 auto !important;
|
| 401 |
+
}
|
| 402 |
+
"""
|
| 403 |
+
|
| 404 |
+
# Build the Gradio interface. In Gradio 6.x, theme/css/footer_links/mcp_server
|
| 405 |
+
# are arguments to demo.launch() (see bottom of file), not to Blocks().
|
| 406 |
with gr.Blocks(fill_height=True) as demo:
|
|
|
|
| 407 |
gr.Markdown(
|
| 408 |
"""
|
| 409 |
+
# π¨ Image Studio
|
| 410 |
+
**Ultra-fast AI image generation** β’ Choose a model and create stunning images
|
| 411 |
""",
|
| 412 |
elem_classes="header-text"
|
| 413 |
)
|
| 414 |
+
|
| 415 |
with gr.Row(equal_height=False):
|
| 416 |
# Left column - Input controls
|
| 417 |
with gr.Column(scale=1, min_width=320):
|
| 418 |
+
model_name = gr.Dropdown(
|
| 419 |
+
choices=MODEL_CHOICES,
|
| 420 |
+
value=MODEL_ZIMAGE,
|
| 421 |
+
label="π§ Model",
|
| 422 |
+
info="Z-Image-Turbo: fast & general β’ NoobAI-XL: anime / illustration",
|
| 423 |
+
)
|
| 424 |
+
|
| 425 |
prompt = gr.Textbox(
|
| 426 |
label="β¨ Your Prompt",
|
| 427 |
placeholder="Describe the image you want to create...",
|
|
|
|
| 429 |
max_lines=10,
|
| 430 |
autofocus=True,
|
| 431 |
)
|
| 432 |
+
|
| 433 |
with gr.Accordion("βοΈ Advanced Settings", open=False):
|
| 434 |
+
use_negative_prompt = gr.Checkbox(
|
| 435 |
+
label="Use Negative Prompt",
|
| 436 |
+
value=True,
|
| 437 |
+
visible=False, # NoobXL only
|
| 438 |
+
)
|
| 439 |
+
negative_prompt = gr.Textbox(
|
| 440 |
+
label="π« Negative Prompt",
|
| 441 |
+
value=NOOBXL_NEGATIVE,
|
| 442 |
+
lines=3,
|
| 443 |
+
max_lines=6,
|
| 444 |
+
visible=False, # NoobXL only
|
| 445 |
+
)
|
| 446 |
+
|
| 447 |
with gr.Row():
|
| 448 |
height = gr.Slider(
|
| 449 |
+
minimum=512, maximum=2048, value=1024, step=64,
|
| 450 |
+
label="Height", info="Image height in pixels",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 451 |
)
|
| 452 |
width = gr.Slider(
|
| 453 |
+
minimum=512, maximum=2048, value=1024, step=64,
|
| 454 |
+
label="Width", info="Image width in pixels",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 455 |
)
|
| 456 |
+
|
| 457 |
num_inference_steps = gr.Slider(
|
| 458 |
+
minimum=1, maximum=50, value=9, step=1,
|
|
|
|
|
|
|
|
|
|
| 459 |
label="Inference Steps",
|
| 460 |
+
info="Z-Image: ~9 β’ NoobAI-XL: ~28",
|
| 461 |
)
|
| 462 |
+
|
| 463 |
+
guidance_scale = gr.Slider(
|
| 464 |
+
minimum=0.0, maximum=10.0, value=0.0, step=0.1,
|
| 465 |
+
label="Guidance Scale",
|
| 466 |
+
info="Z-Image-Turbo is guidance-free (forced to 0)",
|
| 467 |
+
interactive=False,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 468 |
)
|
| 469 |
+
|
| 470 |
+
with gr.Row():
|
| 471 |
+
randomize_seed = gr.Checkbox(label="π² Random Seed", value=True)
|
| 472 |
+
seed = gr.Number(label="Seed", value=42, precision=0, minimum=0, maximum=MAX_SEED)
|
| 473 |
+
|
| 474 |
generate_btn = gr.Button(
|
| 475 |
+
"π Generate Image", variant="primary", size="lg", scale=1
|
|
|
|
|
|
|
|
|
|
| 476 |
)
|
| 477 |
+
|
|
|
|
| 478 |
gr.Examples(
|
| 479 |
examples=examples,
|
| 480 |
inputs=[prompt],
|
| 481 |
label="π‘ Try these prompts",
|
| 482 |
examples_per_page=5,
|
| 483 |
)
|
| 484 |
+
|
| 485 |
# Right column - Output
|
| 486 |
with gr.Column(scale=1, min_width=320):
|
| 487 |
output_image = gr.Image(
|
|
|
|
| 492 |
height=600,
|
| 493 |
buttons=["download", "share"],
|
| 494 |
)
|
|
|
|
| 495 |
used_seed = gr.Number(
|
| 496 |
+
label="π² Seed Used", interactive=False, container=True,
|
|
|
|
|
|
|
| 497 |
)
|
| 498 |
+
|
|
|
|
| 499 |
gr.Markdown(
|
| 500 |
"""
|
| 501 |
---
|
| 502 |
<div style="text-align: center; opacity: 0.7; font-size: 0.9em; margin-top: 1rem;">
|
| 503 |
+
<strong>Models:</strong>
|
| 504 |
+
<a href="https://huggingface.co/Tongyi-MAI/Z-Image-Turbo" target="_blank">Z-Image-Turbo</a> β’
|
| 505 |
+
<a href="https://huggingface.co/Menyu/noobaiXLNAIXL_epsilonPred11Version" target="_blank">NoobAI-XL v1.1</a> β’
|
| 506 |
+
<strong>Demo by:</strong> <a href="https://x.com/realmrfakename" target="_blank">@mrfakename</a>
|
| 507 |
</div>
|
| 508 |
""",
|
| 509 |
elem_classes="footer-text"
|
| 510 |
)
|
| 511 |
+
|
| 512 |
+
# Update sliders / helper visibility when the model changes
|
| 513 |
+
model_name.change(
|
| 514 |
+
fn=apply_model_defaults,
|
| 515 |
+
inputs=[model_name],
|
| 516 |
+
outputs=[num_inference_steps, guidance_scale, height, width,
|
| 517 |
+
use_negative_prompt, negative_prompt],
|
| 518 |
+
)
|
| 519 |
+
|
| 520 |
+
gen_inputs = [
|
| 521 |
+
model_name, prompt, negative_prompt, use_negative_prompt,
|
| 522 |
+
height, width, num_inference_steps, guidance_scale, seed, randomize_seed,
|
| 523 |
+
]
|
| 524 |
+
|
| 525 |
generate_btn.click(
|
| 526 |
+
fn=generate_image, inputs=gen_inputs, outputs=[output_image, used_seed],
|
|
|
|
|
|
|
| 527 |
)
|
|
|
|
|
|
|
| 528 |
prompt.submit(
|
| 529 |
+
fn=generate_image, inputs=gen_inputs, outputs=[output_image, used_seed],
|
|
|
|
|
|
|
| 530 |
)
|
| 531 |
|
| 532 |
if __name__ == "__main__":
|
| 533 |
demo.launch(
|
| 534 |
theme=custom_theme,
|
| 535 |
+
css=APP_CSS,
|
| 536 |
+
footer_links=["api", "gradio"],
|
| 537 |
+
mcp_server=True,
|
| 538 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
-
gradio
|
| 2 |
git+https://github.com/huggingface/diffusers
|
| 3 |
transformers
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio[mcp]
|
| 2 |
git+https://github.com/huggingface/diffusers
|
| 3 |
transformers
|
| 4 |
+
accelerate
|
| 5 |
+
compel
|
| 6 |
+
sentencepiece
|
| 7 |
+
numpy
|