text-to-img / app.py
hydffgg's picture
Update app.py
05d2197 verified
import inspect
import random

import gradio as gr
import numpy as np
import torch
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler
# ── Device & model setup ────
# Decide once whether CUDA is usable; device string and weight precision both
# follow from that single answer (half precision on GPU, full precision on CPU).
model_repo_id = "Tongyi-MAI/Z-Image-Turbo"
if torch.cuda.is_available():
    device, torch_dtype = "cuda", torch.float16
else:
    device, torch_dtype = "cpu", torch.float32

# Load the pipeline for the repo above in the selected precision.
pipe = DiffusionPipeline.from_pretrained(model_repo_id, torch_dtype=torch_dtype)

# Replace the stock scheduler with an Euler-ancestral one built from the stock
# scheduler's own config.
# NOTE(review): the author's rationale is that turbo models work best with this
# scheduler — confirm against the model card before relying on it.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to(device)
# Largest value a signed 32-bit integer can hold; used as the upper bound for
# seeds (both the seed slider and randomly drawn seeds).
_INT32_LIMITS = np.iinfo(np.int32)
MAX_SEED = _INT32_LIMITS.max

# Upper bound, in pixels, for the width and height sliders.
MAX_IMAGE_SIZE = 1024
# Probe once at startup whether the loaded pipeline's ``__call__`` declares a
# ``negative_prompt`` parameter, so the rest of the app knows whether to send
# one. The author's expectation is that a distilled turbo pipeline may not take
# a negative prompt; probing avoids hard-coding that assumption.
def _accepts_keyword(func, name: str) -> bool:
    """Return True if *func* declares a parameter *name* that can be passed by keyword.

    ``inspect.signature`` is used rather than ``func.__code__.co_varnames``
    because it follows ``functools.wraps`` chains (``__wrapped__``). On a
    decorated callable the code object belongs to the wrapper and lists only
    the wrapper's own names; ``co_varnames`` also includes local variables
    that are not parameters at all.

    A bare ``**kwargs`` catch-all is deliberately NOT treated as support: only
    an explicitly declared parameter counts.
    """
    try:
        params = inspect.signature(func).parameters
    except (TypeError, ValueError):
        # The callable cannot be introspected; stay conservative.
        return False
    param = params.get(name)
    if param is None:
        return False
    return param.kind in (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )


_SUPPORTS_NEG_PROMPT: bool = _accepts_keyword(pipe.__call__, "negative_prompt")
# Terms that are always part of the negative prompt when one is sent to the
# pipeline. Kept as a tuple so individual terms are easy to add or remove; the
# pipeline receives them as one comma-separated string.
_DEFAULT_NEGATIVE_TERMS = (
    "anime",
    "cartoon",
    "illustration",
    "drawing",
    "painting",
    "sketch",
    "extra people",
    "missing people",
    "wrong number of people",
    "bad anatomy",
    "deformed",
    "low quality",
    "blurry",
    "watermark",
)
DEFAULT_NEGATIVE_PROMPT = ", ".join(_DEFAULT_NEGATIVE_TERMS)

# Initial values for the "Inference steps" and "Guidance scale" sliders.
TURBO_STEPS_DEFAULT = 4
TURBO_GUIDANCE_DEFAULT = 0.0
def _run_pipeline(call_kwargs):
    """Call the pipeline and return its first image.

    If the call fails with a ``TypeError`` that mentions ``negative_prompt``
    and a negative prompt was actually sent, retry once without it. Any other
    error (including one raised by the retry) propagates to the caller.
    """
    try:
        return pipe(**call_kwargs).images[0]
    except TypeError as exc:
        sent_negative = "negative_prompt" in call_kwargs
        if not (sent_negative and "negative_prompt" in str(exc)):
            raise
        retry_kwargs = {
            key: value
            for key, value in call_kwargs.items()
            if key != "negative_prompt"
        }
        return pipe(**retry_kwargs).images[0]


# @spaces.GPU
def infer(
    prompt,
    negative_prompt,
    seed,
    randomize_seed,
    width,
    height,
    guidance_scale,
    num_inference_steps,
    progress=gr.Progress(track_tqdm=True),
):
    """Generate one image for *prompt* and return it with the seed that was used.

    Args:
        prompt: Text description of the image. Must contain non-whitespace text.
        negative_prompt: Extra negative terms from the user. Only used when
            ``_SUPPORTS_NEG_PROMPT`` is true; it is then placed in front of
            ``DEFAULT_NEGATIVE_PROMPT``. May be empty or ``None``.
        seed: Seed for the random generator; ignored when *randomize_seed* is set.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Output width passed to the pipeline.
        height: Output height passed to the pipeline.
        guidance_scale: Guidance scale passed to the pipeline.
        num_inference_steps: Number of steps passed to the pipeline.
        progress: Gradio progress tracker (mirrors tqdm progress into the UI).

    Returns:
        A ``(image, seed)`` tuple: the first generated image and the integer
        seed actually used, so the UI can display it.

    Raises:
        gr.Error: If the prompt is empty, or if the pipeline call fails.
    """
    # The textbox value may be None as well as empty.
    if not (prompt or "").strip():
        raise gr.Error("Please enter a prompt before generating.")

    # UI sliders may deliver floats; the generator needs a real int seed and
    # the pipeline needs integral sizes and step counts.
    seed = random.randint(0, MAX_SEED) if randomize_seed else int(seed)
    generator = torch.Generator().manual_seed(seed)

    call_kwargs = dict(
        prompt=prompt,
        guidance_scale=float(guidance_scale),
        num_inference_steps=int(num_inference_steps),
        width=int(width),
        height=int(height),
        generator=generator,
    )

    # Only send a negative prompt when the startup probe found the parameter.
    if _SUPPORTS_NEG_PROMPT:
        user_negative = (negative_prompt or "").strip()
        if user_negative:
            call_kwargs["negative_prompt"] = f"{user_negative}, {DEFAULT_NEGATIVE_PROMPT}"
        else:
            call_kwargs["negative_prompt"] = DEFAULT_NEGATIVE_PROMPT

    try:
        image = _run_pipeline(call_kwargs)
    except Exception as exc:
        # Surface every failure (including a failed retry) as a UI error,
        # keeping the original exception as the cause for the server log.
        raise gr.Error(f"Generation failed: {exc}") from exc

    return image, seed
# Sample prompts offered as clickable examples; each one fills the prompt box.
examples = [
    # Scenes and objects
    "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k",
    "An astronaut riding a green horse",
    "A delicious ceviche cheesecake slice",
    # Prompts that specify how many people appear in the picture
    "Two people shaking hands in an office, photorealistic",
    "A family of four sitting at a dinner table, realistic photo",
]
# ── CSS ───────────────────────────────────────────────────────────────────────
# Custom stylesheet handed to ``gr.Blocks(css=css)``.
#   * The first rule is an ``@import`` of two web fonts from fonts.googleapis.com.
#   * ``:root`` defines the colour palette and corner radius as CSS variables
#     that the remaining rules reference through ``var(--…)``.
#   * ID selectors (#main-wrap, #prompt-shell, #prompt-box, #run-btn,
#     #result-img, #turbo-hint, #neg-warning, #neg-input) match the ``elem_id``
#     values assigned to components in the UI section; #app-header,
#     #turbo-pill, .hdivider and #app-footer match raw HTML emitted via gr.HTML.
#   * NOTE(review): no component with elem_id "neg-info" is created in the
#     visible UI code, so the ``#neg-info p`` rule looks unused — confirm.
#   * NOTE(review): the ``.gr-accordion`` / ``.gr-samples-table`` class names
#     depend on the markup Gradio generates — verify against the installed version.
# The string is runtime text: edit it as CSS, not as Python.
css = """
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;600;700;800&family=DM+Sans:ital,opsz,wght@0,9..40,300;0,9..40,400;0,9..40,500;1,9..40,300&display=swap');
:root {
--bg: #0a0a0b;
--surface: #111114;
--surface2: #18181d;
--border: #2a2a35;
--accent: #e8c547;
--accent2: #f0a030;
--text: #e8e8ec;
--muted: #6b6b7a;
--r: 12px;
}
*, *::before, *::after { box-sizing: border-box; }
body, .gradio-container {
background: var(--bg) !important;
font-family: 'DM Sans', sans-serif !important;
color: var(--text) !important;
min-height: 100vh;
}
body::before {
content: '';
position: fixed;
inset: 0;
background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 200 200' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)' opacity='0.03'/%3E%3C/svg%3E");
pointer-events: none;
z-index: 9999;
opacity: .4;
}
#main-wrap {
max-width: 800px;
margin: 0 auto;
padding: 48px 24px 100px;
}
/* ── Header ── */
#app-header { text-align: center; margin-bottom: 52px; }
#app-header .line {
width: 1px; height: 56px;
background: linear-gradient(to bottom, transparent, var(--accent));
margin: 0 auto 28px;
}
#app-header .badge {
display: inline-flex; align-items: center; gap: 6px;
font-size: 11px; font-weight: 500; letter-spacing: .18em;
text-transform: uppercase; color: var(--accent);
border: 1px solid rgba(232,197,71,.35);
background: rgba(232,197,71,.06);
padding: 5px 16px; border-radius: 100px; margin-bottom: 22px;
}
#app-header h1 {
font-family: 'Syne', sans-serif !important;
font-size: clamp(2.2rem, 5.5vw, 3.6rem) !important;
font-weight: 800 !important; line-height: 1.08 !important;
letter-spacing: -.025em !important; color: var(--text) !important;
margin: 0 0 12px !important; padding: 0 !important;
}
#app-header .gold {
background: linear-gradient(135deg, var(--accent) 0%, var(--accent2) 100%);
-webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text;
}
#app-header p {
color: var(--muted) !important; font-size: 15px !important;
font-weight: 300 !important; margin: 0 !important; line-height: 1.6 !important;
}
#turbo-pill {
display: inline-flex; align-items: center; gap: 5px; margin-top: 14px;
font-size: 11px; color: #5fde8a;
background: rgba(95,222,138,.08); border: 1px solid rgba(95,222,138,.25);
padding: 4px 12px; border-radius: 100px;
letter-spacing: .1em; text-transform: uppercase; font-weight: 500;
}
#turbo-pill::before { content: '⚑'; font-size: 12px; }
/* ── Prompt area ── */
#prompt-shell {
background: var(--surface); border: 1px solid var(--border);
border-radius: var(--r); padding: 6px;
display: flex; gap: 8px; align-items: stretch;
transition: border-color .2s, box-shadow .2s; margin-bottom: 12px;
}
#prompt-shell:focus-within {
border-color: var(--accent);
box-shadow: 0 0 0 3px rgba(232,197,71,.07);
}
#prompt-box, #prompt-box > label, #prompt-box > div {
background: transparent !important; border: none !important;
box-shadow: none !important; flex: 1;
}
#prompt-box textarea {
background: transparent !important; border: none !important;
box-shadow: none !important; color: var(--text) !important;
font-family: 'DM Sans', sans-serif !important; font-size: 15.5px !important;
padding: 12px 14px !important; resize: none !important;
}
#prompt-box textarea::placeholder { color: var(--muted) !important; }
/* ── Run button ── */
#run-btn {
background: linear-gradient(135deg, var(--accent) 0%, var(--accent2) 100%) !important;
color: #08080a !important; border: none !important; border-radius: 8px !important;
font-family: 'Syne', sans-serif !important; font-weight: 700 !important;
font-size: 12.5px !important; letter-spacing: .08em !important;
text-transform: uppercase !important; padding: 0 24px !important;
min-width: 110px; height: auto; align-self: stretch;
transition: opacity .15s, transform .1s !important; cursor: pointer !important;
}
#run-btn:hover { opacity: .84 !important; transform: translateY(-1px) !important; }
#run-btn:active { transform: scale(.98) translateY(0) !important; }
/* ── Output image ── */
#result-img {
border-radius: var(--r) !important; border: 1px solid var(--border) !important;
background: var(--surface) !important; overflow: hidden !important;
min-height: 320px !important; margin-top: 8px;
}
#result-img img { border-radius: var(--r) !important; display: block; width: 100%; }
/* ── Accordion ── */
.gr-accordion {
background: var(--surface) !important; border: 1px solid var(--border) !important;
border-radius: var(--r) !important; overflow: hidden !important; margin-top: 16px !important;
}
.gr-accordion > button, .gr-accordion .label-wrap {
padding: 14px 20px !important; font-family: 'Syne', sans-serif !important;
font-size: 12px !important; font-weight: 700 !important;
letter-spacing: .1em !important; text-transform: uppercase !important;
color: var(--muted) !important; background: transparent !important;
}
.gr-accordion > button:hover, .gr-accordion .label-wrap:hover { color: var(--text) !important; }
.gr-accordion .wrap { padding: 4px 20px 20px !important; }
label span, .gr-slider label span {
font-size: 11.5px !important; font-weight: 500 !important;
letter-spacing: .08em !important; text-transform: uppercase !important; color: var(--muted) !important;
}
input[type=range] { accent-color: var(--accent) !important; }
/* ── Turbo hint ── */
#turbo-hint p {
font-size: 12px !important; color: #5fde8a !important;
border-left: 2px solid rgba(95,222,138,.3);
padding-left: 10px !important; margin: 0 0 14px !important;
}
/* ── Neg prompt warning ── */
#neg-warning p {
font-size: 12px !important; color: #e8824a !important;
border-left: 2px solid rgba(232,130,74,.3);
padding-left: 10px !important; margin: 0 0 10px !important;
}
#neg-input textarea {
background: var(--surface2) !important; border: 1px solid var(--border) !important;
border-radius: 8px !important; color: var(--text) !important;
font-family: 'DM Sans', sans-serif !important; font-size: 13.5px !important;
transition: border-color .2s !important;
}
#neg-input textarea:focus {
border-color: var(--accent) !important;
box-shadow: 0 0 0 2px rgba(232,197,71,.07) !important;
}
input[type=checkbox] { accent-color: var(--accent) !important; }
#neg-info p {
font-size: 12px !important; color: var(--muted) !important;
border-left: 2px solid var(--border); padding-left: 10px !important;
margin: 6px 0 0 !important; font-style: italic;
}
/* ── Examples ── */
.gr-samples-table, table.gr-samples-table { border: none !important; }
.gr-samples-table td, table.gr-samples-table td {
background: var(--surface2) !important; border: 1px solid var(--border) !important;
border-radius: 8px !important; color: var(--muted) !important; font-size: 13px !important;
font-family: 'DM Sans', sans-serif !important; padding: 10px 14px !important;
transition: all .15s !important; cursor: pointer !important;
}
.gr-samples-table td:hover, table.gr-samples-table td:hover {
background: var(--surface) !important; color: var(--text) !important;
border-color: rgba(232,197,71,.5) !important;
}
.hdivider {
height: 1px;
background: linear-gradient(to right, transparent, var(--border), transparent);
margin: 32px 0 28px;
}
#app-footer {
text-align: center; margin-top: 72px; font-size: 12px;
color: var(--muted); letter-spacing: .04em;
}
#app-footer::before {
content: ''; display: block; width: 1px; height: 48px;
background: linear-gradient(to top, transparent, var(--border));
margin: 0 auto 20px;
}
"""
# ── Build UI ──────────────────────────────────────────────────────────────────
# Markdown notice rendered next to the negative-prompt field. Which text is
# shown depends on the startup probe result in ``_SUPPORTS_NEG_PROMPT``.
# (The dash in the "disabled" message was mis-encoded; it is a real em dash now.)
if _SUPPORTS_NEG_PROMPT:
    _neg_warning = "✦ Negative prompt is **supported** by this pipeline."
else:
    _neg_warning = (
        "⚠️ **Negative prompt is disabled** — Z-Image-Turbo is a distilled model "
        "that does not support negative prompts. The field below is ignored."
    )
# Page layout: header, prompt row, result image, collapsible advanced controls,
# examples and footer, all inside one centred column. User-facing strings had
# mis-encoded punctuation and icons; they are restored to the intended characters.
with gr.Blocks(css=css, title="Imagine · Z-Image-Turbo") as demo:
    with gr.Column(elem_id="main-wrap"):
        # Static header markup; styled by the #app-header / #turbo-pill CSS rules.
        gr.HTML("""
        <div id="app-header">
          <div class="line"></div>
          <div class="badge"><span>✦</span> Z-Image-Turbo · Tongyi-MAI</div>
          <h1>Transform words<br>into <span class="gold">visual worlds</span></h1>
          <p>Type a description — get a photorealistic image in seconds.</p>
          <div id="turbo-pill">Turbo mode · 4-step generation</div>
        </div>
        """)

        # Prompt textbox and Generate button share one flex row.
        with gr.Row(elem_id="prompt-shell"):
            prompt = gr.Text(
                label="",
                show_label=False,
                max_lines=1,
                placeholder="A lone wolf standing on a cliff at golden hour, cinematic 8k…",
                container=False,
                elem_id="prompt-box",
            )
            run_button = gr.Button(
                "Generate ↗", elem_id="run-btn", scale=0, variant="primary"
            )

        result = gr.Image(label="", show_label=False, elem_id="result-img")

        with gr.Accordion("⚙ Advanced Controls", open=False):
            gr.Markdown(
                "⚑ **Turbo model** — optimal at 4–8 steps, guidance scale = 0.0. "
                "Higher values may degrade quality.",
                elem_id="turbo-hint",
            )
            # Tell the user whether the negative-prompt field has any effect.
            gr.Markdown(_neg_warning, elem_id="neg-warning")
            negative_prompt = gr.Text(
                label="Additional negative terms (may be ignored — see note above)",
                max_lines=2,
                placeholder="ugly, oversaturated…",
                elem_id="neg-input",
                # Read-only when the pipeline does not take a negative prompt.
                interactive=_SUPPORTS_NEG_PROMPT,
            )
            with gr.Row():
                seed = gr.Slider(
                    label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0
                )
                randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            with gr.Row():
                width = gr.Slider(
                    label="Width",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
                height = gr.Slider(
                    label="Height",
                    minimum=256,
                    maximum=MAX_IMAGE_SIZE,
                    step=32,
                    value=1024,
                )
            with gr.Row():
                guidance_scale = gr.Slider(
                    label="Guidance scale",
                    minimum=0.0,
                    maximum=10.0,
                    step=0.1,
                    value=TURBO_GUIDANCE_DEFAULT,
                )
                num_inference_steps = gr.Slider(
                    label="Inference steps",
                    minimum=1,
                    maximum=50,
                    step=1,
                    value=TURBO_STEPS_DEFAULT,
                )

        gr.HTML('<div class="hdivider"></div>')
        gr.Examples(examples=examples, inputs=[prompt], label="✦ Try an example")
        gr.HTML('<div id="app-footer">Powered by Z-Image-Turbo · Tongyi-MAI</div>')

    # Clicking Generate or pressing Enter in the prompt box both run ``infer``.
    # The seed slider is also an output so it shows the seed that was used.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[
            prompt,
            negative_prompt,
            seed,
            randomize_seed,
            width,
            height,
            guidance_scale,
            num_inference_steps,
        ],
        outputs=[result, seed],
    )

# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()