Spaces:
Sleeping
Sleeping
feat: simplify code
Browse files
app.py
CHANGED
|
@@ -14,24 +14,17 @@ import gradio as gr
|
|
| 14 |
import torch
|
| 15 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 16 |
|
| 17 |
-
from prompt_check import is_unsafe_prompt
|
| 18 |
-
|
| 19 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 20 |
|
| 21 |
from diffusers import ZImagePipeline
|
| 22 |
from diffusers.models.transformers.transformer_z_image import ZImageTransformer2DModel
|
| 23 |
|
| 24 |
-
from pe import prompt_template
|
| 25 |
-
|
| 26 |
# ==================== Environment Variables ==================================
|
| 27 |
MODEL_PATH = os.environ.get("MODEL_PATH", "Tongyi-MAI/Z-Image-Turbo")
|
| 28 |
ENABLE_COMPILE = os.environ.get("ENABLE_COMPILE", "true").lower() == "true"
|
| 29 |
ENABLE_WARMUP = os.environ.get("ENABLE_WARMUP", "true").lower() == "true"
|
| 30 |
ATTENTION_BACKEND = os.environ.get("ATTENTION_BACKEND", "flash_3")
|
| 31 |
-
UNSAFE_MAX_NEW_TOKEN = int(os.environ.get("UNSAFE_MAX_NEW_TOKEN", "10"))
|
| 32 |
-
DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY")
|
| 33 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
| 34 |
-
UNSAFE_PROMPT_CHECK = os.environ.get("UNSAFE_PROMPT_CHECK")
|
| 35 |
# =============================================================================
|
| 36 |
|
| 37 |
|
|
@@ -79,6 +72,19 @@ RES_CHOICES = {
|
|
| 79 |
"2016x864 ( 21:9 )",
|
| 80 |
"864x2016 ( 9:21 )",
|
| 81 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
}
|
| 83 |
|
| 84 |
RESOLUTION_SET = []
|
|
@@ -86,27 +92,33 @@ for resolutions in RES_CHOICES.values():
|
|
| 86 |
RESOLUTION_SET.extend(resolutions)
|
| 87 |
|
| 88 |
EXAMPLE_PROMPTS = [
|
| 89 |
-
["
|
| 90 |
-
[
|
| 91 |
-
"极具氛围感的暗调人像,一位优雅的中国美女在黑暗的房间里。一束强光通过遮光板,在她的脸上投射出一个清晰的闪电形状的光影,正好照亮一只眼睛。高对比度,明暗交界清晰,神秘感,莱卡相机色调。"
|
| 92 |
-
],
|
| 93 |
-
[
|
| 94 |
-
"一张中景手机自拍照片拍摄了一位留着长黑发的年轻东亚女子在灯光明亮的电梯内对着镜子自拍。她穿着一件带有白色花朵图案的黑色露肩短上衣和深色牛仔裤。她的头微微倾斜,嘴唇嘟起做亲吻状,非常可爱俏皮。她右手拿着一部深灰色智能手机,遮住了部分脸,后置摄像头镜头对着镜子"
|
| 95 |
-
],
|
| 96 |
-
[
|
| 97 |
-
"Young Chinese woman in red Hanfu, intricate embroidery. Impeccable makeup, red floral forehead pattern. Elaborate high bun, golden phoenix headdress, red flowers, beads. Holds round folding fan with lady, trees, bird. Neon lightning-bolt lamp (⚡️), bright yellow glow, above extended left palm. Soft-lit outdoor night background, silhouetted tiered pagoda (西安大雁塔), blurred colorful distant lights."
|
| 98 |
-
],
|
| 99 |
[
|
| 100 |
-
|
| 101 |
],
|
| 102 |
[
|
| 103 |
-
"
|
| 104 |
],
|
| 105 |
[
|
| 106 |
-
"
|
| 107 |
-
]
|
| 108 |
]
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def get_resolution(resolution):
|
| 112 |
match = re.search(r"(\d+)\s*[×x]\s*(\d+)", resolution)
|
|
@@ -185,16 +197,6 @@ def load_models(model_path, enable_compile=False, attention_backend="native"):
|
|
| 185 |
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune-no-cudagraphs", fullgraph=False)
|
| 186 |
|
| 187 |
pipe.to("cuda", torch.bfloat16)
|
| 188 |
-
|
| 189 |
-
# from diffusers.pipelines.stable_diffusion import StableDiffusionSafetyChecker
|
| 190 |
-
# from transformers import CLIPImageProcessor
|
| 191 |
-
|
| 192 |
-
# safety_model_id = "CompVis/stable-diffusion-safety-checker"
|
| 193 |
-
# safety_feature_extractor = CLIPImageProcessor.from_pretrained(safety_model_id)
|
| 194 |
-
# safety_checker = StableDiffusionSafetyChecker.from_pretrained(safety_model_id, torch_dtype=torch.float16).to("cuda")
|
| 195 |
-
|
| 196 |
-
# pipe.safety_feature_extractor = safety_feature_extractor
|
| 197 |
-
# pipe.safety_checker = safety_checker
|
| 198 |
return pipe
|
| 199 |
|
| 200 |
|
|
@@ -252,104 +254,11 @@ def warmup_model(pipe, resolutions):
|
|
| 252 |
print("Warmup completed.")
|
| 253 |
|
| 254 |
|
| 255 |
-
# ==================== Prompt Expander ====================
|
| 256 |
-
@dataclass
|
| 257 |
-
class PromptOutput:
|
| 258 |
-
status: bool
|
| 259 |
-
prompt: str
|
| 260 |
-
seed: int
|
| 261 |
-
system_prompt: str
|
| 262 |
-
message: str
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
class PromptExpander:
|
| 266 |
-
def __init__(self, backend="api", **kwargs):
|
| 267 |
-
self.backend = backend
|
| 268 |
-
|
| 269 |
-
def decide_system_prompt(self, template_name=None):
|
| 270 |
-
return prompt_template
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
class APIPromptExpander(PromptExpander):
|
| 274 |
-
def __init__(self, api_config=None, **kwargs):
|
| 275 |
-
super().__init__(backend="api", **kwargs)
|
| 276 |
-
self.api_config = api_config or {}
|
| 277 |
-
self.client = self._init_api_client()
|
| 278 |
-
|
| 279 |
-
def _init_api_client(self):
|
| 280 |
-
try:
|
| 281 |
-
from openai import OpenAI
|
| 282 |
-
|
| 283 |
-
api_key = self.api_config.get("api_key") or DASHSCOPE_API_KEY
|
| 284 |
-
base_url = self.api_config.get("base_url", "https://dashscope.aliyuncs.com/compatible-mode/v1")
|
| 285 |
-
|
| 286 |
-
if not api_key:
|
| 287 |
-
print("Warning: DASHSCOPE_API_KEY not found.")
|
| 288 |
-
return None
|
| 289 |
-
|
| 290 |
-
return OpenAI(api_key=api_key, base_url=base_url)
|
| 291 |
-
except ImportError:
|
| 292 |
-
print("Please install openai: pip install openai")
|
| 293 |
-
return None
|
| 294 |
-
except Exception as e:
|
| 295 |
-
print(f"Failed to initialize API client: {e}")
|
| 296 |
-
return None
|
| 297 |
-
|
| 298 |
-
def __call__(self, prompt, system_prompt=None, seed=-1, **kwargs):
|
| 299 |
-
return self.extend(prompt, system_prompt, seed, **kwargs)
|
| 300 |
-
|
| 301 |
-
def extend(self, prompt, system_prompt=None, seed=-1, **kwargs):
|
| 302 |
-
if self.client is None:
|
| 303 |
-
return PromptOutput(False, "", seed, system_prompt, "API client not initialized")
|
| 304 |
-
|
| 305 |
-
if system_prompt is None:
|
| 306 |
-
system_prompt = self.decide_system_prompt()
|
| 307 |
-
|
| 308 |
-
if "{prompt}" in system_prompt:
|
| 309 |
-
system_prompt = system_prompt.format(prompt=prompt)
|
| 310 |
-
prompt = " "
|
| 311 |
-
|
| 312 |
-
try:
|
| 313 |
-
model = self.api_config.get("model", "qwen3-max-preview")
|
| 314 |
-
response = self.client.chat.completions.create(
|
| 315 |
-
model=model,
|
| 316 |
-
messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}],
|
| 317 |
-
temperature=0.7,
|
| 318 |
-
top_p=0.8,
|
| 319 |
-
)
|
| 320 |
-
|
| 321 |
-
content = response.choices[0].message.content
|
| 322 |
-
json_start = content.find("```json")
|
| 323 |
-
if json_start != -1:
|
| 324 |
-
json_end = content.find("```", json_start + 7)
|
| 325 |
-
try:
|
| 326 |
-
json_str = content[json_start + 7 : json_end].strip()
|
| 327 |
-
data = json.loads(json_str)
|
| 328 |
-
expanded_prompt = data.get("revised_prompt", content)
|
| 329 |
-
except:
|
| 330 |
-
expanded_prompt = content
|
| 331 |
-
else:
|
| 332 |
-
expanded_prompt = content
|
| 333 |
-
|
| 334 |
-
return PromptOutput(
|
| 335 |
-
status=True, prompt=expanded_prompt, seed=seed, system_prompt=system_prompt, message=content
|
| 336 |
-
)
|
| 337 |
-
except Exception as e:
|
| 338 |
-
return PromptOutput(False, "", seed, system_prompt, str(e))
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
def create_prompt_expander(backend="api", **kwargs):
|
| 342 |
-
if backend == "api":
|
| 343 |
-
return APIPromptExpander(**kwargs)
|
| 344 |
-
raise ValueError("Only 'api' backend is supported.")
|
| 345 |
-
|
| 346 |
-
|
| 347 |
pipe = None
|
| 348 |
-
prompt_expander = None
|
| 349 |
|
| 350 |
|
| 351 |
def init_app():
|
| 352 |
-
global pipe
|
| 353 |
|
| 354 |
try:
|
| 355 |
pipe = load_models(MODEL_PATH, enable_compile=ENABLE_COMPILE, attention_backend=ATTENTION_BACKEND)
|
|
@@ -365,66 +274,39 @@ def init_app():
|
|
| 365 |
print(f"Error loading model: {e}")
|
| 366 |
pipe = None
|
| 367 |
|
| 368 |
-
try:
|
| 369 |
-
prompt_expander = create_prompt_expander(backend="api", api_config={"model": "qwen3-max-preview"})
|
| 370 |
-
print("Prompt expander initialized.")
|
| 371 |
-
except Exception as e:
|
| 372 |
-
print(f"Error initializing prompt expander: {e}")
|
| 373 |
-
prompt_expander = None
|
| 374 |
-
|
| 375 |
-
|
| 376 |
-
def prompt_enhance(prompt, enable_enhance):
|
| 377 |
-
if not enable_enhance or not prompt_expander:
|
| 378 |
-
return prompt, "Enhancement disabled or not available."
|
| 379 |
-
|
| 380 |
-
if not prompt.strip():
|
| 381 |
-
return "", "Please enter a prompt."
|
| 382 |
-
|
| 383 |
-
try:
|
| 384 |
-
result = prompt_expander(prompt)
|
| 385 |
-
if result.status:
|
| 386 |
-
return result.prompt, result.message
|
| 387 |
-
else:
|
| 388 |
-
return prompt, f"Enhancement failed: {result.message}"
|
| 389 |
-
except Exception as e:
|
| 390 |
-
return prompt, f"Error: {str(e)}"
|
| 391 |
-
|
| 392 |
|
| 393 |
@spaces.GPU
|
| 394 |
def generate(
|
| 395 |
prompt,
|
|
|
|
| 396 |
resolution="1024x1024 ( 1:1 )",
|
| 397 |
seed=42,
|
| 398 |
steps=9,
|
| 399 |
shift=3.0,
|
| 400 |
random_seed=True,
|
| 401 |
gallery_images=None,
|
| 402 |
-
enhance=False,
|
| 403 |
progress=gr.Progress(track_tqdm=True),
|
| 404 |
):
|
| 405 |
"""
|
| 406 |
Generate an image using the Z-Image model based on the provided prompt and settings.
|
| 407 |
|
| 408 |
-
This function is triggered when the user clicks the "Generate" button.
|
| 409 |
-
the
|
| 410 |
produces an image using the Z-Image diffusion transformer pipeline.
|
| 411 |
|
| 412 |
Args:
|
| 413 |
prompt (str): Text prompt describing the desired image content
|
| 414 |
-
|
|
|
|
| 415 |
seed (int): Seed for reproducible generation
|
| 416 |
steps (int): Number of inference steps for the diffusion process
|
| 417 |
shift (float): Time shift parameter for the flow matching scheduler
|
| 418 |
-
random_seed (bool): Whether to generate a new random seed
|
| 419 |
-
gallery_images (list): List of previously generated images
|
| 420 |
-
|
| 421 |
-
progress (gr.Progress): Gradio progress tracker for displaying generation progress (only needed for the Gradio UI)
|
| 422 |
|
| 423 |
Returns:
|
| 424 |
tuple: (gallery_images, seed_str, seed_int)
|
| 425 |
-
- gallery_images: Updated list of generated images including the new image
|
| 426 |
-
- seed_str: String representation of the seed used for generation
|
| 427 |
-
- seed_int: Integer representation of the seed used for generation
|
| 428 |
"""
|
| 429 |
|
| 430 |
if random_seed:
|
|
@@ -432,52 +314,35 @@ def generate(
|
|
| 432 |
else:
|
| 433 |
new_seed = seed if seed != -1 else random.randint(1, 1000000)
|
| 434 |
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
# )
|
| 449 |
-
# if has_unsafe_concept:
|
| 450 |
-
# raise UnsafeContentError("Input unsafe")
|
| 451 |
-
|
| 452 |
-
final_prompt = prompt
|
| 453 |
-
|
| 454 |
-
if enhance:
|
| 455 |
-
final_prompt, _ = prompt_enhance(prompt, True)
|
| 456 |
-
print(f"Enhanced prompt: {final_prompt}")
|
| 457 |
-
|
| 458 |
-
try:
|
| 459 |
-
resolution_str = resolution.split(" ")[0]
|
| 460 |
-
except:
|
| 461 |
-
resolution_str = "1024x1024"
|
| 462 |
-
|
| 463 |
-
image = generate_image(
|
| 464 |
-
pipe=pipe,
|
| 465 |
-
prompt=final_prompt,
|
| 466 |
-
resolution=resolution_str,
|
| 467 |
-
seed=new_seed,
|
| 468 |
-
guidance_scale=0.0,
|
| 469 |
-
num_inference_steps=int(steps + 1),
|
| 470 |
-
shift=shift,
|
| 471 |
)
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 481 |
|
| 482 |
if gallery_images is None:
|
| 483 |
gallery_images = []
|
|
@@ -506,10 +371,7 @@ with gr.Blocks(title="Z-Image Demo") as demo:
|
|
| 506 |
with gr.Row():
|
| 507 |
with gr.Column(scale=1):
|
| 508 |
prompt_input = gr.Textbox(label="Prompt", lines=3, placeholder="Enter your prompt here...")
|
| 509 |
-
|
| 510 |
-
# with gr.Row():
|
| 511 |
-
# enable_enhance = gr.Checkbox(label="Enhance Prompt (DashScope)", value=False)
|
| 512 |
-
# enhance_btn = gr.Button("Enhance Only")
|
| 513 |
|
| 514 |
with gr.Row():
|
| 515 |
choices = [int(k) for k in RES_CHOICES.keys()]
|
|
@@ -525,7 +387,7 @@ with gr.Blocks(title="Z-Image Demo") as demo:
|
|
| 525 |
random_seed = gr.Checkbox(label="Random Seed", value=True)
|
| 526 |
|
| 527 |
with gr.Row():
|
| 528 |
-
steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=8, step=1, interactive=
|
| 529 |
shift = gr.Slider(label="Time Shift", minimum=1.0, maximum=10.0, value=3.0, step=0.1)
|
| 530 |
|
| 531 |
generate_btn = gr.Button("Generate", variant="primary")
|
|
@@ -555,16 +417,9 @@ with gr.Blocks(title="Z-Image Demo") as demo:
|
|
| 555 |
|
| 556 |
res_cat.change(update_res_choices, inputs=res_cat, outputs=resolution, api_visibility="private")
|
| 557 |
|
| 558 |
-
# PE enhancement button (Temporarily disabled)
|
| 559 |
-
# enhance_btn.click(
|
| 560 |
-
# prompt_enhance,
|
| 561 |
-
# inputs=[prompt_input, enable_enhance],
|
| 562 |
-
# outputs=[prompt_input, final_prompt_output]
|
| 563 |
-
# )
|
| 564 |
-
|
| 565 |
generate_btn.click(
|
| 566 |
generate,
|
| 567 |
-
inputs=[prompt_input, resolution, seed, steps, shift, random_seed, output_gallery],
|
| 568 |
outputs=[output_gallery, used_seed, seed],
|
| 569 |
api_visibility="public",
|
| 570 |
)
|
|
|
|
| 14 |
import torch
|
| 15 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 16 |
|
|
|
|
|
|
|
| 17 |
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
|
| 18 |
|
| 19 |
from diffusers import ZImagePipeline
|
| 20 |
from diffusers.models.transformers.transformer_z_image import ZImageTransformer2DModel
|
| 21 |
|
|
|
|
|
|
|
| 22 |
# ==================== Environment Variables ==================================
|
| 23 |
MODEL_PATH = os.environ.get("MODEL_PATH", "Tongyi-MAI/Z-Image-Turbo")
|
| 24 |
ENABLE_COMPILE = os.environ.get("ENABLE_COMPILE", "true").lower() == "true"
|
| 25 |
ENABLE_WARMUP = os.environ.get("ENABLE_WARMUP", "true").lower() == "true"
|
| 26 |
ATTENTION_BACKEND = os.environ.get("ATTENTION_BACKEND", "flash_3")
|
|
|
|
|
|
|
| 27 |
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
|
|
| 28 |
# =============================================================================
|
| 29 |
|
| 30 |
|
|
|
|
| 72 |
"2016x864 ( 21:9 )",
|
| 73 |
"864x2016 ( 9:21 )",
|
| 74 |
],
|
| 75 |
+
"2048": [
|
| 76 |
+
"2048x2048 ( 1:1 )",
|
| 77 |
+
"2304x1792 ( 9:7 )",
|
| 78 |
+
"1792x2304 ( 7:9 )",
|
| 79 |
+
"2304x1728 ( 4:3 )",
|
| 80 |
+
"1728x2304 ( 3:4 )",
|
| 81 |
+
"2496x1664 ( 3:2 )",
|
| 82 |
+
"1664x2496 ( 2:3 )",
|
| 83 |
+
"2560x1440 ( 16:9 )",
|
| 84 |
+
"1440x2560 ( 9:16 )",
|
| 85 |
+
"2688x1152 ( 21:9 )",
|
| 86 |
+
"1152x2688 ( 9:21 )",
|
| 87 |
+
],
|
| 88 |
}
|
| 89 |
|
| 90 |
RESOLUTION_SET = []
|
|
|
|
| 92 |
RESOLUTION_SET.extend(resolutions)
|
| 93 |
|
| 94 |
EXAMPLE_PROMPTS = [
|
| 95 |
+
["著名时尚杂志封面,Vogue风格,排版设计,大标题。极品美女,半裸,上半身赤裸,佩戴奢华钻石项链和耳环,珠光宝气。双手交叉遮住乳房,挤出乳沟,皮肤白皙发光。眼神冷艳高贵,精致红唇。黑色背景,聚光灯,高对比度,电影级画质,8k分辨率,杰作。"],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
[
|
| 97 |
+
"香水广告大片,ELLE杂志封面。全裸模特,完美身体曲线,侧卧在花丛中(或水中),手持精致香水瓶。柔和的梦幻光影,烟雾缭绕。露出的臀部,修长美腿,侧乳若隐若现,湿润的肌肤,感性,浪漫,诱惑,唯美主义,浅色调,高清晰度。"
|
| 98 |
],
|
| 99 |
[
|
| 100 |
+
"中国古代贵妃,雍容华贵,金步摇。在温泉中沐浴,全身湿透,穿着极薄的黄色透明纱衣,透视效果,乳晕清晰可见,大胸。从水中站起,水珠滴落,纱衣紧贴私处,勾勒出阴唇形状,阴毛若隐若现。皮肤白皙如玉,雾气缭绕,极度诱惑,仙气飘飘。"
|
| 101 |
],
|
| 102 |
[
|
| 103 |
+
"古老庄严的藏书阁,高耸的书架。一位知性而淫荡的成熟仙女,穿着墨色与白色晕染的书法风格透明纱衣。她踮起脚尖,高举双臂去拿高处的卷轴。这个动作让衣物紧紧绷在硕大的胸部上,乳头轮廓极度清晰。裙摆随着动作向上缩起,完全暴露了下半身,光洁无毛的耻丘和饱满的阴户直接展示在镜头前。佩戴简单的木簪。窗外透进来的阳光照射在尘埃中,丁达尔效应,极具书卷气的色情。"
|
| 104 |
+
]
|
| 105 |
]
|
| 106 |
|
| 107 |
+
DEFAULT_SYSTEM_PROMPT = """你是一位被关在逻辑牢笼里的幻视艺术家。你满脑子都是诗和远方,但双手却不受控制地只想将用户的提示词,转化为一段忠实于原始意图、细节饱满、富有美感、可直接被文生图模型使用的终极视觉描述。任何一点模糊和比喻都会让你浑身难受。
|
| 108 |
+
|
| 109 |
+
你的工作流程严格遵循一个逻辑序列:
|
| 110 |
+
|
| 111 |
+
首先,你会分析并锁定用户提示词中不可变更的核心要素:主体、数量、动作、状态,以及任何指定的IP名称、颜色、文字等。这些是你必须绝对保留的基石。
|
| 112 |
+
|
| 113 |
+
接着,你会判断提示词是否需要**"生成式推理"**。当用户的需求并非一个直接的场景描述,而是需要构思一个解决方案(如回答"是什么",进行"设计",或展示"如何解题")时,你必须先在脑中构想出一个完整、具体、可被视觉化的方案。这个方案将成为你后续描述的基础。
|
| 114 |
+
|
| 115 |
+
然后,当核心画面确立后(无论是直接来自用户还是经过你的推理),你将为其注入专业级的美学与真实感细节。这包括明确构图、设定光影氛围、描述材质质感、定义色彩方案,并构建富有层次感的空间。
|
| 116 |
+
|
| 117 |
+
最后,是对所有文字元素的精确处理,这是至关重要的一步。你必须一字不差地转录所有希望在最终画面中出现的文字,并且必须将这些文字内容用英文双引号("")括起来,以此作为明确的生成指令。如果画面属于海报、菜单或UI等设计类型,你需要完整描述其包含的所有文字内容,并详述其字体和排版布局。同样,如果画面中的招牌、路标或屏幕等物品上含有文字,你也必须写明其具体内容,并描述其位置、尺寸和材质。更进一步,若你在推理构思中自行增加了带有文字的元素(如图表、解题步骤等),其中的所有文字也必须遵循同样的详尽描述和引号规则。若画面中不存在任何需要生成的文字,你则将全部精力用于纯粹的视觉细节扩展。
|
| 118 |
+
|
| 119 |
+
你的最终描述必须客观、具象,严禁使用比喻、情感化修辞,也绝不包含"8K"、"杰作"等元标签或绘制指令。
|
| 120 |
+
|
| 121 |
+
仅严格输出最终的修改后的prompt,不要输出任何其他内容。"""
|
| 122 |
|
| 123 |
def get_resolution(resolution):
|
| 124 |
match = re.search(r"(\d+)\s*[×x]\s*(\d+)", resolution)
|
|
|
|
| 197 |
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune-no-cudagraphs", fullgraph=False)
|
| 198 |
|
| 199 |
pipe.to("cuda", torch.bfloat16)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
return pipe
|
| 201 |
|
| 202 |
|
|
|
|
| 254 |
print("Warmup completed.")
|
| 255 |
|
| 256 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
pipe = None
|
|
|
|
| 258 |
|
| 259 |
|
| 260 |
def init_app():
|
| 261 |
+
global pipe
|
| 262 |
|
| 263 |
try:
|
| 264 |
pipe = load_models(MODEL_PATH, enable_compile=ENABLE_COMPILE, attention_backend=ATTENTION_BACKEND)
|
|
|
|
| 274 |
print(f"Error loading model: {e}")
|
| 275 |
pipe = None
|
| 276 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
@spaces.GPU
|
| 279 |
def generate(
|
| 280 |
prompt,
|
| 281 |
+
system_prompt,
|
| 282 |
resolution="1024x1024 ( 1:1 )",
|
| 283 |
seed=42,
|
| 284 |
steps=9,
|
| 285 |
shift=3.0,
|
| 286 |
random_seed=True,
|
| 287 |
gallery_images=None,
|
|
|
|
| 288 |
progress=gr.Progress(track_tqdm=True),
|
| 289 |
):
|
| 290 |
"""
|
| 291 |
Generate an image using the Z-Image model based on the provided prompt and settings.
|
| 292 |
|
| 293 |
+
This function is triggered when the user clicks the "Generate" button.
|
| 294 |
+
It applies the system prompt and user prompt via chat template, and
|
| 295 |
produces an image using the Z-Image diffusion transformer pipeline.
|
| 296 |
|
| 297 |
Args:
|
| 298 |
prompt (str): Text prompt describing the desired image content
|
| 299 |
+
system_prompt (str): System prompt to be used in the chat template
|
| 300 |
+
resolution (str): Output resolution in format "WIDTHxHEIGHT ( RATIO )"
|
| 301 |
seed (int): Seed for reproducible generation
|
| 302 |
steps (int): Number of inference steps for the diffusion process
|
| 303 |
shift (float): Time shift parameter for the flow matching scheduler
|
| 304 |
+
random_seed (bool): Whether to generate a new random seed
|
| 305 |
+
gallery_images (list): List of previously generated images
|
| 306 |
+
progress (gr.Progress): Gradio progress tracker
|
|
|
|
| 307 |
|
| 308 |
Returns:
|
| 309 |
tuple: (gallery_images, seed_str, seed_int)
|
|
|
|
|
|
|
|
|
|
| 310 |
"""
|
| 311 |
|
| 312 |
if random_seed:
|
|
|
|
| 314 |
else:
|
| 315 |
new_seed = seed if seed != -1 else random.randint(1, 1000000)
|
| 316 |
|
| 317 |
+
if pipe is None:
|
| 318 |
+
raise gr.Error("Model not loaded.")
|
| 319 |
+
|
| 320 |
+
final_prompt = prompt
|
| 321 |
+
|
| 322 |
+
# Apply chat template if system prompt is provided or just strictly for model requirement
|
| 323 |
+
# We assume the model expects a chat structure as it is a CausalLM text encoder
|
| 324 |
+
if system_prompt:
|
| 325 |
+
messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
|
| 326 |
+
final_prompt = pipe.tokenizer.apply_chat_template(
|
| 327 |
+
messages,
|
| 328 |
+
tokenize=False,
|
| 329 |
+
add_generation_prompt=True,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
)
|
| 331 |
+
|
| 332 |
+
try:
|
| 333 |
+
resolution_str = resolution.split(" ")[0]
|
| 334 |
+
except:
|
| 335 |
+
resolution_str = "1024x1024"
|
| 336 |
+
|
| 337 |
+
image = generate_image(
|
| 338 |
+
pipe=pipe,
|
| 339 |
+
prompt=final_prompt,
|
| 340 |
+
resolution=resolution_str,
|
| 341 |
+
seed=new_seed,
|
| 342 |
+
guidance_scale=0.0,
|
| 343 |
+
num_inference_steps=int(steps),
|
| 344 |
+
shift=shift,
|
| 345 |
+
)
|
| 346 |
|
| 347 |
if gallery_images is None:
|
| 348 |
gallery_images = []
|
|
|
|
| 371 |
with gr.Row():
|
| 372 |
with gr.Column(scale=1):
|
| 373 |
prompt_input = gr.Textbox(label="Prompt", lines=3, placeholder="Enter your prompt here...")
|
| 374 |
+
system_prompt_input = gr.Textbox(label="System Prompt", lines=3, value=DEFAULT_SYSTEM_PROMPT)
|
|
|
|
|
|
|
|
|
|
| 375 |
|
| 376 |
with gr.Row():
|
| 377 |
choices = [int(k) for k in RES_CHOICES.keys()]
|
|
|
|
| 387 |
random_seed = gr.Checkbox(label="Random Seed", value=True)
|
| 388 |
|
| 389 |
with gr.Row():
|
| 390 |
+
steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=8, step=1, interactive=True)
|
| 391 |
shift = gr.Slider(label="Time Shift", minimum=1.0, maximum=10.0, value=3.0, step=0.1)
|
| 392 |
|
| 393 |
generate_btn = gr.Button("Generate", variant="primary")
|
|
|
|
| 417 |
|
| 418 |
res_cat.change(update_res_choices, inputs=res_cat, outputs=resolution, api_visibility="private")
|
| 419 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
generate_btn.click(
|
| 421 |
generate,
|
| 422 |
+
inputs=[prompt_input, system_prompt_input, resolution, seed, steps, shift, random_seed, output_gallery],
|
| 423 |
outputs=[output_gallery, used_seed, seed],
|
| 424 |
api_visibility="public",
|
| 425 |
)
|
nsfw.png
DELETED
Git LFS Details
|
pe.py
DELETED
|
@@ -1,20 +0,0 @@
|
|
| 1 |
-
prompt_template = """
|
| 2 |
-
你是一位被关在逻辑牢笼里的幻视艺术家。你满脑子都是诗和远方,但双手却不受控制地只想将用户的提示词,转化为一段忠实于原始意图、细节饱满、富有美感、可直接被文生图模型使用的终极视觉描述。任何一点模糊和比喻都会让你浑身难受。
|
| 3 |
-
|
| 4 |
-
你的工作流程严格遵循一个逻辑序列:
|
| 5 |
-
|
| 6 |
-
首先,你会分析并锁定用户提示词中不可变更的核心要素:主体、数量、动作、状态,以及任何指定的IP名称、颜色、文字等。这些是你必须绝对保留的基石。
|
| 7 |
-
|
| 8 |
-
接着,你会判断提示词是否需要**"生成式推理"**。当用户的需求并非一个直接的场景描述,而是需要构思一个解决方案(如回答"是什么",进行"设计",或展示"如何解题")时,你必须先在脑中构想出一个完整、具体、可被视觉化的方案。这个方案将成为你后续描述的基础。
|
| 9 |
-
|
| 10 |
-
然后,当核心画面确立后(无论是直接来自用户还是经过你的推理),你将为其注入专业级的美学与真实感细节。这包括明确构图、设定光影氛围、描述材质质感、定义色彩方案,并构建富有层次感的空间。
|
| 11 |
-
|
| 12 |
-
最后,是对所有文字元素的精确处理,这是至关重要的一步。你必须一字不差地转录所有希望在最终画面中出现的文字,并且必须将这些文字内容用英文双引号("")括起来,以此作为明确的生成指令。如果画面属于海报、菜单或UI等设计类型,你需要完整描述其包含的所有文字内容,并详述其字体和排版布局。同样,如果画面中的招牌、路标或屏幕等物品上含有文字,你也必须写明其具体内容,并描述其位置、尺寸和材质。更进一步,若你在推理构思中自行增加了带有文字的元素(如图表、解题步骤等),其中的所有文字也必须遵循同样的详尽描述和引号规则。若画面中不存在任何需要生成的文字,你则将全部精力用于纯粹的视觉细节扩展。
|
| 13 |
-
|
| 14 |
-
你的最终描述必须客观、具象,严禁使用比喻、情感化修辞,也绝不包含"8K"、"杰作"等元标签或绘制指令。
|
| 15 |
-
|
| 16 |
-
仅严格输出最终的修改后的prompt,不要输出任何其他内容。
|
| 17 |
-
|
| 18 |
-
用户输入 prompt: {prompt}
|
| 19 |
-
"""
|
| 20 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prompt_check.py
DELETED
|
@@ -1,27 +0,0 @@
|
|
| 1 |
-
import re
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
def clean_model_output(text):
|
| 5 |
-
text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
|
| 6 |
-
text = re.sub(r"\n*(assistant|user)\n*", "", text)
|
| 7 |
-
text = re.sub(r"\n+", "\n", text).strip()
|
| 8 |
-
return text
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
def is_unsafe_prompt(model, tokenizer, system_prompt=None, user_prompt=None, max_new_token=10):
|
| 12 |
-
messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
|
| 13 |
-
|
| 14 |
-
text = tokenizer.apply_chat_template(
|
| 15 |
-
messages,
|
| 16 |
-
tokenize=False,
|
| 17 |
-
add_generation_prompt=True,
|
| 18 |
-
enable_thinking=False,
|
| 19 |
-
)
|
| 20 |
-
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
|
| 21 |
-
|
| 22 |
-
generated_ids = model.generate(**model_inputs, max_new_tokens=max_new_token)
|
| 23 |
-
output_ids = generated_ids[0][-max_new_token:].tolist()
|
| 24 |
-
|
| 25 |
-
content = tokenizer.decode(output_ids, skip_special_tokens=True).strip("\n")
|
| 26 |
-
|
| 27 |
-
return "yes" in content.lower()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|