Husr committed on
Commit 46e910a · 1 Parent(s): f278e43

Fix PEFT and AOTI

Files changed (4)
  1. .gitignore +1 -0
  2. README.md +5 -3
  3. app.py +93 -20
  4. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1 @@
+ AGENTS.md
README.md CHANGED
@@ -26,16 +26,18 @@ Gradio Space using the official Z-Image pipeline (`Tongyi-MAI/Z-Image-Turbo`) wi
  3) Manually add the LoRA file from https://civitai.com/models/2206377/zit-mystic-xxx to `lora/zit-mystic-xxx.safetensors` (or set `LORA_PATH`). Network fetch of Civitai is not handled in the Space.
  4) If model download fails with a token error, set `HF_TOKEN` in the Space secrets (some repos require authentication).
  5) (Optional) Toggle advanced envs below; then the Space will launch `app.py`. The header shows whether the LoRA was detected/loaded.
+ - If the header/log says `PEFT backend is required for LoRA`, install `peft` (already included in `requirements.txt`) and restart/rebuild.

  ## Environment variables
  - `MODEL_PATH` (default `Tongyi-MAI/Z-Image-Turbo`): HF repo or local path for the Z-Image model.
  - `LORA_PATH` (default `lora/zit-mystic-xxx.safetensors`): Path to the LoRA file; loaded if present.
  - `HF_TOKEN`: HF token for gated/private models or faster pulls.
+ - `MODEL_DTYPE` (default `auto`): `bf16` if supported, else `fp16` (override with `bf16`/`fp16`/`fp32`).
  - `ENABLE_COMPILE` (default `false`): Enable `torch.compile` on the transformer.
  - `ENABLE_WARMUP` (default `false`): Run a quick warmup across resolutions after load (adds startup time).
- - `ATTENTION_BACKEND` (default `flash_3`): Backend for transformer attention.
- - `OFFLOAD_TO_CPU_AFTER_RUN` (default `true`): Move the model back to CPU after each generation to play nicer with ZeroGPU.
- - `ENABLE_AOTI` (default `false`): Try to load ZeroGPU AoTI blocks via `spaces.aoti_blocks_load` for faster inference.
+ - `ATTENTION_BACKEND` (default `_flash_3`): Backend for transformer attention (falls back to `flash`/`xformers`/`native`).
+ - `OFFLOAD_TO_CPU_AFTER_RUN` (default `false`): Move the model back to CPU after each generation (useful on ZeroGPU; slower on normal GPUs).
+ - `ENABLE_AOTI` (default `true`): Try to load ZeroGPU AoTI blocks via `spaces.aoti_blocks_load` for faster inference.
  - `AOTI_REPO` (default `zerogpu-aoti/Z-Image`): AoTI blocks repo.
  - `AOTI_VARIANT` (default `fa3`): AoTI variant.

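Note: `ENABLE_AOTI`, `OFFLOAD_TO_CPU_AFTER_RUN`, and the other toggles above are plain environment strings; the hunks do not show how `app.py` parses them, so the `env_flag` helper below is a hypothetical sketch of the usual pattern, not code from this commit.

```python
import os

def env_flag(name: str, default: bool) -> bool:
    """Parse a boolean toggle from the environment (hypothetical helper)."""
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in {"1", "true", "yes", "on"}

ENABLE_AOTI = env_flag("ENABLE_AOTI", True)                             # default flipped to true in this commit
OFFLOAD_TO_CPU_AFTER_RUN = env_flag("OFFLOAD_TO_CPU_AFTER_RUN", False)  # default flipped to false
```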
app.py CHANGED
@@ -1,4 +1,5 @@
  import os
+ import importlib.util
  import random
  import re
  import threading
@@ -25,6 +26,52 @@ AOTI_REPO = os.environ.get("AOTI_REPO", "zerogpu-aoti/Z-Image")
  AOTI_VARIANT = os.environ.get("AOTI_VARIANT", "fa3")
  DEFAULT_CFG = float(os.environ.get("DEFAULT_CFG", "0.0"))

+
+ def resolve_model_dtype() -> torch.dtype:
+     override = os.environ.get("MODEL_DTYPE")
+     if override:
+         key = override.strip().lower()
+         if key in {"bf16", "bfloat16"}:
+             return torch.bfloat16
+         if key in {"fp16", "float16", "half"}:
+             return torch.float16
+         if key in {"fp32", "float32"}:
+             return torch.float32
+         print(f"Unknown MODEL_DTYPE={override!r}; falling back to auto.")
+
+     if torch.cuda.is_available():
+         is_bf16_supported = getattr(torch.cuda, "is_bf16_supported", None)
+         if callable(is_bf16_supported) and is_bf16_supported():
+             return torch.bfloat16
+         return torch.float16
+     return torch.float32
+
+
+ def dtype_label(dtype: torch.dtype) -> str:
+     if dtype == torch.bfloat16:
+         return "bf16"
+     if dtype == torch.float16:
+         return "fp16"
+     if dtype == torch.float32:
+         return "fp32"
+     return str(dtype).replace("torch.", "")
+
+
+ def get_gpu_summary() -> str:
+     if not torch.cuda.is_available():
+         return "CPU"
+     try:
+         name = torch.cuda.get_device_name(0)
+         major, minor = torch.cuda.get_device_capability(0)
+         return f"{name} (cc {major}.{minor})"
+     except Exception:
+         return "CUDA"
+
+
+ MODEL_DTYPE = resolve_model_dtype()
+ MODEL_DTYPE_LABEL = dtype_label(MODEL_DTYPE)
+ GPU_SUMMARY = get_gpu_summary()
+
  if torch.cuda.is_available():
      torch.backends.cuda.matmul.allow_tf32 = True
      torch.set_float32_matmul_precision("high")
@@ -106,6 +153,8 @@ lora_error: str | None = None
  pipe_lock = threading.Lock()
  pipe_on_gpu: bool = False
  aoti_loaded: bool = False
+ applied_attention_backend: str | None = None
+ aoti_error: str | None = None

  SCHEDULERS = {"FlowMatch Euler": FlowMatchEulerDiscreteScheduler}
  try:
@@ -116,6 +165,13 @@ except Exception:
      pass


+ def module_available(module_name: str) -> bool:
+     try:
+         return importlib.util.find_spec(module_name) is not None
+     except (ImportError, ValueError):
+         return False
+
+
  def parse_resolution(resolution: str) -> Tuple[int, int]:
      match = re.search(r"(\d+)\s*[×x]\s*(\d+)", resolution)
      if match:
@@ -150,6 +206,8 @@ def set_attention_backend_safe(transformer, backend: str) -> str:
  def attach_lora(pipeline: ZImagePipeline) -> Tuple[bool, str | None]:
      if not LORA_PATH or not os.path.isfile(LORA_PATH):
          return False, "LoRA file not found"
+     if not module_available("peft"):
+         return False, "PEFT backend is required for LoRA. Install `peft` and restart."
      try:
          folder, weight_name = os.path.split(LORA_PATH)
          folder = folder or "."
@@ -169,13 +227,14 @@ def set_lora_scale(pipeline: ZImagePipeline, scale: float) -> None:


  def load_models() -> Tuple[ZImagePipeline, bool, str | None]:
-     global pipe, lora_loaded, lora_error, pipe_on_gpu
+     global pipe, lora_loaded, lora_error, pipe_on_gpu, applied_attention_backend
      if pipe is not None and getattr(pipe, "transformer", None) is not None:
          return pipe, lora_loaded, lora_error

      use_auth_token = HF_TOKEN if HF_TOKEN else None
      hf_kwargs = {"use_auth_token": use_auth_token} if use_auth_token else {}
      print(f"Loading Z-Image from {MODEL_PATH}...")
+     print(f"GPU: {GPU_SUMMARY} | dtype: {MODEL_DTYPE_LABEL}")

      if not torch.cuda.is_available():
          raise RuntimeError("CUDA is not available. This app requires a GPU.")
@@ -184,24 +243,24 @@ def load_models() -> Tuple[ZImagePipeline, bool, str | None]:
          vae = AutoencoderKL.from_pretrained(
              MODEL_PATH,
              subfolder="vae",
-             torch_dtype=torch.bfloat16,
+             torch_dtype=MODEL_DTYPE,
              **hf_kwargs,
-         ).to("cuda", torch.bfloat16)
+         ).to("cuda", MODEL_DTYPE)
          text_encoder = AutoModelForCausalLM.from_pretrained(
              MODEL_PATH,
              subfolder="text_encoder",
-             torch_dtype=torch.bfloat16,
+             torch_dtype=MODEL_DTYPE,
              **hf_kwargs,
-         ).to("cuda", torch.bfloat16).eval()
+         ).to("cuda", MODEL_DTYPE).eval()
          tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, subfolder="tokenizer", **hf_kwargs)
      else:
-         vae = AutoencoderKL.from_pretrained(os.path.join(MODEL_PATH, "vae"), torch_dtype=torch.bfloat16).to(
-             "cuda", torch.bfloat16
+         vae = AutoencoderKL.from_pretrained(os.path.join(MODEL_PATH, "vae"), torch_dtype=MODEL_DTYPE).to(
+             "cuda", MODEL_DTYPE
          )
          text_encoder = AutoModelForCausalLM.from_pretrained(
              os.path.join(MODEL_PATH, "text_encoder"),
-             torch_dtype=torch.bfloat16,
-         ).to("cuda", torch.bfloat16).eval()
+             torch_dtype=MODEL_DTYPE,
+         ).to("cuda", MODEL_DTYPE).eval()
          tokenizer = AutoTokenizer.from_pretrained(os.path.join(MODEL_PATH, "tokenizer"))

      tokenizer.padding_side = "left"
@@ -212,20 +271,20 @@
          transformer = ZImageTransformer2DModel.from_pretrained(
              MODEL_PATH,
              subfolder="transformer",
-             torch_dtype=torch.bfloat16,
+             torch_dtype=MODEL_DTYPE,
              **hf_kwargs,
          )
      else:
          transformer = ZImageTransformer2DModel.from_pretrained(
              os.path.join(MODEL_PATH, "transformer"),
-             torch_dtype=torch.bfloat16,
+             torch_dtype=MODEL_DTYPE,
          )

-     applied_backend = set_attention_backend_safe(transformer, ATTENTION_BACKEND)
-     print(f"Attention backend: {applied_backend}")
+     applied_attention_backend = set_attention_backend_safe(transformer, ATTENTION_BACKEND)
+     print(f"Attention backend: {applied_attention_backend}")

-     pipeline.transformer = transformer.to("cuda", torch.bfloat16)
-     pipeline.to("cuda", torch.bfloat16)
+     pipeline.transformer = transformer.to("cuda", MODEL_DTYPE)
+     pipeline.to("cuda", MODEL_DTYPE)

      loaded, error = attach_lora(pipeline)
      lora_loaded, lora_error = loaded, error
@@ -387,7 +446,7 @@ def warmup_model(pipeline: ZImagePipeline, resolutions: List[str]) -> None:


  def init_app() -> None:
-     global aoti_loaded
+     global aoti_loaded, aoti_error
      try:
          ensure_models_loaded()
          if ENABLE_AOTI and not aoti_loaded and pipe is not None and getattr(pipe, "transformer", None) is not None:
@@ -395,8 +454,10 @@ def init_app() -> None:
              pipe.transformer.layers._repeated_blocks = ["ZImageTransformerBlock"]
              spaces.aoti_blocks_load(pipe.transformer.layers, AOTI_REPO, variant=AOTI_VARIANT)
              aoti_loaded = True
+             aoti_error = None
              print(f"AoTI loaded: {AOTI_REPO} (variant={AOTI_VARIANT})")
      except Exception as exc:  # noqa: BLE001
+         aoti_error = str(exc)
          print(f"AoTI load failed (continuing without AoTI): {exc}")
      if ENABLE_WARMUP and pipe is not None:
          ensure_on_gpu()
@@ -468,9 +529,18 @@ def generate(
  init_app()

  with gr.Blocks(title="Z-Image + LoRA") as demo:
-     pipe_status = "loaded (CPU)" if pipe else "not loaded"
+     pipe_status = "loaded (GPU)" if pipe and pipe_on_gpu else "loaded (CPU)" if pipe else "not loaded"
      lora_file_status = "found" if os.path.isfile(LORA_PATH) else "missing"
-     lora_status = f"LoRA file: {LORA_PATH} ({lora_file_status})"
+     if lora_loaded:
+         lora_status = f"LoRA: loaded ({LORA_PATH})"
+     elif lora_error:
+         lora_status = f"LoRA: not loaded ({lora_error})"
+     else:
+         lora_status = f"LoRA file: {LORA_PATH} ({lora_file_status})"
+
+     attention_status = applied_attention_backend or "unknown"
+     aoti_status = "loaded" if aoti_loaded else f"failed ({aoti_error})" if aoti_error else "not loaded"
+     compile_status = "on" if ENABLE_COMPILE else "off"

      gr.Markdown(
          f"""<div align="center">
@@ -478,6 +548,8 @@ with gr.Blocks(title="Z-Image + LoRA") as demo:
  # Z-Image Generation (No SD fallback)

  Model: `{MODEL_PATH}` | {pipe_status}
+ GPU: `{GPU_SUMMARY}` | dtype: `{MODEL_DTYPE_LABEL}`
+ Attention: `{attention_status}` | AoTI: `{aoti_status}` | torch.compile: `{compile_status}`
  {lora_status}

  </div>"""
@@ -530,14 +602,15 @@ Model: `{MODEL_PATH}` | {pipe_status}
          max_seq = gr.Slider(label="Max Sequence Length", minimum=256, maximum=1024, value=512, step=16)

          with gr.Row():
-             use_lora = gr.Checkbox(label="Use LoRA", value=True, interactive=True)
+             lora_controls_enabled = bool(lora_loaded)
+             use_lora = gr.Checkbox(label="Use LoRA", value=lora_controls_enabled, interactive=lora_controls_enabled)
              lora_strength = gr.Slider(
                  label="LoRA Strength",
                  minimum=0.0,
                  maximum=1.5,
                  value=1.0,
                  step=0.05,
-                 interactive=True,
+                 interactive=lora_controls_enabled,
              )

          generate_btn = gr.Button("Generate", variant="primary")
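Note: the dtype plumbing above can be sanity-checked in isolation. A minimal sketch, assuming `resolve_model_dtype()` and `dtype_label()` from this commit are in scope (pasting them into a REPL is enough; importing `app.py` wholesale would start the Space):

```python
import os
import torch

os.environ["MODEL_DTYPE"] = "fp16"
assert resolve_model_dtype() is torch.float16   # explicit override wins

os.environ["MODEL_DTYPE"] = "bogus"             # unknown value: prints a warning, falls back to auto
auto = resolve_model_dtype()                    # bf16 if the GPU supports it, else fp16; fp32 on CPU
print(dtype_label(auto))                        # e.g. "bf16"
```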
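Note: the `module_available("peft")` guard in `attach_lora` front-runs the precondition that diffusers' LoRA loading enforces (without `peft` installed it raises the "PEFT backend is required" error). A standalone check of the same preconditions; the path is the Space default, and the final message is illustrative (the actual loader call sits outside these hunks):

```python
import importlib.util
import os

LORA_PATH = os.environ.get("LORA_PATH", "lora/zit-mystic-xxx.safetensors")

if not os.path.isfile(LORA_PATH):
    print(f"LoRA file not found: {LORA_PATH}")           # same message attach_lora() returns
elif importlib.util.find_spec("peft") is None:
    print("PEFT backend is required for LoRA. Install `peft` and restart.")
else:
    print("Preconditions OK: attach_lora() can proceed to load the LoRA.")
```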
requirements.txt CHANGED
@@ -1,6 +1,7 @@
  accelerate>=0.30.0
  diffusers>=0.32.0
  gradio>=4.44.0
+ peft>=0.10.0
  Pillow>=10.0.0
  safetensors>=0.4.2
  spaces>=0.27.0
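Note: the new pin only takes effect after the Space container is rebuilt. To confirm the running environment actually satisfies it:

```python
from importlib.metadata import PackageNotFoundError, version

try:
    print("peft", version("peft"))   # should satisfy >= 0.10.0 per requirements.txt
except PackageNotFoundError:
    print("peft not installed: LoRA loading will fail with the PEFT error above")
```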