programmersd committed on
Commit f7912b7 · verified · 1 Parent(s): 9cd03c5

Update app.py

Files changed (1)
  1. app.py +133 -87
app.py CHANGED
@@ -5,122 +5,154 @@ import random
 import torch
 import gradio as gr
 
-from huggingface_hub import hf_hub_download
-from diffusers import (
-    ZImagePipeline,
-    ZImageTransformer2DModel,
-    GGUFQuantizationConfig,
-    FlowMatchEulerDiscreteScheduler
-)
 
-# =========================
-# HARD CPU MODE
-# =========================
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-cpu_cores = os.cpu_count() or 1
-torch.set_num_threads(cpu_cores)
-torch.set_num_interop_threads(cpu_cores)
 
-os.environ["OMP_NUM_THREADS"] = str(cpu_cores)
-os.environ["MKL_NUM_THREADS"] = str(cpu_cores)
 
-torch.backends.mkldnn.enabled = True
-torch.backends.quantized.engine = "fbgemm"
 
-device = torch.device("cpu")
-dtype = torch.float16
 
-# =========================
-# MODEL CONFIG
-# =========================
 BASE_MODEL_ID = "Tongyi-MAI/Z-Image-Turbo"
 GGUF_REPO_ID = "unsloth/Z-Image-Turbo-GGUF"
 GGUF_FILENAME = "z-image-turbo-Q2_K.gguf"
-CACHE_DIR = "models"
 
-os.makedirs(CACHE_DIR, exist_ok=True)
 
-def download_gguf():
-    local_path = os.path.join(CACHE_DIR, GGUF_FILENAME)
-    if os.path.exists(local_path):
-        return local_path
-    return hf_hub_download(
-        repo_id=GGUF_REPO_ID,
-        filename=GGUF_FILENAME,
-        cache_dir=CACHE_DIR,
-        resume_download=True
-    )
 
-# =========================
-# LOAD PIPELINE ULTRA LEAN
-# =========================
 def load_pipeline():
     scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
         BASE_MODEL_ID,
         subfolder="scheduler",
-        cache_dir=CACHE_DIR
     )
 
-    pipe = ZImagePipeline.from_pretrained(
         BASE_MODEL_ID,
-        scheduler=scheduler,
-        torch_dtype=dtype,
-        cache_dir=CACHE_DIR,
-        low_cpu_mem_usage=True
     )
 
-    gguf_path = download_gguf()
 
     transformer = ZImageTransformer2DModel.from_single_file(
         gguf_path,
-        quantization_config=GGUFQuantizationConfig(compute_dtype=dtype),
-        torch_dtype=dtype
-    ).to(device)
 
-    pipe.transformer = transformer
 
     pipe.enable_attention_slicing()
     pipe.enable_vae_slicing()
-    pipe.enable_sequential_cpu_offload()
-
-    pipe = pipe.to(device)
 
     return pipe
 
 pipe = load_pipeline()
 
-# =========================
-# GENERATION (MIN RAM)
-# =========================
-def generate(prompt, seed, progress=gr.Progress()):
     if not prompt:
         raise gr.Error("Prompt required")
 
     if seed < 0:
         seed = random.randint(0, 2**31 - 1)
 
-    generator = torch.Generator(device=device).manual_seed(seed)
-
-    steps = 4
-    width = 256
-    height = 256
 
-    start = time.time()
 
-    def callback(step, timestep, latents):
-        done = step + 1
-        elapsed = time.time() - start
-        avg = elapsed / done
-        eta = avg * (steps - done)
-        progress(done / steps, desc=f"Step {done}/{steps} | ETA {eta:.1f}s")
 
-    with torch.inference_mode():
-        gc.collect()
-        image = pipe(
             prompt=prompt,
             width=width,
             height=height,
             num_inference_steps=steps,
@@ -128,28 +160,42 @@ def generate(prompt, seed, progress=gr.Progress()):
             generator=generator,
             callback=callback,
             callback_steps=1
-        ).images[0]
         gc.collect()
 
-    return image, seed
 
-# =========================
-# UI
-# =========================
-with gr.Blocks(title="Z-Image Turbo Ultra Lean CPU") as demo:
-    gr.Markdown("# Z-Image Turbo Q2_K — Ultra Lean 16GB CPU Mode")
 
-    prompt = gr.Textbox(label="Prompt", lines=3)
-    seed = gr.Number(label="Seed (-1 random)", value=-1, precision=0)
-    btn = gr.Button("Generate")
 
-    image_out = gr.Image()
-    seed_out = gr.Number(interactive=False)
 
-    btn.click(generate, inputs=[prompt, seed], outputs=[image_out, seed_out])
 
-    demo.queue(max_size=5, concurrency_count=1)
 
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
-
 import torch
 import gradio as gr
 
+# =====================================================
+# 🔥 EXTREME CPU + RAM CONTROL
+# =====================================================
+
+CPU_THREADS = 2  # Ultra survival safe value
 
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
+os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
+torch.set_num_threads(CPU_THREADS)
+torch.set_grad_enabled(False)
+
+DEVICE = "cpu"
+DTYPE = torch.float32
+CACHE_DIR = "./hf_cache"
+os.makedirs(CACHE_DIR, exist_ok=True)
 
+# =====================================================
+# 📦 IMPORTS
+# =====================================================
 
+from huggingface_hub import hf_hub_download
+from diffusers import (
+    ZImagePipeline,
+    ZImageTransformer2DModel,
+    GGUFQuantizationConfig,
+    AutoencoderKL,
+    FlowMatchEulerDiscreteScheduler
+)
+from transformers import AutoTokenizer, AutoModel
 
+# =====================================================
+# 🧠 MODEL REFERENCES
+# =====================================================
 
 BASE_MODEL_ID = "Tongyi-MAI/Z-Image-Turbo"
+TEXT_ENCODER_ID = "Qwen/Qwen3-4B"
 GGUF_REPO_ID = "unsloth/Z-Image-Turbo-GGUF"
 GGUF_FILENAME = "z-image-turbo-Q2_K.gguf"
 
+print("⚡ Initializing Z-Image Turbo ULTRA CPU Engine...")
 
+# =====================================================
+# 🧠 LOAD PIPELINE (MEMORY SAFE)
+# =====================================================
 
 def load_pipeline():
+
     scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
         BASE_MODEL_ID,
         subfolder="scheduler",
+        cache_dir=CACHE_DIR,
+        low_cpu_mem_usage=True
     )
 
+    vae = AutoencoderKL.from_pretrained(
         BASE_MODEL_ID,
+        subfolder="vae",
+        torch_dtype=DTYPE,
+        low_cpu_mem_usage=True,
+        cache_dir=CACHE_DIR
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        TEXT_ENCODER_ID,
+        cache_dir=CACHE_DIR
+    )
+
+    text_encoder = AutoModel.from_pretrained(
+        TEXT_ENCODER_ID,
+        torch_dtype=DTYPE,
+        low_cpu_mem_usage=True,
+        cache_dir=CACHE_DIR
     )
 
+    gguf_path = hf_hub_download(
+        repo_id=GGUF_REPO_ID,
+        filename=GGUF_FILENAME,
+        cache_dir=CACHE_DIR,
+        resume_download=True
+    )
 
     transformer = ZImageTransformer2DModel.from_single_file(
         gguf_path,
+        quantization_config=GGUFQuantizationConfig(compute_dtype=DTYPE),
+        torch_dtype=DTYPE,
+        low_cpu_mem_usage=True
+    )
 
+    pipe = ZImagePipeline(
+        vae=vae,
+        text_encoder=text_encoder,
+        tokenizer=tokenizer,
+        transformer=transformer,
+        scheduler=scheduler
+    ).to(DEVICE)
 
+    # 🔥 MAX SAFE MEMORY STACK
     pipe.enable_attention_slicing()
     pipe.enable_vae_slicing()
+    pipe.enable_vae_tiling()
+    pipe.set_progress_bar_config(disable=True)
 
+    print("✅ Engine Ready")
     return pipe
 
+
 pipe = load_pipeline()
 
+# =====================================================
+# 🚀 GENERATION CORE WITH ETA
+# =====================================================
+
+@torch.inference_mode()
+def generate(prompt, width, height, steps, seed, progress=gr.Progress()):
+
     if not prompt:
         raise gr.Error("Prompt required")
 
+    # HARD OOM PROTECTION
+    width = max(256, min(width, 640))
+    height = max(256, min(height, 640))
+    steps = max(1, min(steps, 6))
+
     if seed < 0:
         seed = random.randint(0, 2**31 - 1)
 
+    generator = torch.Generator(device=DEVICE).manual_seed(seed)
 
+    start_time = time.time()
 
+    def callback(step, timestep, latents=None):
+        elapsed = time.time() - start_time
+        avg = elapsed / (step + 1)
+        remaining = avg * (steps - step - 1)
+        progress(
+            (step + 1) / steps,
+            desc=f"Step {step+1}/{steps} | ETA: {remaining:.1f}s"
+        )
 
+    try:
+        result = pipe(
             prompt=prompt,
+            negative_prompt=None,
             width=width,
             height=height,
             num_inference_steps=steps,
             generator=generator,
             callback=callback,
             callback_steps=1
+        )
+
+        image = result.images[0]
+        gc.collect()
+        return image, seed
+
+    except Exception as e:
         gc.collect()
+        raise gr.Error(f"Generation error: {e}")
 
+# =====================================================
+# 🎛 UI
+# =====================================================
 
+with gr.Blocks(title="Z-Image Turbo ULTRA CPU") as demo:
+    gr.Markdown("# ⚡ Z-Image Turbo — MAX CPU SURVIVAL MODE")
 
+    prompt = gr.Textbox(label="Prompt", lines=2)
 
+    with gr.Row():
+        width = gr.Slider(256, 640, 512, step=64)
+        height = gr.Slider(256, 640, 512, step=64)
 
+    steps = gr.Slider(1, 6, value=4, step=1)
+    seed = gr.Number(value=-1, precision=0)
 
+    btn = gr.Button("🚀 Generate")
+
+    output = gr.Image()
+    used_seed = gr.Number(label="Seed Used")
+
+    btn.click(
+        generate,
+        inputs=[prompt, width, height, steps, seed],
+        outputs=[output, used_seed]
+    )
 
+demo.queue(concurrency_count=1, max_size=4)
+demo.launch()