programmersd committed on
Commit f7912b7 · verified · 1 Parent(s): 9cd03c5

Update app.py

Files changed (1)
  1. app.py +133 -87
app.py CHANGED
@@ -5,122 +5,154 @@ import random
 import torch
 import gradio as gr
 
-from huggingface_hub import hf_hub_download
-from diffusers import (
-    ZImagePipeline,
-    ZImageTransformer2DModel,
-    GGUFQuantizationConfig,
-    FlowMatchEulerDiscreteScheduler
-)
 
-# =========================
-# HARD CPU MODE
-# =========================
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
-cpu_cores = os.cpu_count() or 1
-torch.set_num_threads(cpu_cores)
-torch.set_num_interop_threads(cpu_cores)
 
-os.environ["OMP_NUM_THREADS"] = str(cpu_cores)
-os.environ["MKL_NUM_THREADS"] = str(cpu_cores)
 
-torch.backends.mkldnn.enabled = True
-torch.backends.quantized.engine = "fbgemm"
 
-device = torch.device("cpu")
-dtype = torch.float16
 
-# =========================
-# MODEL CONFIG
-# =========================
 BASE_MODEL_ID = "Tongyi-MAI/Z-Image-Turbo"
 GGUF_REPO_ID = "unsloth/Z-Image-Turbo-GGUF"
 GGUF_FILENAME = "z-image-turbo-Q2_K.gguf"
-CACHE_DIR = "models"
 
-os.makedirs(CACHE_DIR, exist_ok=True)
 
-def download_gguf():
-    local_path = os.path.join(CACHE_DIR, GGUF_FILENAME)
-    if os.path.exists(local_path):
-        return local_path
-    return hf_hub_download(
-        repo_id=GGUF_REPO_ID,
-        filename=GGUF_FILENAME,
-        cache_dir=CACHE_DIR,
-        resume_download=True
-    )
 
-# =========================
-# LOAD PIPELINE ULTRA LEAN
-# =========================
 def load_pipeline():
     scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
         BASE_MODEL_ID,
         subfolder="scheduler",
-        cache_dir=CACHE_DIR
     )
 
-    pipe = ZImagePipeline.from_pretrained(
         BASE_MODEL_ID,
-        scheduler=scheduler,
-        torch_dtype=dtype,
-        cache_dir=CACHE_DIR,
-        low_cpu_mem_usage=True
     )
 
-    gguf_path = download_gguf()
 
     transformer = ZImageTransformer2DModel.from_single_file(
         gguf_path,
-        quantization_config=GGUFQuantizationConfig(compute_dtype=dtype),
-        torch_dtype=dtype
-    ).to(device)
 
-    pipe.transformer = transformer
 
     pipe.enable_attention_slicing()
     pipe.enable_vae_slicing()
-    pipe.enable_sequential_cpu_offload()
-
-    pipe = pipe.to(device)
 
     return pipe
 
 pipe = load_pipeline()
 
-# =========================
-# GENERATION (MIN RAM)
-# =========================
-def generate(prompt, seed, progress=gr.Progress()):
     if not prompt:
         raise gr.Error("Prompt required")
 
     if seed < 0:
         seed = random.randint(0, 2**31 - 1)
 
-    generator = torch.Generator(device=device).manual_seed(seed)
-
-    steps = 4
-    width = 256
-    height = 256
 
-    start = time.time()
 
-    def callback(step, timestep, latents):
-        done = step + 1
-        elapsed = time.time() - start
-        avg = elapsed / done
-        eta = avg * (steps - done)
-        progress(done / steps, desc=f"Step {done}/{steps} | ETA {eta:.1f}s")
 
-    with torch.inference_mode():
-        gc.collect()
-        image = pipe(
             prompt=prompt,
             width=width,
             height=height,
             num_inference_steps=steps,
@@ -128,28 +160,42 @@ def generate(prompt, seed, progress=gr.Progress()):
             generator=generator,
             callback=callback,
             callback_steps=1
-        ).images[0]
         gc.collect()
 
-    return image, seed
 
-# =========================
-# UI
-# =========================
-with gr.Blocks(title="Z-Image Turbo Ultra Lean CPU") as demo:
-    gr.Markdown("# Z-Image Turbo Q2_K — Ultra Lean 16GB CPU Mode")
 
-    prompt = gr.Textbox(label="Prompt", lines=3)
-    seed = gr.Number(label="Seed (-1 random)", value=-1, precision=0)
-    btn = gr.Button("Generate")
 
-    image_out = gr.Image()
-    seed_out = gr.Number(interactive=False)
 
-    btn.click(generate, inputs=[prompt, seed], outputs=[image_out, seed_out])
 
-    demo.queue(max_size=5, concurrency_count=1)
 
-if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)
-
 import torch
 import gradio as gr
 
+# =====================================================
+# 🔥 EXTREME CPU + RAM CONTROL
+# =====================================================
+
+CPU_THREADS = 2  # Ultra survival safe value
 
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 os.environ["HF_HUB_DISABLE_TELEMETRY"] = "1"
+os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0"
+os.environ["OMP_NUM_THREADS"] = str(CPU_THREADS)
+os.environ["MKL_NUM_THREADS"] = str(CPU_THREADS)
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
+torch.set_num_threads(CPU_THREADS)
+torch.set_grad_enabled(False)
+
+DEVICE = "cpu"
+DTYPE = torch.float32
+CACHE_DIR = "./hf_cache"
+os.makedirs(CACHE_DIR, exist_ok=True)
 
+# =====================================================
+# 📦 IMPORTS
+# =====================================================
 
+from huggingface_hub import hf_hub_download
+from diffusers import (
+    ZImagePipeline,
+    ZImageTransformer2DModel,
+    GGUFQuantizationConfig,
+    AutoencoderKL,
+    FlowMatchEulerDiscreteScheduler
+)
+from transformers import AutoTokenizer, AutoModel
 
+# =====================================================
+# 🧠 MODEL REFERENCES
+# =====================================================
 
 BASE_MODEL_ID = "Tongyi-MAI/Z-Image-Turbo"
+TEXT_ENCODER_ID = "Qwen/Qwen3-4B"
 GGUF_REPO_ID = "unsloth/Z-Image-Turbo-GGUF"
 GGUF_FILENAME = "z-image-turbo-Q2_K.gguf"
 
+print("⚡ Initializing Z-Image Turbo ULTRA CPU Engine...")
 
+# =====================================================
+# 🧠 LOAD PIPELINE (MEMORY SAFE)
+# =====================================================
 
 def load_pipeline():
+
     scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(
         BASE_MODEL_ID,
         subfolder="scheduler",
+        cache_dir=CACHE_DIR,
+        low_cpu_mem_usage=True
     )
 
+    vae = AutoencoderKL.from_pretrained(
         BASE_MODEL_ID,
+        subfolder="vae",
+        torch_dtype=DTYPE,
+        low_cpu_mem_usage=True,
+        cache_dir=CACHE_DIR
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained(
+        TEXT_ENCODER_ID,
+        cache_dir=CACHE_DIR
+    )
+
+    text_encoder = AutoModel.from_pretrained(
+        TEXT_ENCODER_ID,
+        torch_dtype=DTYPE,
+        low_cpu_mem_usage=True,
+        cache_dir=CACHE_DIR
     )
 
+    gguf_path = hf_hub_download(
+        repo_id=GGUF_REPO_ID,
+        filename=GGUF_FILENAME,
+        cache_dir=CACHE_DIR,
+        resume_download=True
+    )
 
     transformer = ZImageTransformer2DModel.from_single_file(
         gguf_path,
+        quantization_config=GGUFQuantizationConfig(compute_dtype=DTYPE),
+        torch_dtype=DTYPE,
+        low_cpu_mem_usage=True
+    )
 
+    pipe = ZImagePipeline(
+        vae=vae,
+        text_encoder=text_encoder,
+        tokenizer=tokenizer,
+        transformer=transformer,
+        scheduler=scheduler
+    ).to(DEVICE)
 
+    # 🔥 MAX SAFE MEMORY STACK
     pipe.enable_attention_slicing()
     pipe.enable_vae_slicing()
+    pipe.enable_vae_tiling()
+    pipe.set_progress_bar_config(disable=True)
 
+    print("✅ Engine Ready")
     return pipe
 
+
 pipe = load_pipeline()
 
+# =====================================================
+# 🚀 GENERATION CORE WITH ETA
+# =====================================================
+
+@torch.inference_mode()
+def generate(prompt, width, height, steps, seed, progress=gr.Progress()):
+
     if not prompt:
         raise gr.Error("Prompt required")
 
+    # HARD OOM PROTECTION
+    width = max(256, min(width, 640))
+    height = max(256, min(height, 640))
+    steps = max(1, min(steps, 6))
+
     if seed < 0:
         seed = random.randint(0, 2**31 - 1)
 
+    generator = torch.Generator(device=DEVICE).manual_seed(seed)
 
+    start_time = time.time()
 
+    def callback(step, timestep, latents=None):
+        elapsed = time.time() - start_time
+        avg = elapsed / (step + 1)
+        remaining = avg * (steps - step - 1)
+        progress(
+            (step + 1) / steps,
+            desc=f"Step {step+1}/{steps} | ETA: {remaining:.1f}s"
+        )
 
+    try:
+        result = pipe(
             prompt=prompt,
+            negative_prompt=None,
             width=width,
             height=height,
             num_inference_steps=steps,
             generator=generator,
             callback=callback,
             callback_steps=1
+        )
+
+        image = result.images[0]
+        gc.collect()
+        return image, seed
+
+    except Exception as e:
         gc.collect()
+        raise gr.Error(f"Generation error: {e}")
 
+# =====================================================
+# 🎛 UI
+# =====================================================
 
+with gr.Blocks(title="Z-Image Turbo ULTRA CPU") as demo:
+    gr.Markdown("# ⚡ Z-Image Turbo — MAX CPU SURVIVAL MODE")
 
+    prompt = gr.Textbox(label="Prompt", lines=2)
 
+    with gr.Row():
+        width = gr.Slider(256, 640, 512, step=64)
+        height = gr.Slider(256, 640, 512, step=64)
 
+    steps = gr.Slider(1, 6, value=4, step=1)
+    seed = gr.Number(value=-1, precision=0)
 
+    btn = gr.Button("🚀 Generate")
+
+    output = gr.Image()
+    used_seed = gr.Number(label="Seed Used")
+
+    btn.click(
+        generate,
+        inputs=[prompt, width, height, steps, seed],
+        outputs=[output, used_seed]
+    )
 
+demo.queue(concurrency_count=1, max_size=4)
+demo.launch()