Levaser committed on
Commit
4743b2e
·
verified ·
1 Parent(s): 583baf3

Use CPU-friendlier FLUX GGUF profile

Browse files
Files changed (1) hide show
  1. app.py +23 -10
app.py CHANGED
@@ -8,12 +8,10 @@ from PIL import Image, ImageDraw, ImageFont, ImageOps
8
 
9
 
10
  MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"
11
- GGUF_URL = (
12
- "https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main/"
13
- "flux-2-klein-4b-Q4_K_M.gguf"
14
- )
15
  MAX_SEED = 2_147_483_647
16
- MAX_GENERATION_EDGE = 1024
 
17
  MIN_GENERATION_EDGE = 256
18
  SIZE_STEP = 32
19
 
@@ -50,6 +48,15 @@ def _dtype() -> torch.dtype:
50
  return torch.bfloat16 if torch.cuda.is_available() else torch.float32
51
 
52
 
 
 
 
 
 
 
 
 
 
53
  def get_pipeline() -> Flux2KleinPipeline:
54
  global PIPELINE
55
 
@@ -62,7 +69,7 @@ def get_pipeline() -> Flux2KleinPipeline:
62
 
63
  quantization_config = GGUFQuantizationConfig(compute_dtype=_dtype())
64
  transformer = Flux2Transformer2DModel.from_single_file(
65
- GGUF_URL,
66
  config=MODEL_ID,
67
  subfolder="transformer",
68
  quantization_config=quantization_config,
@@ -75,10 +82,12 @@ def get_pipeline() -> Flux2KleinPipeline:
75
  torch_dtype=_dtype(),
76
  )
77
  pipe.vae.enable_slicing()
 
78
 
79
  if torch.cuda.is_available():
80
  pipe.enable_model_cpu_offload()
81
  else:
 
82
  pipe.to("cpu")
83
 
84
  pipe.set_progress_bar_config(disable=True)
@@ -93,7 +102,8 @@ def _round_to_step(value: int, step: int = SIZE_STEP) -> int:
93
  def _generation_size(image: Image.Image) -> tuple[int, int]:
94
  width, height = image.size
95
  longest_edge = max(width, height)
96
- scale = min(1.0, MAX_GENERATION_EDGE / longest_edge) if longest_edge else 1.0
 
97
 
98
  resized_width = max(MIN_GENERATION_EDGE, int(width * scale))
99
  resized_height = max(MIN_GENERATION_EDGE, int(height * scale))
@@ -101,8 +111,8 @@ def _generation_size(image: Image.Image) -> tuple[int, int]:
101
  gen_width = _round_to_step(resized_width)
102
  gen_height = _round_to_step(resized_height)
103
 
104
- gen_width = max(MIN_GENERATION_EDGE, min(MAX_GENERATION_EDGE, gen_width))
105
- gen_height = max(MIN_GENERATION_EDGE, min(MAX_GENERATION_EDGE, gen_height))
106
  return gen_width, gen_height
107
 
108
 
@@ -212,6 +222,9 @@ with gr.Blocks(css=CSS) as demo:
212
  Upload a normal photo and get a side-by-side comparison:
213
  the left panel stays untouched, the right panel is regenerated
214
  in a BSOD, computers, robots, and industrial sci-fi style.
 
 
 
215
  """
216
  )
217
 
@@ -251,7 +264,7 @@ with gr.Blocks(css=CSS) as demo:
251
  minimum=1,
252
  maximum=50,
253
  step=1,
254
- value=12,
255
  )
256
  guidance_scale = gr.Slider(
257
  label="Guidance scale",
 
8
 
9
 
10
  MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"
11
+ GGUF_BASE_URL = "https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main"
 
 
 
12
  MAX_SEED = 2_147_483_647
13
+ GPU_MAX_GENERATION_EDGE = 1024
14
+ CPU_MAX_GENERATION_EDGE = 512
15
  MIN_GENERATION_EDGE = 256
16
  SIZE_STEP = 32
17
 
 
48
  return torch.bfloat16 if torch.cuda.is_available() else torch.float32
49
 
50
 
51
+ def _gguf_url() -> str:
52
+ filename = "flux-2-klein-4b-Q4_K_M.gguf" if torch.cuda.is_available() else "flux-2-klein-4b-Q2_K.gguf"
53
+ return f"{GGUF_BASE_URL}/{filename}"
54
+
55
+
56
+ def _max_generation_edge() -> int:
57
+ return GPU_MAX_GENERATION_EDGE if torch.cuda.is_available() else CPU_MAX_GENERATION_EDGE
58
+
59
+
60
  def get_pipeline() -> Flux2KleinPipeline:
61
  global PIPELINE
62
 
 
69
 
70
  quantization_config = GGUFQuantizationConfig(compute_dtype=_dtype())
71
  transformer = Flux2Transformer2DModel.from_single_file(
72
+ _gguf_url(),
73
  config=MODEL_ID,
74
  subfolder="transformer",
75
  quantization_config=quantization_config,
 
82
  torch_dtype=_dtype(),
83
  )
84
  pipe.vae.enable_slicing()
85
+ pipe.vae.enable_tiling()
86
 
87
  if torch.cuda.is_available():
88
  pipe.enable_model_cpu_offload()
89
  else:
90
+ pipe.enable_attention_slicing()
91
  pipe.to("cpu")
92
 
93
  pipe.set_progress_bar_config(disable=True)
 
102
  def _generation_size(image: Image.Image) -> tuple[int, int]:
103
  width, height = image.size
104
  longest_edge = max(width, height)
105
+ max_generation_edge = _max_generation_edge()
106
+ scale = min(1.0, max_generation_edge / longest_edge) if longest_edge else 1.0
107
 
108
  resized_width = max(MIN_GENERATION_EDGE, int(width * scale))
109
  resized_height = max(MIN_GENERATION_EDGE, int(height * scale))
 
111
  gen_width = _round_to_step(resized_width)
112
  gen_height = _round_to_step(resized_height)
113
 
114
+ gen_width = max(MIN_GENERATION_EDGE, min(max_generation_edge, gen_width))
115
+ gen_height = max(MIN_GENERATION_EDGE, min(max_generation_edge, gen_height))
116
  return gen_width, gen_height
117
 
118
 
 
222
  Upload a normal photo and get a side-by-side comparison:
223
  the left panel stays untouched, the right panel is regenerated
224
  in a BSOD, computers, robots, and industrial sci-fi style.
225
+
226
+ On free CPU hardware, generation uses a lighter quant and smaller
227
+ working size, so higher step counts can be slow.
228
  """
229
  )
230
 
 
264
  minimum=1,
265
  maximum=50,
266
  step=1,
267
+ value=4,
268
  )
269
  guidance_scale = gr.Slider(
270
  label="Guidance scale",