Spaces:
Running
Running
Use CPU-friendlier FLUX GGUF profile
Browse files
app.py
CHANGED
|
@@ -8,12 +8,10 @@ from PIL import Image, ImageDraw, ImageFont, ImageOps
|
|
| 8 |
|
| 9 |
|
| 10 |
MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"
|
| 11 |
-
|
| 12 |
-
"https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main/"
|
| 13 |
-
"flux-2-klein-4b-Q4_K_M.gguf"
|
| 14 |
-
)
|
| 15 |
MAX_SEED = 2_147_483_647
|
| 16 |
-
|
|
|
|
| 17 |
MIN_GENERATION_EDGE = 256
|
| 18 |
SIZE_STEP = 32
|
| 19 |
|
|
@@ -50,6 +48,15 @@ def _dtype() -> torch.dtype:
|
|
| 50 |
return torch.bfloat16 if torch.cuda.is_available() else torch.float32
|
| 51 |
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
def get_pipeline() -> Flux2KleinPipeline:
|
| 54 |
global PIPELINE
|
| 55 |
|
|
@@ -62,7 +69,7 @@ def get_pipeline() -> Flux2KleinPipeline:
|
|
| 62 |
|
| 63 |
quantization_config = GGUFQuantizationConfig(compute_dtype=_dtype())
|
| 64 |
transformer = Flux2Transformer2DModel.from_single_file(
|
| 65 |
-
|
| 66 |
config=MODEL_ID,
|
| 67 |
subfolder="transformer",
|
| 68 |
quantization_config=quantization_config,
|
|
@@ -75,10 +82,12 @@ def get_pipeline() -> Flux2KleinPipeline:
|
|
| 75 |
torch_dtype=_dtype(),
|
| 76 |
)
|
| 77 |
pipe.vae.enable_slicing()
|
|
|
|
| 78 |
|
| 79 |
if torch.cuda.is_available():
|
| 80 |
pipe.enable_model_cpu_offload()
|
| 81 |
else:
|
|
|
|
| 82 |
pipe.to("cpu")
|
| 83 |
|
| 84 |
pipe.set_progress_bar_config(disable=True)
|
|
@@ -93,7 +102,8 @@ def _round_to_step(value: int, step: int = SIZE_STEP) -> int:
|
|
| 93 |
def _generation_size(image: Image.Image) -> tuple[int, int]:
|
| 94 |
width, height = image.size
|
| 95 |
longest_edge = max(width, height)
|
| 96 |
-
|
|
|
|
| 97 |
|
| 98 |
resized_width = max(MIN_GENERATION_EDGE, int(width * scale))
|
| 99 |
resized_height = max(MIN_GENERATION_EDGE, int(height * scale))
|
|
@@ -101,8 +111,8 @@ def _generation_size(image: Image.Image) -> tuple[int, int]:
|
|
| 101 |
gen_width = _round_to_step(resized_width)
|
| 102 |
gen_height = _round_to_step(resized_height)
|
| 103 |
|
| 104 |
-
gen_width = max(MIN_GENERATION_EDGE, min(
|
| 105 |
-
gen_height = max(MIN_GENERATION_EDGE, min(
|
| 106 |
return gen_width, gen_height
|
| 107 |
|
| 108 |
|
|
@@ -212,6 +222,9 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 212 |
Upload a normal photo and get a side-by-side comparison:
|
| 213 |
the left panel stays untouched, the right panel is regenerated
|
| 214 |
in a BSOD, computers, robots, and industrial sci-fi style.
|
|
|
|
|
|
|
|
|
|
| 215 |
"""
|
| 216 |
)
|
| 217 |
|
|
@@ -251,7 +264,7 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 251 |
minimum=1,
|
| 252 |
maximum=50,
|
| 253 |
step=1,
|
| 254 |
-
value=
|
| 255 |
)
|
| 256 |
guidance_scale = gr.Slider(
|
| 257 |
label="Guidance scale",
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"
|
| 11 |
+
GGUF_BASE_URL = "https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main"
|
|
|
|
|
|
|
|
|
|
| 12 |
MAX_SEED = 2_147_483_647
|
| 13 |
+
GPU_MAX_GENERATION_EDGE = 1024
|
| 14 |
+
CPU_MAX_GENERATION_EDGE = 512
|
| 15 |
MIN_GENERATION_EDGE = 256
|
| 16 |
SIZE_STEP = 32
|
| 17 |
|
|
|
|
| 48 |
return torch.bfloat16 if torch.cuda.is_available() else torch.float32
|
| 49 |
|
| 50 |
|
| 51 |
+
def _gguf_url() -> str:
    """Return the URL of the GGUF transformer checkpoint for this host.

    The Q4_K_M file is selected when CUDA is available; otherwise the
    Q2_K file is selected. The filename is appended to ``GGUF_BASE_URL``.
    """
    if torch.cuda.is_available():
        quant_file = "flux-2-klein-4b-Q4_K_M.gguf"
    else:
        quant_file = "flux-2-klein-4b-Q2_K.gguf"
    return f"{GGUF_BASE_URL}/{quant_file}"
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _max_generation_edge() -> int:
    """Return the upper bound on the generation edge length for this host.

    Yields ``GPU_MAX_GENERATION_EDGE`` when CUDA is available and
    ``CPU_MAX_GENERATION_EDGE`` otherwise.
    """
    if torch.cuda.is_available():
        return GPU_MAX_GENERATION_EDGE
    return CPU_MAX_GENERATION_EDGE
|
| 58 |
+
|
| 59 |
+
|
| 60 |
def get_pipeline() -> Flux2KleinPipeline:
|
| 61 |
global PIPELINE
|
| 62 |
|
|
|
|
| 69 |
|
| 70 |
quantization_config = GGUFQuantizationConfig(compute_dtype=_dtype())
|
| 71 |
transformer = Flux2Transformer2DModel.from_single_file(
|
| 72 |
+
_gguf_url(),
|
| 73 |
config=MODEL_ID,
|
| 74 |
subfolder="transformer",
|
| 75 |
quantization_config=quantization_config,
|
|
|
|
| 82 |
torch_dtype=_dtype(),
|
| 83 |
)
|
| 84 |
pipe.vae.enable_slicing()
|
| 85 |
+
pipe.vae.enable_tiling()
|
| 86 |
|
| 87 |
if torch.cuda.is_available():
|
| 88 |
pipe.enable_model_cpu_offload()
|
| 89 |
else:
|
| 90 |
+
pipe.enable_attention_slicing()
|
| 91 |
pipe.to("cpu")
|
| 92 |
|
| 93 |
pipe.set_progress_bar_config(disable=True)
|
|
|
|
| 102 |
def _generation_size(image: Image.Image) -> tuple[int, int]:
|
| 103 |
width, height = image.size
|
| 104 |
longest_edge = max(width, height)
|
| 105 |
+
max_generation_edge = _max_generation_edge()
|
| 106 |
+
scale = min(1.0, max_generation_edge / longest_edge) if longest_edge else 1.0
|
| 107 |
|
| 108 |
resized_width = max(MIN_GENERATION_EDGE, int(width * scale))
|
| 109 |
resized_height = max(MIN_GENERATION_EDGE, int(height * scale))
|
|
|
|
| 111 |
gen_width = _round_to_step(resized_width)
|
| 112 |
gen_height = _round_to_step(resized_height)
|
| 113 |
|
| 114 |
+
gen_width = max(MIN_GENERATION_EDGE, min(max_generation_edge, gen_width))
|
| 115 |
+
gen_height = max(MIN_GENERATION_EDGE, min(max_generation_edge, gen_height))
|
| 116 |
return gen_width, gen_height
|
| 117 |
|
| 118 |
|
|
|
|
| 222 |
Upload a normal photo and get a side-by-side comparison:
|
| 223 |
the left panel stays untouched, the right panel is regenerated
|
| 224 |
in a BSOD, computers, robots, and industrial sci-fi style.
|
| 225 |
+
|
| 226 |
+
On free CPU hardware, generation uses a lighter quant and smaller
|
| 227 |
+
working size, so higher step counts can be slow.
|
| 228 |
"""
|
| 229 |
)
|
| 230 |
|
|
|
|
| 264 |
minimum=1,
|
| 265 |
maximum=50,
|
| 266 |
step=1,
|
| 267 |
+
value=4,
|
| 268 |
)
|
| 269 |
guidance_scale = gr.Slider(
|
| 270 |
label="Guidance scale",
|