"""Gradio demo that runs the FLUX.2-klein-base-4B GGUF checkpoint on CPU.

The pipeline is loaded lazily on the first request and then reused. An
optional input image is downscaled before being handed to the pipeline.
Configuration is read from the environment variables GGUF_FILE, MAX_SIDE,
DEFAULT_STEPS and DEFAULT_GUIDANCE.
"""

import os
import random
import threading
from typing import Optional

import gradio as gr
import torch
from diffusers import (
    Flux2KleinPipeline,
    Flux2Transformer2DModel,
    GGUFQuantizationConfig,
)
from huggingface_hub import hf_hub_download
from PIL import Image

GGUF_REPO = "unsloth/FLUX.2-klein-base-4B-GGUF"
BASE_REPO = "black-forest-labs/FLUX.2-klein-base-4B"
GGUF_FILE = os.getenv("GGUF_FILE", "flux-2-klein-base-4b-Q2_K.gguf")
MAX_SIDE = int(os.getenv("MAX_SIDE", "768"))
DEFAULT_STEPS = int(os.getenv("DEFAULT_STEPS", "8"))
DEFAULT_GUIDANCE = float(os.getenv("DEFAULT_GUIDANCE", "3.5"))

# Lazily initialised pipeline plus the message of the first load failure.
# Both are written only while holding ``_lock``.
_pipe = None
_pipe_error = None
_lock = threading.Lock()


def _prepare_image(img: Image.Image) -> Image.Image:
    """Return an RGB copy of *img* resized for the pipeline.

    The longest side is capped at ``MAX_SIDE``; each side is then rounded
    down to a multiple of 32 and clamped to at least 256 pixels.
    """
    img = img.convert("RGB")
    w, h = img.size
    longest = max(w, h)
    if longest > MAX_SIDE:
        scale = MAX_SIDE / longest
        w = int(w * scale)
        h = int(h * scale)
    # NOTE: the 256 floor is applied per side, so very thin or very small
    # inputs do not keep their aspect ratio.
    w = max(256, (w // 32) * 32)
    h = max(256, (h // 32) * 32)
    return img.resize((w, h), Image.Resampling.LANCZOS)


def get_pipe() -> Flux2KleinPipeline:
    """Return the shared pipeline, loading it on first use.

    Raises:
        RuntimeError: if an earlier load attempt failed; the message is the
            text of that first failure. The load is not retried.
        Exception: whatever the first failing load attempt raised.
    """
    global _pipe, _pipe_error

    # Fast path: already loaded, no lock needed.
    if _pipe is not None:
        return _pipe

    with _lock:
        if _pipe is not None:
            return _pipe
        # Checked under the lock so a caller that was waiting while another
        # thread's load failed does not repeat the whole load.
        if _pipe_error is not None:
            raise RuntimeError(_pipe_error)
        try:
            hf_token = os.getenv("HF_TOKEN") or os.getenv("HUGGINGFACEHUB_API_TOKEN")
            # Pass the token here too, so the GGUF download uses the same
            # credentials as the base-repo loads below.
            gguf_path = hf_hub_download(
                repo_id=GGUF_REPO,
                filename=GGUF_FILE,
                token=hf_token,
            )
            qconfig = GGUFQuantizationConfig(compute_dtype=torch.float32)
            transformer = Flux2Transformer2DModel.from_single_file(
                gguf_path,
                config=BASE_REPO,
                subfolder="transformer",
                token=hf_token,
                quantization_config=qconfig,
                torch_dtype=torch.float32,
            )
            pipe = Flux2KleinPipeline.from_pretrained(
                BASE_REPO,
                transformer=transformer,
                token=hf_token,
                torch_dtype=torch.float32,
            )
            pipe = pipe.to("cpu")
            pipe.set_progress_bar_config(disable=True)
            pipe.enable_attention_slicing()
            if hasattr(pipe, "enable_vae_slicing"):
                pipe.enable_vae_slicing()
            _pipe = pipe
        except Exception as e:
            # Remember the failure so later calls fail fast.
            _pipe_error = str(e)
            raise
    return _pipe


def run_edit(
    image: Optional[Image.Image],
    prompt: str,
    steps: int = DEFAULT_STEPS,
    guidance: float = DEFAULT_GUIDANCE,
    seed: int = -1,
):
    """Generate (or edit) an image from *prompt* and return the first result.

    Args:
        image: Optional source image. When ``None`` a 768x768 output is
            requested and no image is passed to the pipeline.
        prompt: Instruction text; must not be empty or whitespace-only.
        steps: Number of inference steps; ``None`` falls back to
            ``DEFAULT_STEPS`` and values below 1 are raised to 1.
        guidance: Guidance scale; ``None`` falls back to ``DEFAULT_GUIDANCE``.
        seed: RNG seed. ``None`` or a negative value picks a random seed;
            other values are truncated to an integer.

    Raises:
        gr.Error: on an empty prompt, a pipeline load failure, or a
            generation failure.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Escreve uma instrucao.")

    try:
        pipe = get_pipe()
    except Exception as exc:
        # Chain the cause so the real failure is visible in the traceback.
        raise gr.Error(
            "Falha ao carregar FLUX.2-klein-base-4B-GGUF no CPU Basic."
        ) from exc

    src = _prepare_image(image) if image is not None else None
    if src is not None:
        width, height = src.size
    else:
        width, height = 768, 768

    # The UI number field can yield None (cleared) or a float, while
    # manual_seed needs an int.
    if seed is None or seed < 0:
        seed = random.randint(0, 2**31 - 1)
    else:
        seed = int(seed)
    generator = torch.Generator(device="cpu").manual_seed(seed)

    try:
        result = pipe(
            prompt=prompt.strip(),
            image=src,
            height=height,
            width=width,
            num_inference_steps=max(
                1, int(steps if steps is not None else DEFAULT_STEPS)
            ),
            guidance_scale=float(
                guidance if guidance is not None else DEFAULT_GUIDANCE
            ),
            generator=generator,
        ).images[0]
        return result
    except Exception as exc:
        raise gr.Error(
            "Falha na geracao. Tenta imagem menor e menos steps."
        ) from exc


with gr.Blocks() as demo:
    gr.Markdown("# FLUX.2-klein-base-4B-GGUF local (CPU Basic)")
    gr.Markdown("Modelo: unsloth/FLUX.2-klein-base-4B-GGUF (Q2_K por default).")
    with gr.Row():
        inp = gr.Image(type="pil", label="Imagem (opcional)")
        out = gr.Image(type="pil", label="Resultado")
    prompt = gr.Textbox(lines=3, label="Instrucao")
    with gr.Row():
        steps = gr.Slider(
            minimum=1, maximum=20, value=DEFAULT_STEPS, step=1, label="Steps"
        )
        guidance = gr.Slider(
            minimum=1.0,
            maximum=8.0,
            value=DEFAULT_GUIDANCE,
            step=0.1,
            label="Guidance",
        )
        seed = gr.Number(value=-1, label="Seed (-1 aleatorio)")
    run_btn = gr.Button("Gerar")
    run_btn.click(
        run_edit,
        inputs=[inp, prompt, steps, guidance, seed],
        outputs=out,
    )


if __name__ == "__main__":
    # One request at a time: a single shared pipeline instance serves all calls.
    demo.queue(default_concurrency_limit=1).launch(show_error=True)