"""FLUX.2 Klein 4B - Free CPU Space with dynamic LoRA search from HuggingFace Hub"""
import gc
import os
import shutil
import time
from pathlib import Path

from PIL import Image
import requests as req


# ---------------------------------------------------------------------------
# Thread config (cgroup-aware)
# ---------------------------------------------------------------------------
def get_cpu_count() -> int:
    """Return the number of CPUs this process may use (always >= 1).

    Lookup order:
      1. ``/sys/fs/cgroup/cpu.max`` (read as "<quota> <period>"; a quota of
         "max" means no limit and falls through),
      2. ``cpu.cfs_quota_us`` / ``cpu.cfs_period_us`` under
         ``/sys/fs/cgroup/cpu`` (a non-positive quota falls through),
      3. ``os.cpu_count()``, or 2 when that is unavailable.

    Missing or malformed cgroup files are ignored on purpose: this is a
    best-effort probe and the next source is tried instead.
    """
    # Only the failures these reads can produce: missing/unreadable file,
    # unexpected content, or a zero period.
    probe_errors = (OSError, ValueError, ZeroDivisionError)

    try:
        with open("/sys/fs/cgroup/cpu.max") as f:
            quota, period = f.read().strip().split()
        if quota != "max":
            return max(1, int(quota) // int(period))
    except probe_errors:
        pass

    try:
        with open("/sys/fs/cgroup/cpu/cpu.cfs_quota_us") as f:
            quota_us = int(f.read().strip())
        with open("/sys/fs/cgroup/cpu/cpu.cfs_period_us") as f:
            period_us = int(f.read().strip())
        if quota_us > 0:
            return max(1, quota_us // period_us)
    except probe_errors:
        pass

    return max(1, os.cpu_count() or 2)


N_THREADS = get_cpu_count()
# setdefault: values already present in the environment win.
# NOTE(review): this runs before the stable_diffusion_cpp import below,
# presumably so the native library sees the thread settings at load time --
# keep that ordering.
for k in ["OMP_NUM_THREADS", "OPENBLAS_NUM_THREADS", "MKL_NUM_THREADS"]:
    os.environ.setdefault(k, str(N_THREADS))
print(f"[init] CPU threads: {N_THREADS}")

# ---------------------------------------------------------------------------
# Model resolution
# ---------------------------------------------------------------------------
HF_CACHE = Path(os.environ.get("HF_HOME", Path.home() / ".cache" / "huggingface" / "hub"))


def find_model(filename: str) -> str:
    """Return a path to ``filename``.

    Checks the current directory, then ``models/``, then searches
    ``HF_CACHE`` recursively and returns the first match.

    Raises:
        FileNotFoundError: if the file is in none of those locations.
    """
    for directory in (Path("."), Path("models")):
        candidate = directory / filename
        if candidate.exists():
            return str(candidate)

    cached = next(HF_CACHE.rglob(filename), None)
    if cached is not None:
        return str(cached)

    raise FileNotFoundError(f"Not found: {filename}")


# ---------------------------------------------------------------------------
# Load base models
# ---------------------------------------------------------------------------
from huggingface_hub import hf_hub_download, list_repo_files
from stable_diffusion_cpp import StableDiffusion

DIFFUSION_FILE = "flux-2-klein-4b-Q4_K_M.gguf"
LLM_FILE = "qwen3-4b-abl-q4_0.gguf"
VAE_FILE = "flux2-vae.safetensors"

print("[init] Locating models...")
# These two must already be present locally; find_model raises otherwise.
diffusion_path = find_model(DIFFUSION_FILE)
vae_path = find_model(VAE_FILE)
# The text encoder is the only model fetched on demand when it is not local.
try:
    llm_path = find_model(LLM_FILE)
except FileNotFoundError:
    print("[init] Downloading uncensored text encoder...")
    llm_path = hf_hub_download(
        repo_id="WeReCooking/flux2-klein-4B-uncensored-text-encoder",
        filename=LLM_FILE,
    )

print(f"[init] Diffusion: {diffusion_path}")
print(f"[init] LLM: {llm_path}")
print(f"[init] VAE: {vae_path}")

# ---------------------------------------------------------------------------
# LoRA management
# ---------------------------------------------------------------------------
LORA_DIR = "/tmp/loras"
os.makedirs(LORA_DIR, exist_ok=True)
# Maps UI label ("<repo_id>/<file>") -> LoRA name (file stem inside LORA_DIR).
DOWNLOADED_LORAS: dict[str, str] = {}


def fetch_all_loras(query: str = "") -> list[str]:
    """Search the HuggingFace models API for Klein 4B LoRA repositories.

    Args:
        query: Extra search terms appended to the fixed "klein 4b" prefix.

    Returns:
        Repo ids whose tags contain "lora" or whose id contains "lora",
        in the order the API returned them. Returns an empty list on any
        error (the error is printed, not raised) so the UI keeps working.
    """
    search = f"klein 4b {query}".strip()
    try:
        r = req.get(
            "https://huggingface.co/api/models",
            params={
                "search": search,
                "filter": "lora",
                "sort": "downloads",
                "direction": "-1",
                "limit": 50,
            },
            timeout=10,
        )
        r.raise_for_status()
        results = []
        for m in r.json():
            mid = m.get("id", "")
            tags = m.get("tags") or []  # tolerate an explicit null
            if "lora" in tags or "lora" in mid.lower():
                results.append(mid)
        return results
    except Exception as e:  # best-effort lookup at the UI boundary
        print(f"[lora] Search error: {e}")
        return []


def download_lora(repo_id: str) -> tuple[str, str]:
    """Download one LoRA weight file from ``repo_id`` into ``LORA_DIR``.

    The file chosen is the first ``.safetensors`` whose name contains
    "lora" or "adapter", else the first ``.safetensors`` in the repo.

    Args:
        repo_id: HuggingFace repo id. Empty values and placeholder entries
            starting with "(" are rejected.

    Returns:
        ``(label, status_message)``. ``label`` is "<repo_id>/<file>" on
        success and "" on failure; failures are reported through the
        message rather than raised.
    """
    if not repo_id or repo_id.startswith("("):
        return "", "Select a LoRA first"
    try:
        files = list_repo_files(repo_id)
        sf_files = [f for f in files if f.endswith(".safetensors")]
        if not sf_files:
            return "", f"No .safetensors found in {repo_id}"

        target = sf_files[0]
        for f in sf_files:
            if "lora" in f.lower() or "adapter" in f.lower():
                target = f
                break

        label = f"{repo_id}/{target}"
        # Flatten the label into a separator-free name, then drop the
        # trailing "_safetensors" left over from the file extension.
        lora_name = label.replace("/", "_").replace("-", "_").replace(".", "_")
        lora_name = lora_name.rsplit("_safetensors", 1)[0]
        lora_dst = os.path.join(LORA_DIR, f"{lora_name}.safetensors")

        # Only report "cached" when the file is really still there; a
        # cleaned-up /tmp falls through to a fresh download instead of failing.
        if label in DOWNLOADED_LORAS and os.path.exists(lora_dst):
            size_mb = os.path.getsize(lora_dst) / 1024**2
            return label, f"Already cached ({size_mb:.0f} MB)"

        print(f"[lora] Downloading {repo_id}/{target}...")
        src = hf_hub_download(repo_id=repo_id, filename=target)
        shutil.copy2(src, lora_dst)
        size_mb = os.path.getsize(lora_dst) / 1024**2
        DOWNLOADED_LORAS[label] = lora_name
        print(f"[lora] Downloaded: {label} ({size_mb:.0f} MB)")
        return label, f"Downloaded: {label} ({size_mb:.0f} MB)"
    except Exception as e:  # surfaced to the user via the status box
        return "", f"Failed: {e}"


# ---------------------------------------------------------------------------
# Engine
# ---------------------------------------------------------------------------
# "lora_state" is the frozenset of file names in LORA_DIR at load time.
SD_ENGINE = {"instance": None, "lora_state": None}


def _reload_engine():
    """(Re)create the StableDiffusion engine if the LoRA directory changed.

    Does nothing when an engine exists and the set of files in ``LORA_DIR``
    matches the set recorded at the last load.
    """
    lora_files = set(os.listdir(LORA_DIR)) if os.path.exists(LORA_DIR) else set()
    state_key = frozenset(lora_files)
    if SD_ENGINE["instance"] is not None and SD_ENGINE["lora_state"] == state_key:
        return

    # Drop our reference to the previous engine before building the new one,
    # so two sets of model weights are not held at once. If the load below
    # fails, "instance" stays None and get_engine() retries on the next call.
    SD_ENGINE["instance"] = None
    gc.collect()

    print(f"[engine] Loading (loras: {len(lora_files)})...")
    t0 = time.time()
    kwargs = dict(
        diffusion_model_path=diffusion_path,
        llm_path=llm_path,
        vae_path=vae_path,
        diffusion_flash_attn=True,
        n_threads=N_THREADS,
        verbose=True,
    )
    if lora_files:
        kwargs["lora_model_dir"] = LORA_DIR
    SD_ENGINE["instance"] = StableDiffusion(**kwargs)
    SD_ENGINE["lora_state"] = state_key
    print(f"[engine] Loaded in {time.time()-t0:.1f}s")


def get_engine():
    """Return the shared engine, loading it first if none exists."""
    if SD_ENGINE["instance"] is None:
        _reload_engine()
    return SD_ENGINE["instance"]


_reload_engine()

print("[init] Fetching Klein 4B LoRA catalog...")
INITIAL_LORAS = fetch_all_loras("")
print(f"[init] Found {len(INITIAL_LORAS)} LoRAs")

# ---------------------------------------------------------------------------
# Inference
# ---------------------------------------------------------------------------
RESOLUTIONS = ["512x512", "768x768", "1024x1024", "1024x768", "768x1024", "1024x576", "576x1024"]


def parse_res(s):
    """Parse a "WIDTHxHEIGHT" string into an ``(int, int)`` tuple."""
    w, h = s.split("x")
    return int(w), int(h)


def generate(prompt, ref_image, resolution, steps, seed, lora_strength, active_loras, progress=None):
    """Generate an image, or edit ``ref_image`` when one is given.

    Args:
        prompt: Text prompt.
        ref_image: Optional reference image; its presence selects edit mode.
        resolution: "WIDTHxHEIGHT" string (see ``RESOLUTIONS``).
        steps: Number of sampling steps.
        seed: Seed; ``None`` or a negative value is passed on as -1.
        lora_strength: Weight applied to every active LoRA.
        active_loras: Labels (keys of ``DOWNLOADED_LORAS``) to apply.
        progress: Unused; kept for interface compatibility.

    Returns:
        ``(image_or_None, status_text)``. Errors are caught, the traceback
        is printed, and the message is returned in ``status_text``.
    """
    try:
        gc.collect()
        sd = get_engine()
        w, h = parse_res(resolution)
        steps = int(steps)
        # An emptied Number field arrives as None; treat it like "random".
        seed = -1 if seed is None or int(seed) < 0 else int(seed)

        actual_prompt = prompt
        lora_tags = []
        if active_loras:
            strength = float(lora_strength)
            for label in active_loras:
                lora_name = DOWNLOADED_LORAS.get(label)
                if lora_name:
                    # LoRAs are selected through a prompt tag naming a file
                    # (without extension) in lora_model_dir -- syntax per the
                    # stable-diffusion-cpp-python README.
                    actual_prompt = f"<lora:{lora_name}:{strength:g}> {actual_prompt}"
                    lora_tags.append(label.split("/")[-1])

        is_edit = ref_image is not None
        mode = "edit" if is_edit else "gen"
        print(f"[{mode}] {w}x{h} steps={steps} seed={seed} loras={lora_tags}")

        t0 = time.time()
        kwargs = dict(prompt=actual_prompt, width=w, height=h,
                      sample_steps=steps, cfg_scale=1.0, seed=seed)
        if is_edit:
            kwargs["ref_images"] = [ref_image]
        images = sd.generate_image(**kwargs)
        elapsed = time.time() - t0

        lora_info = f" +{len(lora_tags)} LoRA(s)" if lora_tags else ""
        edit_info = " [edit]" if is_edit else ""
        status = f"{elapsed:.1f}s | {w}x{h}, {steps} steps, seed {seed}{lora_info}{edit_info}"
        print(f"[{mode}] {status}")
        return (images[0] if images else None), status
    except Exception as e:  # UI boundary: report instead of crashing the app
        import traceback
        traceback.print_exc()
        return None, f"Error: {e}"


# ---------------------------------------------------------------------------
# Gradio UI
# ---------------------------------------------------------------------------
import gradio as gr

with gr.Blocks(theme="NoCrypt/miku", title="FLUX.2 Klein 4B CPU") as demo:
    gr.Markdown(
        "# FLUX.2 Klein 4B / Free CPU\n"
        "Type a prompt to generate. Upload a reference image to edit it instead. "
        "Expect **15-30 min** per image at 512x512 on free CPU."
    )
    with gr.Row():
        with gr.Column(scale=1):
            prompt = gr.Textbox(label="Prompt", lines=3,
                                placeholder="Describe what to generate or edit...")
            ref_image = gr.Image(label="Reference Image (optional, for editing)", type="pil")
            resolution = gr.Dropdown(choices=RESOLUTIONS, value="512x512", label="Resolution")
            with gr.Row():
                steps = gr.Slider(2, 8, value=4, step=1, label="Steps", scale=1)
                seed = gr.Number(value=-1, label="Seed", precision=0, scale=1)
                lora_strength = gr.Slider(0.1, 1.5, value=0.8, step=0.05, label="LoRA str", scale=1)
            with gr.Accordion("LoRA (search Klein 4B LoRAs on HuggingFace)", open=False):
                lora_search = gr.Dropdown(
                    choices=INITIAL_LORAS,
                    value=None,
                    label="Search LoRA repos (type to filter, select to download)",
                    filterable=True,
                    allow_custom_value=True,
                    interactive=True,
                )
                lora_status = gr.Textbox(label="Status", interactive=False, value="No LoRA active")
                active_loras = gr.Dropdown(
                    choices=[],
                    value=[],
                    multiselect=True,
                    interactive=True,
                    label="Active LoRAs (click X to remove)",
                )
            gen_btn = gr.Button("Generate / Edit", variant="primary", size="lg")
        with gr.Column(scale=1):
            output_image = gr.Image(label="Output", type="pil")
            status_text = gr.Textbox(label="Status", interactive=False)

    def on_search_type(query):
        """Refresh the search dropdown's choices as the user types."""
        # An empty query or an exact catalog entry needs no new search.
        if not query or query in INITIAL_LORAS:
            return gr.update(choices=INITIAL_LORAS)
        results = fetch_all_loras(query)
        return gr.update(choices=results if results else INITIAL_LORAS)

    def on_lora_select(repo_id, current_active):
        """Download the chosen LoRA, reload the engine and activate it.

        Returns updates for (active_loras, lora_status, lora_search).
        """
        if not repo_id or repo_id.startswith("("):
            return current_active or [], "Select a LoRA", gr.update()
        label, status_msg = download_lora(repo_id)
        if not label:
            return current_active or [], status_msg, gr.update()
        # The engine is rebuilt whenever the contents of LORA_DIR change.
        _reload_engine()
        active = list(current_active) if current_active else []
        if label not in active:
            active.append(label)
        all_downloaded = list(DOWNLOADED_LORAS.keys())
        # Clear the search box so the same repo can be selected again later.
        return gr.update(choices=all_downloaded, value=active), status_msg, gr.update(value=None)

    lora_search.input(fn=on_search_type, inputs=[lora_search], outputs=[lora_search])
    lora_search.select(fn=on_lora_select,
                       inputs=[lora_search, active_loras],
                       outputs=[active_loras, lora_status, lora_search])
    gen_btn.click(fn=generate,
                  inputs=[prompt, ref_image, resolution, steps, seed, lora_strength, active_loras],
                  outputs=[output_image, status_text])

    gr.Markdown("---\nsd.cpp Q4_K_M | Uncensored encoder | "
                "[BFL](https://bfl.ai/models/flux-2-klein) | [sd.cpp](https://github.com/leejet/stable-diffusion.cpp) | "
                "[Browse LoRAs](https://huggingface.co/models?search=klein+4b&filter=lora)")

demo.queue().launch(ssr_mode=False, show_error=True)