# Spaces: Levaser / Make-it-bsod (status: Running)
# File size: 9,497 Bytes

import random
import threading
import traceback

import gradio as gr
import torch
from diffusers import Flux2KleinPipeline, Flux2Transformer2DModel, GGUFQuantizationConfig
from PIL import Image, ImageDraw, ImageFont, ImageOps


# Hub repo id; used both as the config source for the GGUF transformer and as
# the base checkpoint for the pipeline (see get_pipeline).
MODEL_ID = "black-forest-labs/FLUX.2-klein-4B"
# Base URL for the GGUF transformer weights; _gguf_url() appends a
# quant-specific filename to it.
GGUF_BASE_URL = "https://huggingface.co/unsloth/FLUX.2-klein-4B-GGUF/resolve/main"
# Upper bound for the seed slider and for randomly drawn seeds (2**31 - 1).
MAX_SEED = 2_147_483_647
# Cap applied to each generated dimension when CUDA is / is not available.
GPU_MAX_GENERATION_EDGE = 1024
CPU_MAX_GENERATION_EDGE = 512
# Floor applied to each generated dimension.
MIN_GENERATION_EDGE = 256
# Generated dimensions are rounded to a multiple of this value.
SIZE_STEP = 32

# Pipeline cache shared by all requests; filled by get_pipeline() on first use.
PIPELINE = None
# Held while the pipeline is being built so it is only constructed once.
PIPELINE_LOCK = threading.Lock()

# Base prompt used for every generation; infer() appends the user's extra
# instructions to it when any are provided.
BSOD_PROMPT = (
    "Transform the reference photo into a BSOD-inspired scene. "
    "Keep the main subject recognizable and preserve the overall composition. "
    "Use blue-screen-of-death aesthetics, computer hardware, machines, robots, "
    "cybernetic details, metallic structures, monitor glow, motherboard patterns, "
    "industrial sci-fi atmosphere, neon blue diagnostics, clean high detail."
)

# Stylesheet passed to gr.Blocks; the selectors match the elem_classes
# ("app-shell", "hero") assigned to the layout columns below.
CSS = """
.app-shell {
    max-width: 1080px;
    margin: 0 auto;
}
.hero {
    padding: 8px 0 20px;
}
.hero h1 {
    margin-bottom: 8px;
}
"""


def _device() -> str:
    """Return ``"cuda"`` when a CUDA device is available, otherwise ``"cpu"``."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"


def _dtype() -> torch.dtype:
    """Return the torch dtype to load and run with.

    ``torch.bfloat16`` when CUDA is available, ``torch.float32`` otherwise.
    """
    if torch.cuda.is_available():
        return torch.bfloat16
    return torch.float32


def _gguf_url() -> str:
    """Return the download URL of the GGUF transformer weights.

    Picks the Q4_K_M file when CUDA is available and the Q2_K file otherwise,
    and joins it onto ``GGUF_BASE_URL``.
    """
    if torch.cuda.is_available():
        quant_file = "flux-2-klein-4b-Q4_K_M.gguf"
    else:
        quant_file = "flux-2-klein-4b-Q2_K.gguf"
    return f"{GGUF_BASE_URL}/{quant_file}"


def _max_generation_edge() -> int:
    """Return the cap for a generated dimension on the current hardware.

    ``GPU_MAX_GENERATION_EDGE`` when CUDA is available, otherwise
    ``CPU_MAX_GENERATION_EDGE``.
    """
    if torch.cuda.is_available():
        return GPU_MAX_GENERATION_EDGE
    return CPU_MAX_GENERATION_EDGE


def get_pipeline() -> Flux2KleinPipeline:
    """Return the shared pipeline, building it on first use.

    The first caller loads the GGUF transformer from ``_gguf_url()``, wraps it
    in a ``Flux2KleinPipeline`` based on ``MODEL_ID``, configures it for the
    available hardware and stores it in the module-level ``PIPELINE``. Later
    calls return that stored object.

    Returns:
        The cached ``Flux2KleinPipeline`` instance.
    """
    global PIPELINE

    if PIPELINE is None:
        with PIPELINE_LOCK:
            # Re-check under the lock: another thread may have finished
            # building while this one was waiting to acquire it.
            if PIPELINE is None:
                compute_dtype = _dtype()

                gguf_config = GGUFQuantizationConfig(compute_dtype=compute_dtype)
                transformer = Flux2Transformer2DModel.from_single_file(
                    _gguf_url(),
                    config=MODEL_ID,
                    subfolder="transformer",
                    quantization_config=gguf_config,
                    torch_dtype=compute_dtype,
                )

                built = Flux2KleinPipeline.from_pretrained(
                    MODEL_ID,
                    transformer=transformer,
                    torch_dtype=compute_dtype,
                )

                vae = built.vae
                vae.enable_slicing()
                vae.enable_tiling()

                if torch.cuda.is_available():
                    built.enable_model_cpu_offload()
                else:
                    built.enable_attention_slicing()
                    built.to("cpu")

                built.set_progress_bar_config(disable=True)
                # Assign the global last so other threads never observe a
                # partially configured pipeline.
                PIPELINE = built

    return PIPELINE


def _round_to_step(value: int, step: int = SIZE_STEP) -> int:
    """Round ``value`` to the nearest multiple of ``step``, never below ``step``.

    Args:
        value: Length to round.
        step: Multiple to snap to; defaults to ``SIZE_STEP``.

    Returns:
        The nearest multiple of ``step`` (as computed by ``round``), or
        ``step`` itself when that multiple would be smaller.
    """
    multiples = round(value / step)
    snapped = int(multiples * step)
    return snapped if snapped > step else step


def _generation_size(image: Image.Image) -> tuple[int, int]:
    """Compute the ``(width, height)`` to generate at for ``image``.

    The image is scaled down (never up) so its longest edge fits the hardware
    cap from ``_max_generation_edge()``. Each dimension is then floored at
    ``MIN_GENERATION_EDGE``, snapped with ``_round_to_step`` and clamped to
    ``[MIN_GENERATION_EDGE, cap]``.

    Args:
        image: Source image; only its ``size`` is read.

    Returns:
        The generation width and height.
    """
    src_w, src_h = image.size
    longest = max(src_w, src_h)
    edge_cap = _max_generation_edge()
    # A zero-sized image would divide by zero, so leave its scale at 1.0.
    shrink = min(1.0, edge_cap / longest) if longest else 1.0

    def fit(side: int) -> int:
        """Scale one dimension, snap it to the step and clamp it."""
        scaled = max(MIN_GENERATION_EDGE, int(side * shrink))
        snapped = _round_to_step(scaled)
        return max(MIN_GENERATION_EDGE, min(edge_cap, snapped))

    return fit(src_w), fit(src_h)


def _resize_for_model(image: Image.Image, width: int, height: int) -> Image.Image:
    """Return ``image`` resized to exactly ``width`` x ``height`` with LANCZOS resampling."""
    target_size = (width, height)
    return image.resize(target_size, resample=Image.Resampling.LANCZOS)


def _label_font() -> ImageFont.ImageFont | ImageFont.FreeTypeFont:
    """Return the font used for the comparison panel labels.

    Tries the named TrueType fonts at 36 px in order. When none can be loaded,
    falls back to Pillow's built-in default font, requested at the same size
    where the installed Pillow supports it.

    Returns:
        A font object usable with ``ImageDraw.text``.
    """
    font_size = 36
    for font_name in ("DejaVuSans-Bold.ttf", "Arial.ttf"):
        try:
            return ImageFont.truetype(font_name, font_size)
        except OSError:
            # Font file not found on this system; try the next candidate.
            continue

    # Without a size the built-in font is far smaller than the 36 px labels the
    # layout is designed around. Pillow >= 10.1 accepts a size for it.
    try:
        return ImageFont.load_default(size=font_size)
    except TypeError:
        # Older Pillow: load_default() takes no size argument.
        return ImageFont.load_default()


def _compose_comparison(original: Image.Image, bsod: Image.Image) -> Image.Image:
    """Build a labelled side-by-side canvas from the two images.

    ``original`` goes on the left and ``bsod`` on the right, each on a rounded
    panel under a text label. Both panels share the height of the taller image;
    the shorter image is centred vertically in its panel.

    Args:
        original: Image for the left panel.
        bsod: Image for the right panel.

    Returns:
        A new RGB image containing both panels and their labels.
    """
    pad, gap, header_height = 28, 24, 74
    bg_color = (10, 16, 30)
    panel_color = (18, 30, 54)
    text_color = (223, 236, 255)

    orig_w, orig_h = original.size
    bsod_w, bsod_h = bsod.size
    panel_h = max(orig_h, bsod_h)

    panel_top = pad + header_height
    panel_bottom = panel_top + panel_h
    left_x = pad
    right_x = pad + orig_w + gap

    canvas = Image.new("RGB", (right_x + bsod_w + pad, panel_bottom + pad), bg_color)
    draw = ImageDraw.Draw(canvas)
    font = _label_font()

    # (image, panel x-origin, image width, image height, label)
    panels = (
        (original, left_x, orig_w, orig_h, "original"),
        (bsod, right_x, bsod_w, bsod_h, "bsod"),
    )

    # Draw in three passes (backgrounds, labels, images) so the paint order is
    # the same for both panels.
    for _, x, w, _, _ in panels:
        draw.rounded_rectangle((x, panel_top, x + w, panel_bottom), radius=20, fill=panel_color)

    label_y = pad + 18
    for _, x, _, _, label in panels:
        draw.text((x + 16, label_y), label, fill=text_color, font=font)

    for picture, x, _, h, _ in panels:
        canvas.paste(picture, (x, panel_top + (panel_h - h) // 2))

    return canvas


def infer(
    input_image: Image.Image,
    extra_prompt: str,
    seed: int,
    randomize_seed: bool,
    num_inference_steps: int,
    guidance_scale: float,
    progress=gr.Progress(track_tqdm=True),
):
    """Restyle an uploaded photo and build the side-by-side comparison.

    Args:
        input_image: Uploaded photo; ``None`` when nothing was uploaded.
        extra_prompt: Optional text appended to ``BSOD_PROMPT``.
        seed: Seed for the torch generator, used unless ``randomize_seed`` is set.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        num_inference_steps: Step count forwarded to the pipeline (cast to int).
        guidance_scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        A ``(comparison, stylized, seed)`` tuple: the composed side-by-side
        image, the generated image alone, and the seed that was used.

    Raises:
        gr.Error: If no image was supplied, or wrapping any exception raised
            while preparing inputs or generating.
    """
    if input_image is None:
        raise gr.Error("Upload a source image first.")

    try:
        # Apply the EXIF orientation before measuring or resizing the photo.
        source = ImageOps.exif_transpose(input_image).convert("RGB")
        gen_w, gen_h = _generation_size(source)
        model_input = _resize_for_model(source, gen_w, gen_h)

        used_seed = random.randint(0, MAX_SEED) if randomize_seed else seed

        extra = extra_prompt.strip() if extra_prompt else ""
        full_prompt = f"{BSOD_PROMPT} Extra instructions: {extra}" if extra else BSOD_PROMPT

        pipe = get_pipeline()
        rng = torch.Generator(device=_device()).manual_seed(int(used_seed))

        output = pipe(
            prompt=full_prompt,
            image=model_input,
            width=gen_w,
            height=gen_h,
            guidance_scale=guidance_scale,
            num_inference_steps=int(num_inference_steps),
            generator=rng,
        )
        stylized = output.images[0]

        return _compose_comparison(source, stylized), stylized, used_seed
    except Exception as exc:
        # Log the full traceback, then surface a short message in the UI.
        print(traceback.format_exc(), flush=True)
        raise gr.Error(f"{type(exc).__name__}: {exc}") from exc


# Gradio UI definition. Components are created in layout order inside the
# nested context managers; the click handler at the end wires them to infer().
with gr.Blocks(css=CSS) as demo:
    with gr.Column(elem_classes=["app-shell"]):
        # Title and short usage description.
        with gr.Column(elem_classes=["hero"]):
            gr.Markdown(
                """
                # Make It BSOD
                Upload a normal photo and get a side-by-side comparison:
                the left panel stays untouched, the right panel is regenerated
                in a BSOD, computers, robots, and industrial sci-fi style.

                On free CPU hardware, generation uses a lighter quant and smaller
                working size, so higher step counts can be slow.
                """
            )

        # First row: uploaded photo next to the composed comparison output.
        with gr.Row():
            input_image = gr.Image(
                label="Original photo",
                type="pil",
                image_mode="RGB",
            )
            comparison_image = gr.Image(
                label="Comparison",
                type="pil",
            )

        # Second row: optional extra prompt next to the generated image alone.
        with gr.Row():
            extra_prompt = gr.Textbox(
                label="Extra style instructions",
                placeholder="Optional: chrome limbs, server room, broken CRTs, robot swarm...",
                lines=2,
            )
            stylized_image = gr.Image(
                label="BSOD only",
                type="pil",
            )

        # Sampling controls, collapsed by default.
        with gr.Accordion("Generation settings", open=False):
            seed = gr.Slider(
                label="Seed",
                minimum=0,
                maximum=MAX_SEED,
                step=1,
                value=0,
            )
            randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
            num_inference_steps = gr.Slider(
                label="Steps",
                minimum=1,
                maximum=50,
                step=1,
                value=4,
            )
            guidance_scale = gr.Slider(
                label="Guidance scale",
                minimum=1.0,
                maximum=10.0,
                step=0.1,
                value=4.0,
            )

        run_button = gr.Button("Make it BSOD", variant="primary")

        # Example texts that fill the extra-prompt box.
        gr.Examples(
            examples=[
                ["cold blue datacenter, mechanical arms, diagnostic overlays"],
                ["retro windows crash screen, motherboard textures, chrome robot face"],
                ["factory machines, server racks, terminal glow, cybernetic details"],
            ],
            inputs=[extra_prompt],
        )

    # Input order must match infer()'s positional parameters. The seed slider
    # is also an output so it shows the seed that was actually used.
    run_button.click(
        fn=infer,
        inputs=[
            input_image,
            extra_prompt,
            seed,
            randomize_seed,
            num_inference_steps,
            guidance_scale,
        ],
        outputs=[comparison_image, stylized_image, seed],
    )


# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()