Flulike99 committed
Commit 81bf056 · 1 Parent(s): 51935a3
.gitattributes copy ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README copy.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Ads Ap
+ emoji: 📉
+ colorFrom: indigo
+ colorTo: purple
+ sdk: gradio
+ sdk_version: 5.49.1
+ app_file: app.py
+ pinned: false
+ short_description: Advertisement generation
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,420 @@
+ import os
+ import sys
+ from typing import Union, Any, Optional
+
+ import gradio as gr
+ import numpy as np
+ import torch
+ from PIL import Image
+ import spaces
+
+ # Add the project root directory to the Python path
+ project_root = os.path.dirname(os.path.abspath(__file__))
+ sys.path.append(project_root)
+ hf_token = os.environ.get("CASCADE_PRIVATE_MODEL_HF_TOKEN")
+ secret_model = os.environ.get("MODEL_PATH")
+
+ try:
+     from diffusers import FluxTransformer2DModel
+     from diffusers.pipelines import FluxPipeline
+     from flux.condition import Condition
+     from flux.generate import generate
+     from flux.lora_controller import set_lora_scale
+     FLUX_AVAILABLE = True
+ except ImportError as e:
+     print(f"Warning: FLUX components not available: {e}")
+     FLUX_AVAILABLE = False
+
+ from huggingface_hub import hf_hub_download
+ from safetensors.torch import load_file
+
+ # Download the model file using the auth token
+ model_path = hf_hub_download(
+     repo_id="spaces/Cascade-Inc/private_model",
+     filename=secret_model,
+     token=hf_token,
+     repo_type="space"
+ )
+
+ # Get temp directory
+ temp_dir = os.path.join(os.path.expanduser("~"), "gradio_temp")
+ os.makedirs(temp_dir, exist_ok=True)
+ os.environ["GRADIO_TEMP_DIR"] = temp_dir
+
+ # Global state
+ pipe: Union["FluxPipeline", None] = None
+ use_int8 = False
+
+ ADAPTER_NAME = "subject"
+ MODEL_PATH = model_path
+
+ def get_gpu_memory_gb() -> float:
+     return torch.cuda.get_device_properties(0).total_memory / 1024**3
+
+ def init_pipeline_if_needed():
+     global pipe
+     if pipe is not None:
+         return
+
+     if use_int8 or get_gpu_memory_gb() < 33:
+         transformer_model = FluxTransformer2DModel.from_pretrained(
+             "sayakpaul/flux.1-schell-int8wo-improved",
+             torch_dtype=torch.bfloat16,
+             use_safetensors=False,
+         )
+         _pipe = FluxPipeline.from_pretrained(
+             "black-forest-labs/FLUX.1-schnell",
+             transformer=transformer_model,
+             torch_dtype=torch.bfloat16,
+         )
+     else:
+         _pipe = FluxPipeline.from_pretrained(
+             "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
+         )
+
+     _pipe = _pipe.to("cuda")
+     _pipe.load_lora_weights(MODEL_PATH, adapter_name=ADAPTER_NAME)
+     _pipe.set_adapters([ADAPTER_NAME])
+     pipe = _pipe
+
+ def _to_pil_rgba(img: Any) -> Image.Image:
+     """Convert various inputs to PIL RGBA image"""
+     pil: Optional[Image.Image] = None
+
+     if isinstance(img, Image.Image):
+         pil = img
+     elif isinstance(img, np.ndarray):
+         pil = Image.fromarray(img)
+     elif isinstance(img, str) and os.path.exists(img):
+         pil = Image.open(img)
+     else:
+         raise ValueError("Unsupported image type")
+
+     if pil.mode != "RGBA":
+         pil = pil.convert("RGBA")
+     return pil
+
+ def _place_subject_on_canvas(
+     subject_rgba: Image.Image,
+     canvas_size: int,
+     style: str,
+     base_coverage: float = 0.7,
+ ) -> Image.Image:
+     """
+     Place subject on transparent canvas with position and angle adjustments based on style
+     """
+     canvas = Image.new("RGBA", (canvas_size, canvas_size), (0, 0, 0, 0))
+
+     # Define three styles
+     styles = {
+         "center": {"scale": 1.0, "rotation": 0, "pos": (0.0, 0.0)},
+         "tilt_left": {"scale": 0.95, "rotation": -15, "pos": (-0.1, 0.0)},
+         "right": {"scale": 0.95, "rotation": 0, "pos": (0.25, 0.0)},
+     }
+
+     if style not in styles:
+         style = "center"
+
+     style_config = styles[style]
+
+     # Calculate scaling
+     subject_w, subject_h = subject_rgba.size
+     max_dim = max(subject_w, subject_h)
+     desired_max_dim = max(1, int(canvas_size * base_coverage * style_config["scale"]))
+     scale = desired_max_dim / max(1, max_dim)
+     new_w = max(1, int(subject_w * scale))
+     new_h = max(1, int(subject_h * scale))
+     resized = subject_rgba.resize((new_w, new_h), Image.LANCZOS)
+
+     # Rotation
+     rotated = resized.rotate(style_config["rotation"], expand=True, resample=Image.BICUBIC)
+     rw, rh = rotated.size
+
+     # Positioning
+     cx = canvas_size // 2
+     cy = canvas_size // 2
+     dx = int(style_config["pos"][0] * canvas_size)
+     dy = int(style_config["pos"][1] * canvas_size)
+
+     paste_x = int(cx + dx - rw // 2)
+     paste_y = int(cy + dy - rh // 2)
+
+     canvas.alpha_composite(rotated, dest=(paste_x, paste_y))
+     return canvas
+
+ def _place_subject_on_canvas_rect(
+     subject_rgba: Image.Image,
+     canvas_width: int,
+     canvas_height: int,
+     style: str,
+     base_coverage: float = 0.7,
+ ) -> Image.Image:
+     """
+     Place subject on rectangular transparent canvas with position and angle adjustments based on style
+     """
+     canvas = Image.new("RGBA", (canvas_width, canvas_height), (0, 0, 0, 0))
+
+     # Define three styles
+     styles = {
+         "center": {"scale": 1.0, "rotation": 0, "pos": (0.0, 0.0)},
+         "tilt_left": {"scale": 0.95, "rotation": -15, "pos": (-0.1, 0.0)},
+         "right": {"scale": 0.95, "rotation": 0, "pos": (0.25, 0.0)},
+     }
+
+     if style not in styles:
+         style = "center"
+
+     style_config = styles[style]
+
+     # Calculate scaling based on smaller dimension
+     subject_w, subject_h = subject_rgba.size
+     max_dim = max(subject_w, subject_h)
+     canvas_min_dim = min(canvas_width, canvas_height)
+     desired_max_dim = max(1, int(canvas_min_dim * base_coverage * style_config["scale"]))
+     scale = desired_max_dim / max(1, max_dim)
+     new_w = max(1, int(subject_w * scale))
+     new_h = max(1, int(subject_h * scale))
+     resized = subject_rgba.resize((new_w, new_h), Image.LANCZOS)
+
+     # Rotation
+     rotated = resized.rotate(style_config["rotation"], expand=True, resample=Image.BICUBIC)
+     rw, rh = rotated.size
+
+     # Positioning
+     cx = canvas_width // 2
+     cy = canvas_height // 2
+     dx = int(style_config["pos"][0] * canvas_width)
+     dy = int(style_config["pos"][1] * canvas_height)
+
+     paste_x = int(cx + dx - rw // 2)
+     paste_y = int(cy + dy - rh // 2)
+
+     canvas.alpha_composite(rotated, dest=(paste_x, paste_y))
+     return canvas
+
+ def apply_style(image: Image.Image, style: str, width: int = 1024, height: int = 1024) -> Image.Image:
+     """Apply specified style to image with custom dimensions"""
+     if image is None:
+         # Create default transparent image
+         image = Image.new("RGBA", (512, 512), (255, 255, 255, 0))
+
+     # Ensure image is in RGBA format
+     if image.mode != "RGBA":
+         image = image.convert("RGBA")
+
+     # Apply style with custom dimensions
+     styled_image = _place_subject_on_canvas_rect(image, width, height, style)
+     return styled_image
+
+ def generate_background_local(styled_image: Image.Image, prompt: str, steps: int = 10, width: int = 1024, height: int = 1024) -> Image.Image:
+     """Generate background using local FLUX model"""
+     if not FLUX_AVAILABLE:
+         # Return a simple gradient background if FLUX is not available
+         if styled_image is None:
+             return Image.new("RGB", (width, height), (200, 200, 255))
+         # Create a simple colored background
+         bg = Image.new("RGB", (width, height), (200, 220, 255))
+         if styled_image.mode == "RGBA":
+             bg.paste(styled_image, (0, 0), styled_image)
+         else:
+             bg.paste(styled_image, (0, 0))
+         return bg
+
+     init_pipeline_if_needed()
+
+     if styled_image is None:
+         return Image.new("RGB", (width, height), (255, 255, 255))
+
+     # Convert to RGB for background generation
+     img_rgb = styled_image.convert("RGB")
+
+     condition = Condition(ADAPTER_NAME, img_rgb, position_delta=(0, 0))
+
+     # Enable padding token orthogonalization for enhanced text-image alignment
+     model_config = {
+         'padding_orthogonalization_enabled': True,
+         'preserve_norm': True,
+         'orthogonalize_all_tokens': False,
+     }
+
+     with set_lora_scale([ADAPTER_NAME], scale=3.0):
+         result_img = generate(
+             pipe,
+             model_config=model_config,
+             prompt=prompt.strip() if prompt else "",
+             conditions=[condition],
+             num_inference_steps=steps,
+             height=height,
+             width=width,
+             default_lora=True,
+         ).images[0]
+
+     return result_img
+
+ # Gradio Interface
+ @spaces.GPU
+ def create_simple_app():
+     # Example prompts for reference
+     example_prompts = [
+         {
+             "title": "Handcrafted Leather Wallet",
+             "prompt": "A hand-stitched, dark brown leather wallet lies half-open on a wooden desk with a map, next to a brass pen and compass. A stack of classic books is in the background. A warm desk lamp from the right highlights the leather texture. Classic, rustic style."
+         },
+         {
+             "title": "Sparkling Water with Fresh Lemons",
+             "prompt": "A dewy glass bottle of sparkling water on a white marble countertop, next to a sliced lemon and ice cubes. The background is a soft-focus, pale blue gradient. Lighting is bright, even, and cool-toned from above. Clean, crisp, minimalist style."
+         },
+         {
+             "title": "High-tech Smartwatch",
+             "prompt": "A titanium smartwatch with an illuminated screen rests on a black matte slate rock. The background is a blurred cityscape at night with neon bokeh. A sharp, direct light from the top-left highlights the watch's metallic edge. Futuristic, tech-focused style."
+         },
+         {
+             "title": "Japanese Ramen Bowl",
+             "prompt": "A ceramic bowl of tonkotsu ramen with chashu pork and a soft-boiled egg on a wooden table, with chopsticks beside it. Rising steam is caught in soft overhead light. The background is a blurred, cozy izakaya. Warm, authentic, and appetizing style."
+         },
+         {
+             "title": "Japanese Peach Iced Tea",
+             "prompt": "A bottle of Japanese peach iced tea beside a tall glass with tea and sparkling ice cubes. The background is a soft, warm peach and beige gradient. Lit with bright, soft light to appear crisp and refreshing. The style is clean, minimalist, and refined."
+         }
+     ]
+
+     with gr.Blocks(title="Ads Background Generation") as app:
+         gr.Markdown("# Ads Background Generation App")
+         gr.Markdown("Upload an image with transparent background → Enter prompt → Generate")
+
+         # Example Prompts Section
+         with gr.Accordion("📝 Example Prompts (Click to expand)", open=False):
+             gr.Markdown("### Background Prompt Examples")
+             gr.Markdown("Click any example below to copy it to the background description field:")
+
+             # Create example buttons
+             example_buttons = []
+             with gr.Row():
+                 for i, example in enumerate(example_prompts):
+                     if i < 3:  # First row
+                         example_btn = gr.Button(
+                             f"📋 {example['title']}",
+                             variant="secondary",
+                             size="sm"
+                         )
+                         example_buttons.append(example_btn)
+
+             with gr.Row():
+                 for i, example in enumerate(example_prompts):
+                     if i >= 3:  # Second row
+                         example_btn = gr.Button(
+                             f"📋 {example['title']}",
+                             variant="secondary",
+                             size="sm"
+                         )
+                         example_buttons.append(example_btn)
+
+             # Display area for selected prompt preview
+             selected_prompt_display = gr.Textbox(
+                 label="Selected Prompt Preview",
+                 lines=4,
+                 max_lines=8,
+                 interactive=False,
+                 visible=False
+             )
+
+         with gr.Row():
+             # Left column
+             with gr.Column(scale=1):
+                 # Image upload (top left)
+                 input_image = gr.Image(
+                     label="Upload Image (Transparent Background)",
+                     type="pil",
+                     format="png",
+                     image_mode="RGBA",
+                     height=350
+                 )
+
+                 # Image dimensions
+                 with gr.Row():
+                     img_width = gr.Number(
+                         value=1024,
+                         label="Width",
+                         precision=0,
+                         minimum=256,
+                         maximum=2048
+                     )
+                     img_height = gr.Number(
+                         value=1024,
+                         label="Height",
+                         precision=0,
+                         minimum=256,
+                         maximum=2048
+                     )
+
+                 # Background prompt (bottom left)
+                 bg_prompt = gr.Textbox(
+                     label="Background Description",
+                     placeholder="e.g.: Forest scene, soft lighting",
+                     lines=3
+                 )
+
+                 # Generation steps
+                 steps_slider = gr.Slider(
+                     minimum=5,
+                     maximum=20,
+                     value=10,
+                     step=1,
+                     label="Generation Steps"
+                 )
+
+                 # Generate background button
+                 generate_bg_btn = gr.Button("Generate Background", variant="primary", size="lg")
+
+             # Right column - Result display
+             with gr.Column(scale=1):
+                 final_result = gr.Image(
+                     label="Generated Result",
+                     type="pil",
+                     format="png",
+                     height=700
+                 )
+
+         # Generate background directly from input image
+         def generate_from_input(image, prompt, steps, width, height):
+             if image is None:
+                 return None
+
+             # Ensure image is RGBA
+             if image.mode != "RGBA":
+                 image = image.convert("RGBA")
+
+             # Generate background using local model only
+             return generate_background_local(image, prompt, steps, width, height)
+
+         # Event binding
+         generate_bg_btn.click(
+             fn=generate_from_input,
+             inputs=[input_image, bg_prompt, steps_slider, img_width, img_height],
+             outputs=[final_result]
+         )
+
+         # Example prompt button handlers
+         def create_example_handler(prompt_text):
+             def handler():
+                 return prompt_text, gr.update(value=prompt_text, visible=True)
+             return handler
+
+         # Connect example buttons to background prompt field and preview
+         for i, example_btn in enumerate(example_buttons):
+             if i < len(example_prompts):
+                 example_btn.click(
+                     fn=create_example_handler(example_prompts[i]['prompt']),
+                     outputs=[bg_prompt, selected_prompt_display]
+                 )
+
+     return app
+
+ if __name__ == "__main__":
+     app = create_simple_app()
+     app.launch(
+         debug=True,
+         share=False,
+         server_name="0.0.0.0",
+         server_port=7860
+     )
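Editor's note: a minimal local smoke test of the generation path above, shown as a sketch only. It assumes the CASCADE_PRIVATE_MODEL_HF_TOKEN and MODEL_PATH secrets are available in the environment, a CUDA GPU is present, and "product.png" is a hypothetical transparent-background product image.

    from PIL import Image
    import app  # the Space's app.py above; importing it downloads the private LoRA weights

    subject = Image.open("product.png")  # hypothetical RGBA product shot
    styled = app.apply_style(subject, "center", width=1024, height=1024)
    result = app.generate_background_local(styled, "studio background, soft lighting", steps=10)
    result.save("ad.png")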
flux/__init__.py ADDED
File without changes
flux/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (134 Bytes)
flux/__pycache__/block.cpython-312.pyc ADDED
Binary file (13.3 kB)
flux/__pycache__/condition.cpython-312.pyc ADDED
Binary file (5.74 kB)
flux/__pycache__/generate.cpython-312.pyc ADDED
Binary file (12.8 kB)
flux/__pycache__/lora_controller.cpython-312.pyc ADDED
Binary file (4.22 kB)
flux/__pycache__/padding_orthogonalization.cpython-312.pyc ADDED
Binary file (9.47 kB)
flux/__pycache__/pipeline_tools.cpython-312.pyc ADDED
Binary file (3.43 kB)
flux/__pycache__/transformer.cpython-312.pyc ADDED
Binary file (7.27 kB)
flux/block.py ADDED
@@ -0,0 +1,339 @@
+ import torch
+ from typing import List, Union, Optional, Dict, Any, Callable
+ from diffusers.models.attention_processor import Attention, F
+ from .lora_controller import enable_lora
+
+
+ def attn_forward(
+     attn: Attention,
+     hidden_states: torch.FloatTensor,
+     encoder_hidden_states: torch.FloatTensor = None,
+     condition_latents: torch.FloatTensor = None,
+     attention_mask: Optional[torch.FloatTensor] = None,
+     image_rotary_emb: Optional[torch.Tensor] = None,
+     cond_rotary_emb: Optional[torch.Tensor] = None,
+     model_config: Optional[Dict[str, Any]] = {},
+ ) -> torch.FloatTensor:
+     batch_size, _, _ = (
+         hidden_states.shape
+         if encoder_hidden_states is None
+         else encoder_hidden_states.shape
+     )
+
+     with enable_lora(
+         (attn.to_q, attn.to_k, attn.to_v), model_config.get("latent_lora", False)
+     ):
+         # `sample` projections.
+         query = attn.to_q(hidden_states)
+         key = attn.to_k(hidden_states)
+         value = attn.to_v(hidden_states)
+
+     inner_dim = key.shape[-1]
+     head_dim = inner_dim // attn.heads
+
+     query = query.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+     key = key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+     value = value.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+
+     if attn.norm_q is not None:
+         query = attn.norm_q(query)
+     if attn.norm_k is not None:
+         key = attn.norm_k(key)
+
+     # the attention in FluxSingleTransformerBlock does not use `encoder_hidden_states`
+     if encoder_hidden_states is not None:
+         # `context` projections.
+         encoder_hidden_states_query_proj = attn.add_q_proj(encoder_hidden_states)
+         encoder_hidden_states_key_proj = attn.add_k_proj(encoder_hidden_states)
+         encoder_hidden_states_value_proj = attn.add_v_proj(encoder_hidden_states)
+
+         encoder_hidden_states_query_proj = encoder_hidden_states_query_proj.view(
+             batch_size, -1, attn.heads, head_dim
+         ).transpose(1, 2)
+         encoder_hidden_states_key_proj = encoder_hidden_states_key_proj.view(
+             batch_size, -1, attn.heads, head_dim
+         ).transpose(1, 2)
+         encoder_hidden_states_value_proj = encoder_hidden_states_value_proj.view(
+             batch_size, -1, attn.heads, head_dim
+         ).transpose(1, 2)
+
+         if attn.norm_added_q is not None:
+             encoder_hidden_states_query_proj = attn.norm_added_q(
+                 encoder_hidden_states_query_proj
+             )
+         if attn.norm_added_k is not None:
+             encoder_hidden_states_key_proj = attn.norm_added_k(
+                 encoder_hidden_states_key_proj
+             )
+
+         # attention
+         query = torch.cat([encoder_hidden_states_query_proj, query], dim=2)
+         key = torch.cat([encoder_hidden_states_key_proj, key], dim=2)
+         value = torch.cat([encoder_hidden_states_value_proj, value], dim=2)
+
+     if image_rotary_emb is not None:
+         from diffusers.models.embeddings import apply_rotary_emb
+
+         query = apply_rotary_emb(query, image_rotary_emb)
+         key = apply_rotary_emb(key, image_rotary_emb)
+
+     if condition_latents is not None:
+         cond_query = attn.to_q(condition_latents)
+         cond_key = attn.to_k(condition_latents)
+         cond_value = attn.to_v(condition_latents)
+
+         cond_query = cond_query.view(batch_size, -1, attn.heads, head_dim).transpose(
+             1, 2
+         )
+         cond_key = cond_key.view(batch_size, -1, attn.heads, head_dim).transpose(1, 2)
+         cond_value = cond_value.view(batch_size, -1, attn.heads, head_dim).transpose(
+             1, 2
+         )
+         if attn.norm_q is not None:
+             cond_query = attn.norm_q(cond_query)
+         if attn.norm_k is not None:
+             cond_key = attn.norm_k(cond_key)
+
+     if cond_rotary_emb is not None:
+         cond_query = apply_rotary_emb(cond_query, cond_rotary_emb)
+         cond_key = apply_rotary_emb(cond_key, cond_rotary_emb)
+
+     if condition_latents is not None:
+         query = torch.cat([query, cond_query], dim=2)
+         key = torch.cat([key, cond_key], dim=2)
+         value = torch.cat([value, cond_value], dim=2)
+
+     if not model_config.get("union_cond_attn", True):
+         # If we don't want to use the union condition attention, we need to mask the attention
+         # between the hidden states and the condition latents
+         attention_mask = torch.ones(
+             query.shape[2], key.shape[2], device=query.device, dtype=torch.bool
+         )
+         condition_n = cond_query.shape[2]
+         attention_mask[-condition_n:, :-condition_n] = False
+         attention_mask[:-condition_n, -condition_n:] = False
+     elif model_config.get("independent_condition", False):
+         attention_mask = torch.ones(
+             query.shape[2], key.shape[2], device=query.device, dtype=torch.bool
+         )
+         condition_n = cond_query.shape[2]
+         attention_mask[-condition_n:, :-condition_n] = False
+     if hasattr(attn, "c_factor"):
+         attention_mask = torch.zeros(
+             query.shape[2], key.shape[2], device=query.device, dtype=query.dtype
+         )
+         condition_n = cond_query.shape[2]
+         bias = torch.log(attn.c_factor[0])
+         attention_mask[-condition_n:, :-condition_n] = bias
+         attention_mask[:-condition_n, -condition_n:] = bias
+     hidden_states = F.scaled_dot_product_attention(
+         query, key, value, dropout_p=0.0, is_causal=False, attn_mask=attention_mask
+     )
+     hidden_states = hidden_states.transpose(1, 2).reshape(
+         batch_size, -1, attn.heads * head_dim
+     )
+     hidden_states = hidden_states.to(query.dtype)
+
+     if encoder_hidden_states is not None:
+         if condition_latents is not None:
+             encoder_hidden_states, hidden_states, condition_latents = (
+                 hidden_states[:, : encoder_hidden_states.shape[1]],
+                 hidden_states[
+                     :, encoder_hidden_states.shape[1] : -condition_latents.shape[1]
+                 ],
+                 hidden_states[:, -condition_latents.shape[1] :],
+             )
+         else:
+             encoder_hidden_states, hidden_states = (
+                 hidden_states[:, : encoder_hidden_states.shape[1]],
+                 hidden_states[:, encoder_hidden_states.shape[1] :],
+             )
+
+         with enable_lora((attn.to_out[0],), model_config.get("latent_lora", False)):
+             # linear proj
+             hidden_states = attn.to_out[0](hidden_states)
+             # dropout
+             hidden_states = attn.to_out[1](hidden_states)
+         encoder_hidden_states = attn.to_add_out(encoder_hidden_states)
+
+         if condition_latents is not None:
+             condition_latents = attn.to_out[0](condition_latents)
+             condition_latents = attn.to_out[1](condition_latents)
+
+         return (
+             (hidden_states, encoder_hidden_states, condition_latents)
+             if condition_latents is not None
+             else (hidden_states, encoder_hidden_states)
+         )
+     elif condition_latents is not None:
+         # if there are condition_latents, we need to separate the hidden_states and the condition_latents
+         hidden_states, condition_latents = (
+             hidden_states[:, : -condition_latents.shape[1]],
+             hidden_states[:, -condition_latents.shape[1] :],
+         )
+         return hidden_states, condition_latents
+     else:
+         return hidden_states
+
+
+ def block_forward(
+     self,
+     hidden_states: torch.FloatTensor,
+     encoder_hidden_states: torch.FloatTensor,
+     condition_latents: torch.FloatTensor,
+     temb: torch.FloatTensor,
+     cond_temb: torch.FloatTensor,
+     cond_rotary_emb=None,
+     image_rotary_emb=None,
+     model_config: Optional[Dict[str, Any]] = {},
+ ):
+     use_cond = condition_latents is not None
+     with enable_lora((self.norm1.linear,), model_config.get("latent_lora", False)):
+         norm_hidden_states, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.norm1(
+             hidden_states, emb=temb
+         )
+
+     norm_encoder_hidden_states, c_gate_msa, c_shift_mlp, c_scale_mlp, c_gate_mlp = (
+         self.norm1_context(encoder_hidden_states, emb=temb)
+     )
+
+     if use_cond:
+         (
+             norm_condition_latents,
+             cond_gate_msa,
+             cond_shift_mlp,
+             cond_scale_mlp,
+             cond_gate_mlp,
+         ) = self.norm1(condition_latents, emb=cond_temb)
+
+     # Attention.
+     result = attn_forward(
+         self.attn,
+         model_config=model_config,
+         hidden_states=norm_hidden_states,
+         encoder_hidden_states=norm_encoder_hidden_states,
+         condition_latents=norm_condition_latents if use_cond else None,
+         image_rotary_emb=image_rotary_emb,
+         cond_rotary_emb=cond_rotary_emb if use_cond else None,
+     )
+     attn_output, context_attn_output = result[:2]
+     cond_attn_output = result[2] if use_cond else None
+
+     # Process attention outputs for the `hidden_states`.
+     # 1. hidden_states
+     attn_output = gate_msa.unsqueeze(1) * attn_output
+     hidden_states = hidden_states + attn_output
+     # 2. encoder_hidden_states
+     context_attn_output = c_gate_msa.unsqueeze(1) * context_attn_output
+     encoder_hidden_states = encoder_hidden_states + context_attn_output
+     # 3. condition_latents
+     if use_cond:
+         cond_attn_output = cond_gate_msa.unsqueeze(1) * cond_attn_output
+         condition_latents = condition_latents + cond_attn_output
+         if model_config.get("add_cond_attn", False):
+             hidden_states += cond_attn_output
+
+     # LayerNorm + MLP.
+     # 1. hidden_states
+     norm_hidden_states = self.norm2(hidden_states)
+     norm_hidden_states = (
+         norm_hidden_states * (1 + scale_mlp[:, None]) + shift_mlp[:, None]
+     )
+     # 2. encoder_hidden_states
+     norm_encoder_hidden_states = self.norm2_context(encoder_hidden_states)
+     norm_encoder_hidden_states = (
+         norm_encoder_hidden_states * (1 + c_scale_mlp[:, None]) + c_shift_mlp[:, None]
+     )
+     # 3. condition_latents
+     if use_cond:
+         norm_condition_latents = self.norm2(condition_latents)
+         norm_condition_latents = (
+             norm_condition_latents * (1 + cond_scale_mlp[:, None])
+             + cond_shift_mlp[:, None]
+         )
+
+     # Feed-forward.
+     with enable_lora((self.ff.net[2],), model_config.get("latent_lora", False)):
+         # 1. hidden_states
+         ff_output = self.ff(norm_hidden_states)
+         ff_output = gate_mlp.unsqueeze(1) * ff_output
+     # 2. encoder_hidden_states
+     context_ff_output = self.ff_context(norm_encoder_hidden_states)
+     context_ff_output = c_gate_mlp.unsqueeze(1) * context_ff_output
+     # 3. condition_latents
+     if use_cond:
+         cond_ff_output = self.ff(norm_condition_latents)
+         cond_ff_output = cond_gate_mlp.unsqueeze(1) * cond_ff_output
+
+     # Process feed-forward outputs.
+     hidden_states = hidden_states + ff_output
+     encoder_hidden_states = encoder_hidden_states + context_ff_output
+     if use_cond:
+         condition_latents = condition_latents + cond_ff_output
+
+     # Clip to avoid overflow.
+     if encoder_hidden_states.dtype == torch.float16:
+         encoder_hidden_states = encoder_hidden_states.clip(-65504, 65504)
+
+     return encoder_hidden_states, hidden_states, condition_latents if use_cond else None
+
+
+ def single_block_forward(
+     self,
+     hidden_states: torch.FloatTensor,
+     temb: torch.FloatTensor,
+     image_rotary_emb=None,
+     condition_latents: torch.FloatTensor = None,
+     cond_temb: torch.FloatTensor = None,
+     cond_rotary_emb=None,
+     model_config: Optional[Dict[str, Any]] = {},
+ ):
+
+     using_cond = condition_latents is not None
+     residual = hidden_states
+     with enable_lora(
+         (
+             self.norm.linear,
+             self.proj_mlp,
+         ),
+         model_config.get("latent_lora", False),
+     ):
+         norm_hidden_states, gate = self.norm(hidden_states, emb=temb)
+         mlp_hidden_states = self.act_mlp(self.proj_mlp(norm_hidden_states))
+     if using_cond:
+         residual_cond = condition_latents
+         norm_condition_latents, cond_gate = self.norm(condition_latents, emb=cond_temb)
+         mlp_cond_hidden_states = self.act_mlp(self.proj_mlp(norm_condition_latents))
+
+     attn_output = attn_forward(
+         self.attn,
+         model_config=model_config,
+         hidden_states=norm_hidden_states,
+         image_rotary_emb=image_rotary_emb,
+         **(
+             {
+                 "condition_latents": norm_condition_latents,
+                 "cond_rotary_emb": cond_rotary_emb if using_cond else None,
+             }
+             if using_cond
+             else {}
+         ),
+     )
+     if using_cond:
+         attn_output, cond_attn_output = attn_output
+
+     with enable_lora((self.proj_out,), model_config.get("latent_lora", False)):
+         hidden_states = torch.cat([attn_output, mlp_hidden_states], dim=2)
+         gate = gate.unsqueeze(1)
+         hidden_states = gate * self.proj_out(hidden_states)
+         hidden_states = residual + hidden_states
+         if using_cond:
+             condition_latents = torch.cat([cond_attn_output, mlp_cond_hidden_states], dim=2)
+             cond_gate = cond_gate.unsqueeze(1)
+             condition_latents = cond_gate * self.proj_out(condition_latents)
+             condition_latents = residual_cond + condition_latents
+
+     if hidden_states.dtype == torch.float16:
+         hidden_states = hidden_states.clip(-65504, 65504)
+
+     return hidden_states if not using_cond else (hidden_states, condition_latents)
flux/condition.py ADDED
@@ -0,0 +1,138 @@
+ import torch
+ from typing import Optional, Union, List, Tuple
+ from diffusers.pipelines import FluxPipeline
+ from PIL import Image, ImageFilter
+ import numpy as np
+ import cv2
+
+ from .pipeline_tools import encode_images
+
+ condition_dict = {
+     "depth": 0,
+     "canny": 1,
+     "subject": 4,
+     "coloring": 6,
+     "deblurring": 7,
+     "depth_pred": 8,
+     "fill": 9,
+     "sr": 10,
+     "cartoon": 11,
+ }
+
+
+ class Condition(object):
+     def __init__(
+         self,
+         condition_type: str,
+         raw_img: Union[Image.Image, torch.Tensor] = None,
+         condition: Union[Image.Image, torch.Tensor] = None,
+         mask=None,
+         position_delta=None,
+         position_scale=1.0,
+     ) -> None:
+         self.condition_type = condition_type
+         assert raw_img is not None or condition is not None
+         if raw_img is not None:
+             self.condition = self.get_condition(condition_type, raw_img)
+         else:
+             self.condition = condition
+         self.position_delta = position_delta
+         self.position_scale = position_scale
+         # TODO: Add mask support
+         assert mask is None, "Mask not supported yet"
+
+     def get_condition(
+         self, condition_type: str, raw_img: Union[Image.Image, torch.Tensor]
+     ) -> Union[Image.Image, torch.Tensor]:
+         """
+         Returns the condition image.
+         """
+         if condition_type == "depth":
+             from transformers import pipeline
+
+             depth_pipe = pipeline(
+                 task="depth-estimation",
+                 model="LiheYoung/depth-anything-small-hf",
+                 device="cuda",
+             )
+             source_image = raw_img.convert("RGB")
+             condition_img = depth_pipe(source_image)["depth"].convert("RGB")
+             return condition_img
+         elif condition_type == "canny":
+             img = np.array(raw_img)
+             edges = cv2.Canny(img, 100, 200)
+             edges = Image.fromarray(edges).convert("RGB")
+             return edges
+         elif condition_type == "subject":
+             return raw_img
+         elif condition_type == "coloring":
+             return raw_img.convert("L").convert("RGB")
+         elif condition_type == "deblurring":
+             condition_image = (
+                 raw_img.convert("RGB")
+                 .filter(ImageFilter.GaussianBlur(10))
+                 .convert("RGB")
+             )
+             return condition_image
+         elif condition_type == "fill":
+             return raw_img.convert("RGB")
+         elif condition_type == "cartoon":
+             return raw_img.convert("RGB")
+         return self.condition
+
+     @property
+     def type_id(self) -> int:
+         """
+         Returns the type id of the condition.
+         """
+         return condition_dict[self.condition_type]
+
+     @classmethod
+     def get_type_id(cls, condition_type: str) -> int:
+         """
+         Returns the type id of the condition.
+         """
+         return condition_dict[condition_type]
+
+     def encode(
+         self, pipe: FluxPipeline, empty: bool = False
+     ) -> Tuple[torch.Tensor, torch.Tensor, int]:
+         """
+         Encodes the condition into tokens, ids and type_id.
+         """
+         if self.condition_type in [
+             "depth",
+             "canny",
+             "subject",
+             "coloring",
+             "deblurring",
+             "depth_pred",
+             "fill",
+             "sr",
+             "cartoon",
+         ]:
+             if empty:
+                 # make the condition black
+                 e_condition = Image.new("RGB", self.condition.size, (0, 0, 0))
+                 e_condition = e_condition.convert("RGB")
+                 tokens, ids = encode_images(pipe, e_condition)
+             else:
+                 tokens, ids = encode_images(pipe, self.condition)
+         else:
+             raise NotImplementedError(
+                 f"Condition type {self.condition_type} not implemented"
+             )
+         if self.position_delta is None and self.condition_type == "subject":
+             self.position_delta = [0, -self.condition.size[0] // 16]
+         if self.position_delta is not None:
+             ids[:, 1] += self.position_delta[0]
+             ids[:, 2] += self.position_delta[1]
+         if self.position_scale != 1.0:
+             scale_bias = (self.position_scale - 1.0) / 2
+             ids[:, 1] *= self.position_scale
+             ids[:, 2] *= self.position_scale
+             ids[:, 1] += scale_bias
+             ids[:, 2] += scale_bias
+         type_id = torch.ones_like(ids[:, :1]) * self.type_id
+         return tokens, ids, type_id
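Editor's note: a hedged sketch of how the class above is used on its own, matching the call made in app.py. It assumes `pipe` is an already-loaded FluxPipeline and `img` is an RGB PIL image.

    from flux.condition import Condition

    cond = Condition("subject", raw_img=img, position_delta=(0, 0))
    tokens, ids, type_id = cond.encode(pipe)  # VAE-encoded tokens, position ids, and the condition type id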
flux/generate.py ADDED
@@ -0,0 +1,366 @@
+ import os
+ from typing import Any, Callable, Dict, List, Optional, Union
+
+ import torch
+ import yaml
+ from diffusers.pipelines import FluxPipeline
+ from diffusers.pipelines.flux.pipeline_flux import (
+     FluxPipelineOutput,
+     calculate_shift,
+     np,
+     retrieve_timesteps,
+ )
+
+ from .condition import Condition
+ from .transformer import tranformer_forward
+ from .padding_orthogonalization import apply_padding_token_orthogonalization
+
+
+ def get_config(config_path: str = None):
+     config_path = config_path or os.environ.get("XFL_CONFIG")
+     if not config_path:
+         return {}
+     with open(config_path, "r") as f:
+         config = yaml.safe_load(f)
+     return config
+
+
+ def prepare_params(
+     prompt: Union[str, List[str]] = None,
+     prompt_2: Optional[Union[str, List[str]]] = None,
+     height: Optional[int] = 512,
+     width: Optional[int] = 512,
+     num_inference_steps: int = 28,
+     timesteps: List[int] = None,
+     guidance_scale: float = 3.5,
+     num_images_per_prompt: Optional[int] = 1,
+     generator: Optional[Union[torch.Generator, List[torch.Generator]]] = None,
+     latents: Optional[torch.FloatTensor] = None,
+     prompt_embeds: Optional[torch.FloatTensor] = None,
+     pooled_prompt_embeds: Optional[torch.FloatTensor] = None,
+     output_type: Optional[str] = "pil",
+     return_dict: bool = True,
+     joint_attention_kwargs: Optional[Dict[str, Any]] = None,
+     callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
+     callback_on_step_end_tensor_inputs: List[str] = ["latents"],
+     max_sequence_length: int = 512,
+     **kwargs: dict,
+ ):
+     return (
+         prompt,
+         prompt_2,
+         height,
+         width,
+         num_inference_steps,
+         timesteps,
+         guidance_scale,
+         num_images_per_prompt,
+         generator,
+         latents,
+         prompt_embeds,
+         pooled_prompt_embeds,
+         output_type,
+         return_dict,
+         joint_attention_kwargs,
+         callback_on_step_end,
+         callback_on_step_end_tensor_inputs,
+         max_sequence_length,
+     )
+
+
+ def seed_everything(seed: int = 42):
+     torch.backends.cudnn.deterministic = True
+     torch.manual_seed(seed)
+     np.random.seed(seed)
+
+
+ @torch.no_grad()
+ def generate(
+     pipeline: FluxPipeline,
+     conditions: List[Condition] = None,
+     config_path: str = None,
+     model_config: Optional[Dict[str, Any]] = {},
+     condition_scale: float = 1.0,
+     default_lora: bool = False,
+     default_lora_path: str = None,
+     image_guidance_scale: float = 1.0,
+     **params: dict,
+ ):
+     """
+     Enhanced Flux text-to-image generation with padding token orthogonalization.
+
+     This function implements the padding token orthogonalization method from the poster
+     "Enhanced Text-to-Image Generation via Padding Token Orthogonalization" to improve
+     text-image alignment quality.
+
+     Args:
+         pipeline: FluxPipeline instance
+         conditions: List of condition objects
+         config_path: Path to configuration file
+         model_config: Model configuration dictionary. Supports:
+             - padding_orthogonalization_enabled (bool): Enable/disable orthogonalization (default: True)
+             - preserve_norm (bool): Preserve original embedding norms (default: True)
+             - orthogonalize_all_tokens (bool): Orthogonalize all tokens vs only padding (default: False)
+         condition_scale: Scale factor for conditions
+         default_lora: Whether to use default LoRA
+         default_lora_path: Path to default LoRA weights
+         image_guidance_scale: Scale for image guidance
+         **params: Additional generation parameters
+
+     Returns:
+         Generated images with enhanced text-image alignment
+     """
+     model_config = model_config or get_config(config_path).get("model", {})
+     if condition_scale != 1:
+         for name, module in pipeline.transformer.named_modules():
+             if not name.endswith(".attn"):
+                 continue
+             module.c_factor = torch.ones(1, 1) * condition_scale
+     if default_lora and default_lora_path:
+         pipeline.load_lora_weights(default_lora_path)
+
+     self = pipeline
+     (
+         prompt,
+         prompt_2,
+         height,
+         width,
+         num_inference_steps,
+         timesteps,
+         guidance_scale,
+         num_images_per_prompt,
+         generator,
+         latents,
+         prompt_embeds,
+         pooled_prompt_embeds,
+         output_type,
+         return_dict,
+         joint_attention_kwargs,
+         callback_on_step_end,
+         callback_on_step_end_tensor_inputs,
+         max_sequence_length,
+     ) = prepare_params(**params)
+
+     height = height or self.default_sample_size * self.vae_scale_factor
+     width = width or self.default_sample_size * self.vae_scale_factor
+
+     # 1. Check inputs. Raise error if not correct
+     self.check_inputs(
+         prompt,
+         prompt_2,
+         height,
+         width,
+         prompt_embeds=prompt_embeds,
+         pooled_prompt_embeds=pooled_prompt_embeds,
+         callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
+         max_sequence_length=max_sequence_length,
+     )
+
+     self._guidance_scale = guidance_scale
+     self._joint_attention_kwargs = joint_attention_kwargs
+     self._interrupt = False
+
+     # 2. Define call parameters
+     if prompt is not None and isinstance(prompt, str):
+         batch_size = 1
+     elif prompt is not None and isinstance(prompt, list):
+         batch_size = len(prompt)
+     else:
+         batch_size = prompt_embeds.shape[0]
+
+     device = self._execution_device
+
+     lora_scale = (
+         self.joint_attention_kwargs.get("scale", None)
+         if self.joint_attention_kwargs is not None
+         else None
+     )
+     (
+         prompt_embeds,
+         pooled_prompt_embeds,
+         text_ids,
+     ) = self.encode_prompt(
+         prompt=prompt,
+         prompt_2=prompt_2,
+         prompt_embeds=prompt_embeds,
+         pooled_prompt_embeds=pooled_prompt_embeds,
+         device=device,
+         num_images_per_prompt=num_images_per_prompt,
+         max_sequence_length=max_sequence_length,
+         lora_scale=lora_scale,
+     )
+
+     # Apply Padding Token Orthogonalization for enhanced text-image alignment
+     if model_config.get('padding_orthogonalization_enabled', True):
+         prompt_embeds = apply_padding_token_orthogonalization(
+             prompt_embeds=prompt_embeds,
+             text_attention_mask=None,  # Will use heuristic if not available
+             config=model_config,
+         )
+
+     # 4. Prepare latent variables
+     num_channels_latents = self.transformer.config.in_channels // 4
+     latents, latent_image_ids = self.prepare_latents(
+         batch_size * num_images_per_prompt,
+         num_channels_latents,
+         height,
+         width,
+         prompt_embeds.dtype,
+         device,
+         generator,
+         latents,
+     )
+
+     # 4.1. Prepare conditions
+     condition_latents, condition_ids, condition_type_ids = ([] for _ in range(3))
+     use_condition = conditions is not None or []
+     if use_condition:
+         assert len(conditions) <= 1, "Only one condition is supported for now."
+         if not default_lora:
+             pipeline.set_adapters(conditions[0].condition_type)
+         for condition in conditions:
+             print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
+             print(f"Condition: {condition.condition_type}")
+             tokens, ids, type_id = condition.encode(self)
+             condition_latents.append(tokens)  # [batch_size, token_n, token_dim]
+             condition_ids.append(ids)  # [token_n, id_dim(3)]
+             condition_type_ids.append(type_id)  # [token_n, 1]
+         condition_latents = torch.cat(condition_latents, dim=1)
+         condition_ids = torch.cat(condition_ids, dim=0)
+         condition_type_ids = torch.cat(condition_type_ids, dim=0)
+
+     # 5. Prepare timesteps
+     sigmas = np.linspace(1.0, 1 / num_inference_steps, num_inference_steps)
+     image_seq_len = latents.shape[1]
+     mu = calculate_shift(
+         image_seq_len,
+         self.scheduler.config.base_image_seq_len,
+         self.scheduler.config.max_image_seq_len,
+         self.scheduler.config.base_shift,
+         self.scheduler.config.max_shift,
+     )
+     timesteps, num_inference_steps = retrieve_timesteps(
+         self.scheduler,
+         num_inference_steps,
+         device,
+         timesteps,
+         sigmas,
+         mu=mu,
+     )
+     num_warmup_steps = max(
+         len(timesteps) - num_inference_steps * self.scheduler.order, 0
+     )
+     self._num_timesteps = len(timesteps)
+
+     # 6. Denoising loop
+     with self.progress_bar(total=num_inference_steps) as progress_bar:
+         for i, t in enumerate(timesteps):
+             if self.interrupt:
+                 continue
+
+             # broadcast to batch dimension in a way that's compatible with ONNX/Core ML
+             timestep = t.expand(latents.shape[0]).to(latents.dtype)
+
+             # handle guidance
+             if self.transformer.config.guidance_embeds:
+                 guidance = torch.tensor([guidance_scale], device=device)
+                 guidance = guidance.expand(latents.shape[0])
+             else:
+                 guidance = None
+             noise_pred = tranformer_forward(
+                 self.transformer,
+                 model_config=model_config,
+                 # Inputs of the condition (new feature)
+                 condition_latents=condition_latents if use_condition else None,
+                 condition_ids=condition_ids if use_condition else None,
+                 condition_type_ids=condition_type_ids if use_condition else None,
+                 # Inputs to the original transformer
+                 hidden_states=latents,
+                 # YiYi notes: divide it by 1000 for now because we scale it by 1000 in the transformer model (we should not keep it but I want to keep the inputs same for the model for testing)
+                 timestep=timestep / 1000,
+                 guidance=guidance,
+                 pooled_projections=pooled_prompt_embeds,
+                 encoder_hidden_states=prompt_embeds,
+                 txt_ids=text_ids,
+                 img_ids=latent_image_ids,
+                 joint_attention_kwargs=self.joint_attention_kwargs,
+                 return_dict=False,
+             )[0]
+
+             if image_guidance_scale != 1.0:
+                 uncondition_latents = condition.encode(self, empty=True)[0]
+                 # Fix: when `guidance` is None, create a suitable replacement tensor
+                 # (a tensor of ones with shape [latents.shape[0]])
+                 guidance_replacement = torch.ones(latents.shape[0], device=device)
+                 unc_pred = tranformer_forward(
+                     self.transformer,
+                     model_config=model_config,
+                     # Inputs of the condition (new feature)
+                     condition_latents=uncondition_latents if use_condition else None,
+                     condition_ids=condition_ids if use_condition else None,
+                     condition_type_ids=condition_type_ids if use_condition else None,
+                     # Inputs to the original transformer
+                     hidden_states=latents,
+                     # YiYi notes: divide it by 1000 for now because we scale it by 1000 in the transformer model (we should not keep it but I want to keep the inputs same for the model for testing)
+                     timestep=timestep / 1000,
+                     # guidance=torch.ones_like(guidance),
+                     guidance=guidance_replacement,
+                     pooled_projections=pooled_prompt_embeds,
+                     encoder_hidden_states=prompt_embeds,
+                     txt_ids=text_ids,
+                     img_ids=latent_image_ids,
+                     # joint_attention_kwargs=self.joint_attention_kwargs,
+                     joint_attention_kwargs=None,
+                     return_dict=False,
+                 )[0]
+
+                 noise_pred = unc_pred + image_guidance_scale * (noise_pred - unc_pred)
+
+             # compute the previous noisy sample x_t -> x_t-1
+             latents_dtype = latents.dtype
+             latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
+
+             if latents.dtype != latents_dtype:
+                 if torch.backends.mps.is_available():
+                     # some platforms (eg. apple mps) misbehave due to a pytorch bug: https://github.com/pytorch/pytorch/pull/99272
+                     latents = latents.to(latents_dtype)
+
+             if callback_on_step_end is not None:
+                 callback_kwargs = {}
+                 for k in callback_on_step_end_tensor_inputs:
+                     callback_kwargs[k] = locals()[k]
+                 callback_outputs = callback_on_step_end(self, i, t, callback_kwargs)
+
+                 latents = callback_outputs.pop("latents", latents)
+                 prompt_embeds = callback_outputs.pop("prompt_embeds", prompt_embeds)
+
+             # call the callback, if provided
+             if i == len(timesteps) - 1 or (
+                 (i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0
+             ):
+                 progress_bar.update()
+
+     if output_type == "latent":
+         image = latents
+
+     else:
+         latents = self._unpack_latents(latents, height, width, self.vae_scale_factor)
+         latents = (
+             latents / self.vae.config.scaling_factor
+         ) + self.vae.config.shift_factor
+         image = self.vae.decode(latents, return_dict=False)[0]
+         image = self.image_processor.postprocess(image, output_type=output_type)
+
+     # Offload all models
+     self.maybe_free_model_hooks()
+
+     if condition_scale != 1:
+         for name, module in pipeline.transformer.named_modules():
+             if not name.endswith(".attn"):
+                 continue
+             del module.c_factor
+
+     if not return_dict:
+         return (image,)
+
+     return FluxPipelineOutput(images=image)
flux/lora_controller.py ADDED
@@ -0,0 +1,77 @@
+ from peft.tuners.tuners_utils import BaseTunerLayer
+ from typing import List, Any, Optional, Type
+ from .condition import condition_dict
+
+ class enable_lora:
+     def __init__(self, lora_modules: List[BaseTunerLayer], activated: bool) -> None:
+         self.activated: bool = activated
+         if activated:
+             return
+         self.lora_modules: List[BaseTunerLayer] = [
+             each for each in lora_modules if isinstance(each, BaseTunerLayer)
+         ]
+         self.scales = [
+             {
+                 active_adapter: lora_module.scaling[active_adapter]
+                 for active_adapter in lora_module.active_adapters
+             }
+             for lora_module in self.lora_modules
+         ]
+
+     def __enter__(self) -> None:
+         if self.activated:
+             return
+
+         for lora_module in self.lora_modules:
+             if not isinstance(lora_module, BaseTunerLayer):
+                 continue
+             for active_adapter in lora_module.active_adapters:
+                 if active_adapter in condition_dict.keys():
+                     lora_module.scaling[active_adapter] = 0.0
+
+     def __exit__(
+         self,
+         exc_type: Optional[Type[BaseException]],
+         exc_val: Optional[BaseException],
+         exc_tb: Optional[Any],
+     ) -> None:
+         if self.activated:
+             return
+         for i, lora_module in enumerate(self.lora_modules):
+             if not isinstance(lora_module, BaseTunerLayer):
+                 continue
+             for active_adapter in lora_module.active_adapters:
+                 lora_module.scaling[active_adapter] = self.scales[i][active_adapter]
+
+
+ class set_lora_scale:
+     def __init__(self, lora_modules: List[BaseTunerLayer], scale: float) -> None:
+         self.lora_modules: List[BaseTunerLayer] = [
+             each for each in lora_modules if isinstance(each, BaseTunerLayer)
+         ]
+         self.scales = [
+             {
+                 active_adapter: lora_module.scaling[active_adapter]
+                 for active_adapter in lora_module.active_adapters
+             }
+             for lora_module in self.lora_modules
+         ]
+         self.scale = scale
+
+     def __enter__(self) -> None:
+         for lora_module in self.lora_modules:
+             if not isinstance(lora_module, BaseTunerLayer):
+                 continue
+             lora_module.scale_layer(self.scale)
+
+     def __exit__(
+         self,
+         exc_type: Optional[Type[BaseException]],
+         exc_val: Optional[BaseException],
+         exc_tb: Optional[Any],
+     ) -> None:
+         for i, lora_module in enumerate(self.lora_modules):
+             if not isinstance(lora_module, BaseTunerLayer):
+                 continue
+             for active_adapter in lora_module.active_adapters:
+                 lora_module.scaling[active_adapter] = self.scales[i][active_adapter]
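Editor's note: a sketch of how these context managers are used by block.py in this commit, which passes PEFT-wrapped projection layers rather than adapter names. The module path below is illustrative and assumes `pipe` is a FluxPipeline with a LoRA adapter loaded.

    from flux.lora_controller import enable_lora, set_lora_scale

    attn = pipe.transformer.transformer_blocks[0].attn  # illustrative layer choice
    with enable_lora((attn.to_q, attn.to_k, attn.to_v), False):
        ...  # condition-type adapters on these layers are zeroed inside this block, then restored
    with set_lora_scale([attn.to_q, attn.to_k, attn.to_v], scale=3.0):
        ...  # the LoRA contribution on these layers is scaled up inside this block, then restored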
flux/padding_orthogonalization.py ADDED
@@ -0,0 +1,252 @@
1
+ """
2
+ Enhanced Text-to-Image Generation via Padding Token Orthogonalization
3
+
4
+ This module implements the padding token orthogonalization method described in the poster
5
+ "Enhanced Text-to-Image Generation via Padding Token Orthogonalization" by Jiafeng Mao,
6
+ Qianru Qiu, Xueting Wang from CyberAgent AI Lab.
7
+
8
+ The core idea is to use padding tokens as registers that collect, store, and redistribute
9
+ features across layers via attention pathways through Gram-Schmidt orthogonalization.
10
+ """
11
+
12
+ import torch
13
+ import torch.nn as nn
14
+ from typing import Optional, Tuple
15
+ import logging
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
+ def orthogonalize_rows(X: torch.Tensor) -> torch.Tensor:
21
+ """
22
+ Orthogonalize rows of matrix X using QR decomposition.
23
+
24
+ This is the core function from the poster: Q, _ = torch.linalg.qr(X.T) return Q.T
25
+
26
+ Args:
27
+ X: Input tensor of shape (..., n_rows, n_cols)
28
+
29
+ Returns:
30
+ Orthogonalized tensor of the same shape
31
+ """
32
+ # Save original dtype and convert to float32 for QR decomposition
33
+ original_dtype = X.dtype
34
+ original_shape = X.shape
35
+
36
+ # Convert to float32 if needed (QR doesn't support bfloat16)
37
+ if X.dtype == torch.bfloat16:
38
+ X = X.to(torch.float32)
39
+
40
+ # Handle batch dimensions by flattening
41
+ if X.dim() > 2:
42
+ # Reshape to (batch_size, n_rows, n_cols)
43
+ X_flat = X.view(-1, original_shape[-2], original_shape[-1])
44
+ results = []
45
+
46
+ for i in range(X_flat.shape[0]):
47
+ # Apply QR decomposition: Q, _ = torch.linalg.qr(X.T)
48
+ Q, _ = torch.linalg.qr(X_flat[i].T)
49
+ # Return Q.T to get orthogonalized rows
50
+ results.append(Q.T)
51
+
52
+ result = torch.stack(results, dim=0)
53
+ # Reshape back to original shape
54
+ result = result.view(original_shape)
55
+ else:
56
+ # Simple 2D case
57
+ Q, _ = torch.linalg.qr(X.T)
58
+ result = Q.T
59
+
60
+ # Convert back to original dtype
61
+ if original_dtype == torch.bfloat16:
62
+ result = result.to(original_dtype)
63
+
64
+ return result
65
+
66
+
67
+ class PaddingTokenOrthogonalizer(nn.Module):
68
+ """
69
+ A module that applies padding token orthogonalization to text embeddings.
70
+
71
+ Based on the poster's method, this enhances text-image alignment by:
72
+ 1. Identifying padding tokens in the sequence
73
+ 2. Orthogonalizing their representations using QR decomposition
74
+ 3. Maintaining feature diversity and preventing biased attention
75
+ """
76
+
77
+ def __init__(
78
+ self,
79
+ enabled: bool = True,
80
+ preserve_norm: bool = True,
81
+ orthogonalize_all: bool = False,
82
+ ):
83
+ """
84
+ Args:
85
+ enabled: Whether to apply orthogonalization
86
+ preserve_norm: Whether to preserve the original norm of tokens
87
+ orthogonalize_all: If True, orthogonalize all tokens; if False, only padding tokens
88
+ """
89
+ super().__init__()
90
+ self.enabled = enabled
91
+ self.preserve_norm = preserve_norm
92
+ self.orthogonalize_all = orthogonalize_all
93
+
94
+ def identify_padding_tokens(
95
+ self,
96
+ embeddings: torch.Tensor,
97
+ attention_mask: Optional[torch.Tensor] = None,
98
+ pad_token_id: Optional[int] = None,
99
+ input_ids: Optional[torch.Tensor] = None
100
+ ) -> torch.Tensor:
101
+ """
102
+ Identify padding token positions in the sequence.
103
+
104
+ Args:
105
+ embeddings: Token embeddings [batch, seq_len, hidden_size]
106
+ attention_mask: Attention mask where 0 indicates padding
107
+ pad_token_id: ID of the padding token
108
+ input_ids: Input token IDs
109
+
110
+ Returns:
111
+ Boolean mask indicating padding positions [batch, seq_len]
112
+ """
113
+ batch_size, seq_len = embeddings.shape[:2]
114
+
115
+ if attention_mask is not None:
116
+ # Attention mask: 1 for real tokens, 0 for padding
117
+ return ~attention_mask.bool()
118
+ elif pad_token_id is not None and input_ids is not None:
119
+ return input_ids == pad_token_id
120
+ else:
121
+ # Fallback: assume last 25% of sequence are padding tokens
122
+ # This is a heuristic based on common practice
123
+ padding_start = int(seq_len * 0.75)
124
+ mask = torch.zeros(batch_size, seq_len, dtype=torch.bool, device=embeddings.device)
125
+ mask[:, padding_start:] = True
126
+ return mask
127
+
128
+ def forward(
129
+ self,
130
+ embeddings: torch.Tensor,
131
+ attention_mask: Optional[torch.Tensor] = None,
132
+ pad_token_id: Optional[int] = None,
133
+ input_ids: Optional[torch.Tensor] = None,
134
+ ) -> torch.Tensor:
135
+ """
136
+ Apply padding token orthogonalization.
137
+
138
+ Args:
139
+ embeddings: Token embeddings [batch, seq_len, hidden_size]
140
+ attention_mask: Attention mask where 1 indicates real tokens
141
+ pad_token_id: ID of the padding token
142
+ input_ids: Input token IDs
143
+
144
+ Returns:
145
+ Enhanced embeddings with orthogonalized padding tokens
146
+ """
147
+ if not self.enabled:
148
+ return embeddings
149
+
150
+ # Store original norms if we need to preserve them
151
+ if self.preserve_norm:
152
+ original_norms = torch.norm(embeddings, dim=-1, keepdim=True)
153
+
154
+ if self.orthogonalize_all:
155
+ # Orthogonalize all tokens in the sequence
156
+ enhanced_embeddings = orthogonalize_rows(embeddings)
157
+ else:
158
+ # Only orthogonalize padding tokens
159
+ padding_mask = self.identify_padding_tokens(
160
+ embeddings, attention_mask, pad_token_id, input_ids
161
+ )
162
+
163
+ enhanced_embeddings = embeddings.clone()
164
+
165
+ # Process each sample in the batch
166
+ for batch_idx in range(embeddings.shape[0]):
167
+ padding_indices = torch.where(padding_mask[batch_idx])[0]
168
+
169
+ if len(padding_indices) > 1: # Need at least 2 tokens to orthogonalize
170
+ # Extract padding token embeddings
171
+ padding_embeddings = embeddings[batch_idx, padding_indices]
172
+
173
+ # Apply orthogonalization
174
+ orthogonalized = orthogonalize_rows(padding_embeddings)
175
+
176
+ # Put back orthogonalized embeddings
177
+ enhanced_embeddings[batch_idx, padding_indices] = orthogonalized
178
+
179
+ # Restore original norms if requested
180
+ if self.preserve_norm:
181
+ current_norms = torch.norm(enhanced_embeddings, dim=-1, keepdim=True)
182
+ enhanced_embeddings = enhanced_embeddings * (original_norms / (current_norms + 1e-8))
183
+
184
+ return enhanced_embeddings
185
+
186
+
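+ # A minimal usage sketch for the module above (batch size, sequence length, hidden size, and
+ # mask layout are illustrative assumptions, not values from the poster):
+ #
+ #     orthogonalizer = PaddingTokenOrthogonalizer(enabled=True, preserve_norm=True)
+ #     embeds = torch.randn(2, 16, 64)               # [batch, seq_len, hidden]
+ #     mask = torch.ones(2, 16, dtype=torch.long)
+ #     mask[:, 10:] = 0                              # last 6 positions are padding
+ #     enhanced = orthogonalizer(embeds, attention_mask=mask)
+ #     assert enhanced.shape == embeds.shape
+ #     assert torch.allclose(enhanced[:, :10], embeds[:, :10])   # real tokens essentially unchanged
+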
187
+ def apply_padding_token_orthogonalization(
188
+ prompt_embeds: torch.Tensor,
189
+ text_attention_mask: Optional[torch.Tensor] = None,
190
+ config: Optional[dict] = None,
191
+ ) -> torch.Tensor:
192
+ """
193
+ Convenience function to apply padding token orthogonalization to prompt embeddings.
194
+
195
+ Args:
196
+ prompt_embeds: Text prompt embeddings [batch, seq_len, hidden_size]
197
+ text_attention_mask: Attention mask for text tokens
198
+ config: Configuration dictionary with orthogonalization settings
199
+
200
+ Returns:
201
+ Enhanced prompt embeddings
202
+ """
203
+ if config is None:
204
+ config = {}
205
+
206
+ orthogonalizer = PaddingTokenOrthogonalizer(
207
+ enabled=config.get('padding_orthogonalization_enabled', True),
208
+ preserve_norm=config.get('preserve_norm', True),
209
+ orthogonalize_all=config.get('orthogonalize_all_tokens', False),
210
+ )
211
+
212
+ return orthogonalizer(
213
+ embeddings=prompt_embeds,
214
+ attention_mask=text_attention_mask,
215
+ )
216
+
217
+
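+ # A minimal sketch of the convenience wrapper driven by a config dict (the 512/4096 shape
+ # mirrors T5 prompt embeddings in Flux, but is only an assumed example here):
+ #
+ #     config = {
+ #         "padding_orthogonalization_enabled": True,
+ #         "preserve_norm": True,
+ #         "orthogonalize_all_tokens": False,
+ #     }
+ #     prompt_embeds = torch.randn(1, 512, 4096)     # [batch, max_seq_len, hidden]
+ #     enhanced = apply_padding_token_orthogonalization(prompt_embeds, config=config)
+ #     # With no attention mask or input_ids, the last 25% of positions are treated as padding.
+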
218
+ # Gram-Schmidt orthogonalization alternative implementation
219
+ def gram_schmidt_orthogonalization(vectors: torch.Tensor, eps: float = 1e-8) -> torch.Tensor:
220
+ """
221
+ Alternative implementation using explicit Gram-Schmidt process.
222
+ This provides more control but is generally slower than QR decomposition.
223
+
224
+ Args:
225
+ vectors: Input vectors to orthogonalize [n_vectors, dim]
226
+ eps: Small epsilon for numerical stability
227
+
228
+ Returns:
229
+ Orthogonalized vectors
230
+ """
231
+ n_vectors = vectors.shape[0]
232
+ orthogonal_vectors = torch.zeros_like(vectors)
233
+
234
+ for i in range(n_vectors):
235
+ vector = vectors[i].clone()
236
+
237
+ # Subtract projections onto previous orthogonal vectors
238
+ for j in range(i):
239
+ projection = torch.dot(vector, orthogonal_vectors[j]) / (
240
+ torch.dot(orthogonal_vectors[j], orthogonal_vectors[j]) + eps
241
+ )
242
+ vector = vector - projection * orthogonal_vectors[j]
243
+
244
+ # Normalize
245
+ norm = torch.norm(vector)
246
+ if norm > eps:
247
+ orthogonal_vectors[i] = vector / norm
248
+ else:
249
+ # Handle zero vector case
250
+ orthogonal_vectors[i] = vector
251
+
252
+ return orthogonal_vectors
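+
+
+ # A small sanity check (assumed example sizes) showing that the explicit Gram-Schmidt variant,
+ # like the QR-based path, yields near-orthonormal rows:
+ #
+ #     vectors = torch.randn(6, 32)
+ #     ortho = gram_schmidt_orthogonalization(vectors)
+ #     assert torch.allclose(ortho @ ortho.T, torch.eye(6), atol=1e-4)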
flux/pipeline_tools.py ADDED
@@ -0,0 +1,80 @@
1
+ from diffusers.pipelines import FluxPipeline
2
+ from diffusers.utils import logging
3
+ from diffusers.pipelines.flux.pipeline_flux import logger
4
+ from torch import Tensor
5
+ from typing import Optional, Dict, Any
6
+ from .padding_orthogonalization import apply_padding_token_orthogonalization
7
+
8
+
9
+ def encode_images(pipeline: FluxPipeline, images: Tensor):
10
+ images = pipeline.image_processor.preprocess(images)
11
+ images = images.to(pipeline.device).to(pipeline.dtype)
12
+ images = pipeline.vae.encode(images).latent_dist.sample()
13
+ images = (
14
+ images - pipeline.vae.config.shift_factor
15
+ ) * pipeline.vae.config.scaling_factor
16
+ images_tokens = pipeline._pack_latents(images, *images.shape)
17
+ images_ids = pipeline._prepare_latent_image_ids(
18
+ images.shape[0],
19
+ images.shape[2],
20
+ images.shape[3],
21
+ pipeline.device,
22
+ pipeline.dtype,
23
+ )
24
+ if images_tokens.shape[1] != images_ids.shape[0]:
25
+ images_ids = pipeline._prepare_latent_image_ids(
26
+ images.shape[0],
27
+ images.shape[2] // 2,
28
+ images.shape[3] // 2,
29
+ pipeline.device,
30
+ pipeline.dtype,
31
+ )
32
+ return images_tokens, images_ids
33
+
34
+
35
+ def prepare_text_input(
36
+ pipeline: FluxPipeline,
37
+ prompts,
38
+ max_sequence_length=512,
39
+ model_config: Optional[Dict[str, Any]] = None
40
+ ):
41
+ """
42
+ Prepare text input with optional padding token orthogonalization.
43
+
44
+ Args:
45
+ pipeline: FluxPipeline instance
46
+ prompts: Text prompts to encode
47
+ max_sequence_length: Maximum sequence length
48
+ model_config: Optional configuration for orthogonalization
49
+
50
+ Returns:
51
+ Tuple of (prompt_embeds, pooled_prompt_embeds, text_ids)
52
+ """
53
+ # Turn off warnings (CLIP overflow)
54
+ logger.setLevel(logging.ERROR)
55
+ (
56
+ prompt_embeds,
57
+ pooled_prompt_embeds,
58
+ text_ids,
59
+ ) = pipeline.encode_prompt(
60
+ prompt=prompts,
61
+ prompt_2=None,
62
+ prompt_embeds=None,
63
+ pooled_prompt_embeds=None,
64
+ device=pipeline.device,
65
+ num_images_per_prompt=1,
66
+ max_sequence_length=max_sequence_length,
67
+ lora_scale=None,
68
+ )
69
+
70
+ # Apply padding token orthogonalization if configured
71
+ if model_config and model_config.get('padding_orthogonalization_enabled', False):
72
+ prompt_embeds = apply_padding_token_orthogonalization(
73
+ prompt_embeds=prompt_embeds,
74
+ text_attention_mask=None,
75
+ config=model_config,
76
+ )
77
+
78
+ # Turn on warnings
79
+ logger.setLevel(logging.WARNING)
80
+ return prompt_embeds, pooled_prompt_embeds, text_ids
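+
+
+ # A hedged usage sketch: encode a prompt with the orthogonalization hook enabled. It assumes a
+ # FluxPipeline has already been loaded (the model id is only an example and may require access):
+ #
+ #     import torch
+ #     from diffusers import FluxPipeline
+ #     from flux.pipeline_tools import prepare_text_input
+ #
+ #     pipe = FluxPipeline.from_pretrained(
+ #         "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
+ #     ).to("cuda")
+ #     cfg = {"padding_orthogonalization_enabled": True, "preserve_norm": True}
+ #     prompt_embeds, pooled_embeds, text_ids = prepare_text_input(
+ #         pipe, ["a studio product photo of a red sneaker"], model_config=cfg
+ #     )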
flux/transformer.py ADDED
@@ -0,0 +1,252 @@
1
+ import torch
2
+ from diffusers.pipelines import FluxPipeline
3
+ from typing import List, Union, Optional, Dict, Any, Callable
4
+ from .block import block_forward, single_block_forward
5
+ from .lora_controller import enable_lora
6
+ from accelerate.utils import is_torch_version
7
+ from diffusers.models.transformers.transformer_flux import (
8
+ FluxTransformer2DModel,
9
+ Transformer2DModelOutput,
10
+ USE_PEFT_BACKEND,
11
+ scale_lora_layers,
12
+ unscale_lora_layers,
13
+ logger,
14
+ )
15
+ import numpy as np
16
+
17
+
18
+ def prepare_params(
19
+ hidden_states: torch.Tensor,
20
+ encoder_hidden_states: torch.Tensor = None,
21
+ pooled_projections: torch.Tensor = None,
22
+ timestep: torch.LongTensor = None,
23
+ img_ids: torch.Tensor = None,
24
+ txt_ids: torch.Tensor = None,
25
+ guidance: torch.Tensor = None,
26
+ joint_attention_kwargs: Optional[Dict[str, Any]] = None,
27
+ controlnet_block_samples=None,
28
+ controlnet_single_block_samples=None,
29
+ return_dict: bool = True,
30
+ **kwargs: dict,
31
+ ):
32
+ return (
33
+ hidden_states,
34
+ encoder_hidden_states,
35
+ pooled_projections,
36
+ timestep,
37
+ img_ids,
38
+ txt_ids,
39
+ guidance,
40
+ joint_attention_kwargs,
41
+ controlnet_block_samples,
42
+ controlnet_single_block_samples,
43
+ return_dict,
44
+ )
45
+
46
+
47
+ def tranformer_forward(
48
+ transformer: FluxTransformer2DModel,
49
+ condition_latents: torch.Tensor,
50
+ condition_ids: torch.Tensor,
51
+ condition_type_ids: torch.Tensor,
52
+ model_config: Optional[Dict[str, Any]] = {},
53
+ c_t=0,
54
+ **params: dict,
55
+ ):
56
+ self = transformer
57
+ use_condition = condition_latents is not None
58
+
59
+ (
60
+ hidden_states,
61
+ encoder_hidden_states,
62
+ pooled_projections,
63
+ timestep,
64
+ img_ids,
65
+ txt_ids,
66
+ guidance,
67
+ joint_attention_kwargs,
68
+ controlnet_block_samples,
69
+ controlnet_single_block_samples,
70
+ return_dict,
71
+ ) = prepare_params(**params)
72
+
73
+ if joint_attention_kwargs is not None:
74
+ joint_attention_kwargs = joint_attention_kwargs.copy()
75
+ lora_scale = joint_attention_kwargs.pop("scale", 1.0)
76
+ else:
77
+ lora_scale = 1.0
78
+
79
+ if USE_PEFT_BACKEND:
80
+ # weight the lora layers by setting `lora_scale` for each PEFT layer
81
+ scale_lora_layers(self, lora_scale)
82
+ else:
83
+ if (
84
+ joint_attention_kwargs is not None
85
+ and joint_attention_kwargs.get("scale", None) is not None
86
+ ):
87
+ logger.warning(
88
+ "Passing `scale` via `joint_attention_kwargs` when not using the PEFT backend is ineffective."
89
+ )
90
+
91
+ with enable_lora((self.x_embedder,), model_config.get("latent_lora", False)):
92
+ hidden_states = self.x_embedder(hidden_states)
93
+ condition_latents = self.x_embedder(condition_latents) if use_condition else None
94
+
95
+ timestep = timestep.to(hidden_states.dtype) * 1000
96
+
97
+ if guidance is not None:
98
+ guidance = guidance.to(hidden_states.dtype) * 1000
99
+ else:
100
+ guidance = None
101
+
102
+ temb = (
103
+ self.time_text_embed(timestep, pooled_projections)
104
+ if guidance is None
105
+ else self.time_text_embed(timestep, guidance, pooled_projections)
106
+ )
107
+
108
+ cond_temb = (
109
+ self.time_text_embed(torch.ones_like(timestep) * c_t * 1000, pooled_projections)
110
+ if guidance is None
111
+ else self.time_text_embed(
112
+ torch.ones_like(timestep) * c_t * 1000, torch.ones_like(guidance) * 1000, pooled_projections
113
+ )
114
+ )
115
+ encoder_hidden_states = self.context_embedder(encoder_hidden_states)
116
+
117
+ if txt_ids.ndim == 3:
118
+ logger.warning(
119
+ "Passing `txt_ids` 3d torch.Tensor is deprecated."
120
+ "Please remove the batch dimension and pass it as a 2d torch Tensor"
121
+ )
122
+ txt_ids = txt_ids[0]
123
+ if img_ids.ndim == 3:
124
+ logger.warning(
125
+ "Passing `img_ids` 3d torch.Tensor is deprecated."
126
+ "Please remove the batch dimension and pass it as a 2d torch Tensor"
127
+ )
128
+ img_ids = img_ids[0]
129
+
130
+ ids = torch.cat((txt_ids, img_ids), dim=0)
131
+ image_rotary_emb = self.pos_embed(ids)
132
+ if use_condition:
133
+ # condition_ids[:, :1] = condition_type_ids
134
+ cond_rotary_emb = self.pos_embed(condition_ids)
135
+
136
+ # hidden_states = torch.cat([hidden_states, condition_latents], dim=1)
137
+
138
+ for index_block, block in enumerate(self.transformer_blocks):
139
+ if self.training and self.gradient_checkpointing:
140
+ ckpt_kwargs: Dict[str, Any] = (
141
+ {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
142
+ )
143
+ encoder_hidden_states, hidden_states, condition_latents = (
144
+ torch.utils.checkpoint.checkpoint(
145
+ block_forward,
146
+ self=block,
147
+ model_config=model_config,
148
+ hidden_states=hidden_states,
149
+ encoder_hidden_states=encoder_hidden_states,
150
+ condition_latents=condition_latents if use_condition else None,
151
+ temb=temb,
152
+ cond_temb=cond_temb if use_condition else None,
153
+ cond_rotary_emb=cond_rotary_emb if use_condition else None,
154
+ image_rotary_emb=image_rotary_emb,
155
+ **ckpt_kwargs,
156
+ )
157
+ )
158
+
159
+ else:
160
+ encoder_hidden_states, hidden_states, condition_latents = block_forward(
161
+ block,
162
+ model_config=model_config,
163
+ hidden_states=hidden_states,
164
+ encoder_hidden_states=encoder_hidden_states,
165
+ condition_latents=condition_latents if use_condition else None,
166
+ temb=temb,
167
+ cond_temb=cond_temb if use_condition else None,
168
+ cond_rotary_emb=cond_rotary_emb if use_condition else None,
169
+ image_rotary_emb=image_rotary_emb,
170
+ )
171
+
172
+ # controlnet residual
173
+ if controlnet_block_samples is not None:
174
+ interval_control = len(self.transformer_blocks) / len(
175
+ controlnet_block_samples
176
+ )
177
+ interval_control = int(np.ceil(interval_control))
178
+ hidden_states = (
179
+ hidden_states
180
+ + controlnet_block_samples[index_block // interval_control]
181
+ )
182
+ hidden_states = torch.cat([encoder_hidden_states, hidden_states], dim=1)
183
+
184
+ for index_block, block in enumerate(self.single_transformer_blocks):
185
+ if self.training and self.gradient_checkpointing:
186
+ ckpt_kwargs: Dict[str, Any] = (
187
+ {"use_reentrant": False} if is_torch_version(">=", "1.11.0") else {}
188
+ )
189
+ result = torch.utils.checkpoint.checkpoint(
190
+ single_block_forward,
191
+ self=block,
192
+ model_config=model_config,
193
+ hidden_states=hidden_states,
194
+ temb=temb,
195
+ image_rotary_emb=image_rotary_emb,
196
+ **(
197
+ {
198
+ "condition_latents": condition_latents,
199
+ "cond_temb": cond_temb,
200
+ "cond_rotary_emb": cond_rotary_emb,
201
+ }
202
+ if use_condition
203
+ else {}
204
+ ),
205
+ **ckpt_kwargs,
206
+ )
207
+
208
+ else:
209
+ result = single_block_forward(
210
+ block,
211
+ model_config=model_config,
212
+ hidden_states=hidden_states,
213
+ temb=temb,
214
+ image_rotary_emb=image_rotary_emb,
215
+ **(
216
+ {
217
+ "condition_latents": condition_latents,
218
+ "cond_temb": cond_temb,
219
+ "cond_rotary_emb": cond_rotary_emb,
220
+ }
221
+ if use_condition
222
+ else {}
223
+ ),
224
+ )
225
+ if use_condition:
226
+ hidden_states, condition_latents = result
227
+ else:
228
+ hidden_states = result
229
+
230
+ # controlnet residual
231
+ if controlnet_single_block_samples is not None:
232
+ interval_control = len(self.single_transformer_blocks) / len(
233
+ controlnet_single_block_samples
234
+ )
235
+ interval_control = int(np.ceil(interval_control))
236
+ hidden_states[:, encoder_hidden_states.shape[1] :, ...] = (
237
+ hidden_states[:, encoder_hidden_states.shape[1] :, ...]
238
+ + controlnet_single_block_samples[index_block // interval_control]
239
+ )
240
+
241
+ hidden_states = hidden_states[:, encoder_hidden_states.shape[1] :, ...]
242
+
243
+ hidden_states = self.norm_out(hidden_states, temb)
244
+ output = self.proj_out(hidden_states)
245
+
246
+ if USE_PEFT_BACKEND:
247
+ # remove `lora_scale` from each PEFT layer
248
+ unscale_lora_layers(self, lora_scale)
249
+
250
+ if not return_dict:
251
+ return (output,)
252
+ return Transformer2DModelOutput(sample=output)
pyproject.toml ADDED
@@ -0,0 +1,66 @@
1
+ [project]
2
+ name = "ads_gen"
3
+ version = "0.1.0"
4
+ description = "ZenCtrl AP"
5
+ authors = [{ name = "Dummy User", email = "dummy@gmail.com" }]
6
+ requires-python = ">=3.12"
7
+ dependencies = [
8
+ "diffusers==0.35.0",
9
+ "gradio>=5.29.0",
10
+ "jupyter>=1.1.1",
11
+ "matplotlib>=3.10.3",
12
+ "opencv-python>=4.11.0.86",
13
+ "peft>=0.17.0",
14
+ "protobuf>=4.21.5",
15
+ "sentencepiece>=0.2.0",
16
+ "torchao>=0.10.0",
17
+ "torchvision>=0.22.0",
18
+ "transformers>=4.55.0",
19
+ "datasets>=2.13.0,<3",
20
+ "gcsfs>=2023.1.0,<2024",
21
+ "pillow>=9.5.0,<10",
22
+ "setuptools>=68.0.0,<69",
23
+ "tensorboard>=2.13.0,<3",
24
+ "omegaconf>=2.3.0,<3",
25
+ "einops>=0.6.1,<0.7",
26
+ "scipy>1.10.1",
27
+ "seaborn>=0.12.2,<0.13",
28
+ "tensorflow>=2.12.0,<3",
29
+ "tensorflow-datasets>=4.9.2,<5",
30
+ "hydra-core>=1.3.2,<2",
31
+ "torch-tb-profiler>=0.4.1,<0.5",
32
+ "faiss-cpu>=1.7.4,<2",
33
+ "triton==3.3.0",
34
+ "bitsandbytes==0.45.2",
35
+ "prdc>=0.2,<0.3",
36
+ "pytorch-fid>=0.3.0,<0.4",
37
+ "python-json-logger>=2.0.7,<3",
38
+ "multiprocess>=0.70.12",
39
+ "pyyaml>=6.0.1,<7",
40
+ "timm>=0.9.5,<0.10",
41
+ "rich>=13.5.2,<14",
42
+ "gdown>=4.7.1,<5",
43
+ "dreamsim>=0.1.3",
44
+ "scikit-image>=0.24.0",
45
+ "nvitop>=1.5.0",
46
+ "segment-anything==1.0",
47
+ ]
48
+
49
+ [tool.hatch.build.targets.wheel]
50
+ packages = ["app", "ralf"]
51
+
52
+ [tool.hatch.build.targets.sdist]
53
+ include = ["app", "ralf"]
54
+
55
+ [build-system]
56
+ requires = ["hatchling"]
57
+ build-backend = "hatchling.build"
58
+
59
+ [tool.uv]
60
+ [[tool.uv.index]]
61
+ name = "pytorch-cu124"
62
+ url = "https://download.pytorch.org/whl/cu124"
63
+ explicit = true
64
+
65
+ [tool.uv.sources]
66
+ segment-anything = { git = "https://github.com/facebookresearch/segment-anything.git" }
requirements.txt ADDED
@@ -0,0 +1,938 @@
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml
3
+ absl-py==2.3.1
4
+ # via
5
+ # array-record
6
+ # dm-tree
7
+ # etils
8
+ # keras
9
+ # tensorboard
10
+ # tensorflow
11
+ # tensorflow-datasets
12
+ # tensorflow-metadata
13
+ accelerate==1.10.1
14
+ # via peft
15
+ aiofiles==24.1.0
16
+ # via gradio
17
+ aiohappyeyeballs==2.6.1
18
+ # via aiohttp
19
+ aiohttp==3.13.0
20
+ # via
21
+ # datasets
22
+ # fsspec
23
+ # gcsfs
24
+ aiosignal==1.4.0
25
+ # via aiohttp
26
+ annotated-types==0.7.0
27
+ # via pydantic
28
+ antlr4-python3-runtime==4.9.3
29
+ # via
30
+ # hydra-core
31
+ # omegaconf
32
+ anyio==4.11.0
33
+ # via
34
+ # gradio
35
+ # httpx
36
+ # jupyter-server
37
+ # starlette
38
+ argon2-cffi==25.1.0
39
+ # via jupyter-server
40
+ argon2-cffi-bindings==25.1.0
41
+ # via argon2-cffi
42
+ array-record==0.8.1
43
+ # via tensorflow-datasets
44
+ arrow==1.3.0
45
+ # via isoduration
46
+ asttokens==3.0.0
47
+ # via stack-data
48
+ astunparse==1.6.3
49
+ # via tensorflow
50
+ async-lru==2.0.5
51
+ # via jupyterlab
52
+ attrs==25.4.0
53
+ # via
54
+ # aiohttp
55
+ # dm-tree
56
+ # jsonschema
57
+ # referencing
58
+ babel==2.17.0
59
+ # via jupyterlab-server
60
+ beautifulsoup4==4.14.2
61
+ # via
62
+ # gdown
63
+ # nbconvert
64
+ bitsandbytes==0.45.2
65
+ # via ads-gen (pyproject.toml)
66
+ bleach==6.2.0
67
+ # via nbconvert
68
+ brotli==1.1.0
69
+ # via gradio
70
+ cachetools==6.2.0
71
+ # via google-auth
72
+ certifi==2025.10.5
73
+ # via
74
+ # httpcore
75
+ # httpx
76
+ # requests
77
+ cffi==2.0.0
78
+ # via argon2-cffi-bindings
79
+ charset-normalizer==3.4.3
80
+ # via requests
81
+ click==8.3.0
82
+ # via
83
+ # typer
84
+ # uvicorn
85
+ comm==0.2.3
86
+ # via
87
+ # ipykernel
88
+ # ipywidgets
89
+ # via matplotlib
90
+ cycler==0.12.1
91
+ # via matplotlib
92
+ datasets==2.21.0
93
+ # via ads-gen (pyproject.toml)
94
+ debugpy==1.8.17
95
+ # via ipykernel
96
+ decorator==5.2.1
97
+ # via
98
+ # gcsfs
99
+ # ipython
100
+ defusedxml==0.7.1
101
+ # via nbconvert
102
+ diffusers==0.35.0
103
+ # via ads-gen (pyproject.toml)
104
+ dill==0.3.8
105
+ # via
106
+ # datasets
107
+ # multiprocess
108
+ dm-tree==0.1.9
109
+ # via tensorflow-datasets
110
+ docstring-parser==0.17.0
111
+ # via simple-parsing
112
+ dreamsim==0.2.1
113
+ # via ads-gen (pyproject.toml)
114
+ einops==0.6.1
115
+ # via
116
+ # ads-gen (pyproject.toml)
117
+ # etils
118
+ etils==1.13.0
119
+ # via
120
+ # array-record
121
+ # tensorflow-datasets
122
+ executing==2.2.1
123
+ # via stack-data
124
+ faiss-cpu==1.12.0
125
+ # via ads-gen (pyproject.toml)
126
+ fastapi==0.118.2
127
+ # via gradio
128
+ fastjsonschema==2.21.2
129
+ # via nbformat
130
+ ffmpy==0.6.2
131
+ # via gradio
132
+ filelock==3.20.0
133
+ # via
134
+ # datasets
135
+ # diffusers
136
+ # gdown
137
+ # huggingface-hub
138
+ # torch
139
+ # transformers
140
+ flatbuffers==25.9.23
141
+ # via tensorflow
142
+ fonttools==4.60.1
143
+ # via matplotlib
144
+ fqdn==1.5.1
145
+ # via jsonschema
146
+ frozenlist==1.8.0
147
+ # via
148
+ # aiohttp
149
+ # aiosignal
150
+ fsspec==2023.12.2
151
+ # via
152
+ # datasets
153
+ # etils
154
+ # gcsfs
155
+ # gradio-client
156
+ # huggingface-hub
157
+ # torch
158
+ ftfy==6.3.1
159
+ # via open-clip-torch
160
+ gast==0.6.0
161
+ # via tensorflow
162
+ gcsfs==2023.12.2.post1
163
+ # via ads-gen (pyproject.toml)
164
+ gdown==4.7.3
165
+ # via ads-gen (pyproject.toml)
166
+ google-api-core
167
+ # via
168
+ # google-cloud-core
169
+ # google-cloud-storage
170
+ google-auth==2.41.1
171
+ # via
172
+ # gcsfs
173
+ # google-api-core
174
+ # google-auth-oauthlib
175
+ # google-cloud-core
176
+ # google-cloud-storage
177
+ google-auth-oauthlib==1.2.2
178
+ # via gcsfs
179
+ google-cloud-core==2.4.3
180
+ # via google-cloud-storage
181
+ google-cloud-storage==3.4.1
182
+ # via gcsfs
183
+ google-crc32c==1.7.1
184
+ # via
185
+ # google-cloud-storage
186
+ # google-resumable-media
187
+ google-pasta==0.2.0
188
+ # via tensorflow
189
+ google-resumable-media==2.7.2
190
+ # via google-cloud-storage
191
+ googleapis-common-protos
192
+ # via
193
+ # google-api-core
194
+ # tensorflow-metadata
195
+ gradio==5.49.1
196
+ # via ads-gen (pyproject.toml)
197
+ gradio-client==1.13.3
198
+ # via gradio
199
+ groovy==0.1.2
200
+ # via gradio
201
+ grpcio==1.75.1
202
+ # via
203
+ # tensorboard
204
+ # tensorflow
205
+ h11==0.16.0
206
+ # via
207
+ # httpcore
208
+ # uvicorn
209
+ h5py==3.14.0
210
+ # via
211
+ # keras
212
+ # tensorflow
213
+ hf-xet==1.1.10
214
+ # via huggingface-hub
215
+ httpcore==1.0.9
216
+ # via httpx
217
+ httpx==0.28.1
218
+ # via
219
+ # gradio
220
+ # gradio-client
221
+ # jupyterlab
222
+ # safehttpx
223
+ huggingface-hub==0.35.3
224
+ # via
225
+ # accelerate
226
+ # datasets
227
+ # diffusers
228
+ # gradio
229
+ # gradio-client
230
+ # open-clip-torch
231
+ # peft
232
+ # timm
233
+ # tokenizers
234
+ # transformers
235
+ hydra-core==1.3.2
236
+ # via ads-gen (pyproject.toml)
237
+ idna==3.10
238
+ # via
239
+ # anyio
240
+ # httpx
241
+ # jsonschema
242
+ # requests
243
+ # yarl
244
+ imageio==2.37.0
245
+ # via scikit-image
246
+ immutabledict==4.2.1
247
+ # via tensorflow-datasets
248
+ importlib-metadata==8.7.0
249
+ # via diffusers
250
+ importlib-resources==6.5.2
251
+ # via etils
252
+ ipykernel==6.30.1
253
+ # via
254
+ # jupyter
255
+ # jupyter-console
256
+ # jupyterlab
257
+ ipython
258
+ # via
259
+ # ipykernel
260
+ # ipywidgets
261
+ # jupyter-console
262
+ ipython-pygments-lexers==1.1.1
263
+ # via ipython
264
+ ipywidgets==8.1.7
265
+ # via jupyter
266
+ isoduration==20.11.0
267
+ # via jsonschema
268
+ jedi==0.19.2
269
+ # via ipython
270
+ jinja2==3.1.6
271
+ # via
272
+ # gradio
273
+ # jupyter-server
274
+ # jupyterlab
275
+ # jupyterlab-server
276
+ # nbconvert
277
+ # torch
278
+ joblib==1.5.2
279
+ # via
280
+ # prdc
281
+ # scikit-learn
282
+ json5==0.12.1
283
+ # via jupyterlab-server
284
+ jsonpointer==3.0.0
285
+ # via jsonschema
286
+ jsonschema==4.25.1
287
+ # via
288
+ # jupyter-events
289
+ # jupyterlab-server
290
+ # nbformat
291
+ jsonschema-specifications==2025.9.1
292
+ # via jsonschema
293
+ jupyter==1.1.1
294
+ # via ads-gen (pyproject.toml)
295
+ jupyter-client==8.6.3
296
+ # via
297
+ # ipykernel
298
+ # jupyter-console
299
+ # jupyter-server
300
+ # nbclient
301
+ jupyter-console==6.6.3
302
+ # via jupyter
303
+ jupyter-core==5.8.1
304
+ # via
305
+ # ipykernel
306
+ # jupyter-client
307
+ # jupyter-console
308
+ # jupyter-server
309
+ # jupyterlab
310
+ # nbclient
311
+ # nbconvert
312
+ # nbformat
313
+ jupyter-events==0.12.0
314
+ # via jupyter-server
315
+ jupyter-lsp==2.3.0
316
+ # via jupyterlab
317
+ jupyter-server==2.17.0
318
+ # via
319
+ # jupyter-lsp
320
+ # jupyterlab
321
+ # jupyterlab-server
322
+ # notebook
323
+ # notebook-shim
324
+ jupyter-server-terminals==0.5.3
325
+ # via jupyter-server
326
+ jupyterlab==4.4.9
327
+ # via
328
+ # jupyter
329
+ # notebook
330
+ jupyterlab-pygments==0.3.0
331
+ # via nbconvert
332
+ jupyterlab-server==2.27.3
333
+ # via
334
+ # jupyterlab
335
+ # notebook
336
+ jupyterlab-widgets==3.0.15
337
+ # via ipywidgets
338
+ keras==3.11.3
339
+ # via tensorflow
340
+ kiwisolver==1.4.9
341
+ # via matplotlib
342
+ lark==1.3.0
343
+ # via rfc3987-syntax
344
+ lazy-loader==0.4
345
+ # via scikit-image
346
+ libclang==18.1.1
347
+ # via tensorflow
348
+ markdown==3.9
349
+ # via tensorboard
350
+ markdown-it-py==4.0.0
351
+ # via rich
352
+ markupsafe==3.0.3
353
+ # via
354
+ # gradio
355
+ # jinja2
356
+ # nbconvert
357
+ # werkzeug
358
+ matplotlib==3.10.7
359
+ # via
360
+ # ads-gen (pyproject.toml)
361
+ # seaborn
362
+ matplotlib-inline==0.1.7
363
+ # via
364
+ # ipykernel
365
+ # ipython
366
+ mdurl==0.1.2
367
+ # via markdown-it-py
368
+ mistune==3.1.4
369
+ # via nbconvert
370
+ ml-dtypes==0.5.3
371
+ # via
372
+ # keras
373
+ # tensorflow
374
+ mpmath==1.3.0
375
+ # via sympy
376
+ multidict==6.7.0
377
+ # via
378
+ # aiohttp
379
+ # yarl
380
+ multiprocess==0.70.16
381
+ # via
382
+ # ads-gen (pyproject.toml)
383
+ # datasets
384
+ namex==0.1.0
385
+ # via keras
386
+ nbclient==0.10.2
387
+ # via nbconvert
388
+ nbconvert==7.16.6
389
+ # via
390
+ # jupyter
391
+ # jupyter-server
392
+ nbformat==5.10.4
393
+ # via
394
+ # jupyter-server
395
+ # nbclient
396
+ # nbconvert
397
+ nest-asyncio==1.6.0
398
+ # via ipykernel
399
+ networkx
400
+ # via
401
+ # scikit-image
402
+ # torch
403
+ notebook==7.4.7
404
+ # via jupyter
405
+ notebook-shim==0.2.4
406
+ # via
407
+ # jupyterlab
408
+ # notebook
409
+ numpy==1.26.4
410
+ # via
411
+ # accelerate
412
+ # bitsandbytes
413
+ # contourpy
414
+ # datasets
415
+ # diffusers
416
+ # dm-tree
417
+ # dreamsim
418
+ # etils
419
+ # faiss-cpu
420
+ # gradio
421
+ # h5py
422
+ # imageio
423
+ # keras
424
+ # matplotlib
425
+ # ml-dtypes
426
+ # opencv-python
427
+ # pandas
428
+ # peft
429
+ # prdc
430
+ # pytorch-fid
431
+ # scikit-image
432
+ # scikit-learn
433
+ # scipy
434
+ # seaborn
435
+ # tensorboard
436
+ # tensorflow
437
+ # tensorflow-datasets
438
+ # tifffile
439
+ # torchvision
440
+ # transformers
441
+ nvidia-cublas-cu12==12.6.4.1
442
+ # via
443
+ # nvidia-cudnn-cu12
444
+ # nvidia-cusolver-cu12
445
+ # torch
446
+ nvidia-cuda-cupti-cu12==12.6.80
447
+ # via torch
448
+ nvidia-cuda-nvrtc-cu12==12.6.77
449
+ # via torch
450
+ nvidia-cuda-runtime-cu12==12.6.77
451
+ # via torch
452
+ nvidia-cudnn-cu12==9.5.1.17
453
+ # via torch
454
+ nvidia-cufft-cu12==11.3.0.4
455
+ # via torch
456
+ nvidia-cufile-cu12==1.11.1.6
457
+ # via torch
458
+ nvidia-curand-cu12==10.3.7.77
459
+ # via torch
460
+ nvidia-cusolver-cu12==11.7.1.2
461
+ # via torch
462
+ nvidia-cusparse-cu12==12.5.4.2
463
+ # via
464
+ # nvidia-cusolver-cu12
465
+ # torch
466
+ nvidia-cusparselt-cu12==0.6.3
467
+ # via torch
468
+ nvidia-ml-py==13.580.82
469
+ # via nvitop
470
+ nvidia-nccl-cu12==2.26.2
471
+ # via torch
472
+ nvidia-nvjitlink-cu12==12.6.85
473
+ # via
474
+ # nvidia-cufft-cu12
475
+ # nvidia-cusolver-cu12
476
+ # nvidia-cusparse-cu12
477
+ # torch
478
+ nvidia-nvtx-cu12==12.6.77
479
+ # via torch
480
+ nvitop==1.5.3
481
+ # via ads-gen (pyproject.toml)
482
+ oauthlib==3.3.1
483
+ # via requests-oauthlib
484
+ omegaconf==2.3.0
485
+ # via
486
+ # ads-gen (pyproject.toml)
487
+ # hydra-core
488
+ open-clip-torch==2.32.0
489
+ # via dreamsim
490
+ opencv-python==4.11.0.86
491
+ # via ads-gen (pyproject.toml)
492
+ opt-einsum==3.4.0
493
+ # via tensorflow
494
+ optree==0.17.0
495
+ # via keras
496
+ orjson==3.11.3
497
+ # via gradio
498
+ packaging==25.0
499
+ # via
500
+ # accelerate
501
+ # datasets
502
+ # faiss-cpu
503
+ # gradio
504
+ # gradio-client
505
+ # huggingface-hub
506
+ # hydra-core
507
+ # ipykernel
508
+ # jupyter-events
509
+ # jupyter-server
510
+ # jupyterlab
511
+ # jupyterlab-server
512
+ # keras
513
+ # lazy-loader
514
+ # matplotlib
515
+ # nbconvert
516
+ # peft
517
+ # scikit-image
518
+ # tensorboard
519
+ # tensorflow
520
+ # transformers
521
+ pandas==2.3.3
522
+ # via
523
+ # datasets
524
+ # gradio
525
+ # seaborn
526
+ # torch-tb-profiler
527
+ pandocfilters==1.5.1
528
+ # via nbconvert
529
+ parso==0.8.5
530
+ # via jedi
531
+ peft==0.17.1
532
+ # via
533
+ # ads-gen (pyproject.toml)
534
+ # dreamsim
535
+ pexpect==4.9.0
536
+ # via ipython
537
+ pillow==9.5.0
538
+ # via
539
+ # ads-gen (pyproject.toml)
540
+ # diffusers
541
+ # dreamsim
542
+ # gradio
543
+ # imageio
544
+ # matplotlib
545
+ # pytorch-fid
546
+ # scikit-image
547
+ # tensorboard
548
+ # torchvision
549
+ platformdirs==4.5.0
550
+ # via jupyter-core
551
+ prdc==0.2
552
+ # via ads-gen (pyproject.toml)
553
+ prometheus-client==0.23.1
554
+ # via jupyter-server
555
+ promise==2.3
556
+ # via tensorflow-datasets
557
+ prompt-toolkit==3.0.52
558
+ # via
559
+ # ipython
560
+ # jupyter-console
561
+ propcache==0.4.1
562
+ # via
563
+ # aiohttp
564
+ # yarl
565
+ proto-plus
566
+ # via google-api-core
567
+ protobuf
568
+ # via
569
+ # ads-gen (pyproject.toml)
570
+ # google-api-core
571
+ # googleapis-common-protos
572
+ # proto-plus
573
+ # tensorboard
574
+ # tensorflow
575
+ # tensorflow-datasets
576
+ # tensorflow-metadata
577
+ psutil==7.1.0
578
+ # via
579
+ # accelerate
580
+ # ipykernel
581
+ # nvitop
582
+ # peft
583
+ # tensorflow-datasets
584
+ ptyprocess==0.7.0
585
+ # via
586
+ # pexpect
587
+ # terminado
588
+ pure-eval==0.2.3
589
+ # via stack-data
590
+ pyarrow==21.0.0
591
+ # via
592
+ # datasets
593
+ # tensorflow-datasets
594
+ pyasn1==0.6.1
595
+ # via
596
+ # pyasn1-modules
597
+ # rsa
598
+ pyasn1-modules==0.4.2
599
+ # via google-auth
600
+ pycparser==2.23
601
+ # via cffi
602
+ pydantic==2.11.10
603
+ # via
604
+ # fastapi
605
+ # gradio
606
+ pydantic-core==2.33.2
607
+ # via pydantic
608
+ pydub==0.25.1
609
+ # via gradio
610
+ pygments==2.19.2
611
+ # via
612
+ # ipython
613
+ # ipython-pygments-lexers
614
+ # jupyter-console
615
+ # nbconvert
616
+ # rich
617
+ pyparsing==3.2.5
618
+ # via matplotlib
619
+ pysocks==1.7.1
620
+ # via requests
621
+ python-dateutil==2.9.0.post0
622
+ # via
623
+ # arrow
624
+ # jupyter-client
625
+ # matplotlib
626
+ # pandas
627
+ python-json-logger==2.0.7
628
+ # via
629
+ # ads-gen (pyproject.toml)
630
+ # jupyter-events
631
+ python-multipart==0.0.20
632
+ # via gradio
633
+ pytorch-fid==0.3.0
634
+ # via ads-gen (pyproject.toml)
635
+ pytz==2025.2
636
+ # via pandas
637
+ pyyaml==6.0.3
638
+ # via
639
+ # ads-gen (pyproject.toml)
640
+ # accelerate
641
+ # datasets
642
+ # gradio
643
+ # huggingface-hub
644
+ # jupyter-events
645
+ # omegaconf
646
+ # peft
647
+ # timm
648
+ # transformers
649
+ pyzmq==27.1.0
650
+ # via
651
+ # ipykernel
652
+ # jupyter-client
653
+ # jupyter-console
654
+ # jupyter-server
655
+ referencing==0.36.2
656
+ # via
657
+ # jsonschema
658
+ # jsonschema-specifications
659
+ # jupyter-events
660
+ regex==2025.9.18
661
+ # via
662
+ # diffusers
663
+ # open-clip-torch
664
+ # transformers
665
+ requests==2.32.5
666
+ # via
667
+ # datasets
668
+ # diffusers
669
+ # fsspec
670
+ # gcsfs
671
+ # gdown
672
+ # google-api-core
673
+ # google-cloud-storage
674
+ # huggingface-hub
675
+ # jupyterlab-server
676
+ # requests-oauthlib
677
+ # tensorflow
678
+ # tensorflow-datasets
679
+ # transformers
680
+ requests-oauthlib==2.0.0
681
+ # via google-auth-oauthlib
682
+ rfc3339-validator==0.1.4
683
+ # via
684
+ # jsonschema
685
+ # jupyter-events
686
+ rfc3986-validator==0.1.1
687
+ # via
688
+ # jsonschema
689
+ # jupyter-events
690
+ rfc3987-syntax==1.1.0
691
+ # via jsonschema
692
+ rich==13.9.4
693
+ # via
694
+ # ads-gen (pyproject.toml)
695
+ # keras
696
+ # typer
697
+ rpds-py==0.27.1
698
+ # via
699
+ # jsonschema
700
+ # referencing
701
+ rsa==4.9.1
702
+ # via google-auth
703
+ ruff==0.14.0
704
+ # via gradio
705
+ safehttpx==0.1.6
706
+ # via gradio
707
+ safetensors==0.6.2
708
+ # via
709
+ # accelerate
710
+ # diffusers
711
+ # open-clip-torch
712
+ # peft
713
+ # timm
714
+ # transformers
715
+ scikit-image==0.24.0
716
+ # via ads-gen (pyproject.toml)
717
+ scikit-learn==1.7.2
718
+ # via prdc
719
+ scipy
720
+ # via
721
+ # ads-gen (pyproject.toml)
722
+ # dreamsim
723
+ # prdc
724
+ # pytorch-fid
725
+ # scikit-image
726
+ # scikit-learn
727
+ seaborn==0.12.2
728
+ # via ads-gen (pyproject.toml)
729
+ segment-anything @ git+https://github.com/facebookresearch/segment-anything.git@dca509fe793f601edb92606367a655c15ac00fdf
730
+ # via ads-gen (pyproject.toml)
731
+ semantic-version==2.10.0
732
+ # via gradio
733
+ send2trash==1.8.3
734
+ # via jupyter-server
735
+ sentencepiece==0.2.1
736
+ # via ads-gen (pyproject.toml)
737
+ setuptools==68.2.2
738
+ # via
739
+ # ads-gen (pyproject.toml)
740
+ # jupyterlab
741
+ # tensorboard
742
+ # tensorflow
743
+ # torch
744
+ # triton
745
+ shellingham==1.5.4
746
+ # via typer
747
+ simple-parsing==0.1.7
748
+ # via tensorflow-datasets
749
+ six==1.17.0
750
+ # via
751
+ # astunparse
752
+ # gdown
753
+ # google-pasta
754
+ # promise
755
+ # python-dateutil
756
+ # rfc3339-validator
757
+ # tensorflow
758
+ sniffio==1.3.1
759
+ # via anyio
760
+ soupsieve==2.8
761
+ # via beautifulsoup4
762
+ stack-data==0.6.3
763
+ # via ipython
764
+ starlette==0.48.0
765
+ # via
766
+ # fastapi
767
+ # gradio
768
+ sympy==1.14.0
769
+ # via torch
770
+ tensorboard
771
+ # via
772
+ # ads-gen (pyproject.toml)
773
+ # tensorflow
774
+ # torch-tb-profiler
775
+ tensorboard-data-server==0.7.2
776
+ # via tensorboard
777
+ tensorflow
778
+ # via ads-gen (pyproject.toml)
779
+ tensorflow-datasets
780
+ # via ads-gen (pyproject.toml)
781
+ tensorflow-metadata
782
+ # via tensorflow-datasets
783
+ termcolor==3.1.0
784
+ # via
785
+ # tensorflow
786
+ # tensorflow-datasets
787
+ terminado==0.18.1
788
+ # via
789
+ # jupyter-server
790
+ # jupyter-server-terminals
791
+ threadpoolctl==3.6.0
792
+ # via scikit-learn
793
+ tifffile
794
+ # via scikit-image
795
+ timm==0.9.16
796
+ # via
797
+ # ads-gen (pyproject.toml)
798
+ # dreamsim
799
+ # open-clip-torch
800
+ tinycss2==1.4.0
801
+ # via bleach
802
+ tokenizers==0.22.1
803
+ # via transformers
804
+ toml==0.10.2
805
+ # via tensorflow-datasets
806
+ tomlkit==0.13.3
807
+ # via gradio
808
+ torch==2.7.0
809
+ # via
810
+ # accelerate
811
+ # bitsandbytes
812
+ # dreamsim
813
+ # open-clip-torch
814
+ # peft
815
+ # pytorch-fid
816
+ # timm
817
+ # torchvision
818
+ torch-tb-profiler==0.4.3
819
+ # via ads-gen (pyproject.toml)
820
+ torchao==0.13.0
821
+ # via ads-gen (pyproject.toml)
822
+ torchvision==0.22.0
823
+ # via
824
+ # ads-gen (pyproject.toml)
825
+ # dreamsim
826
+ # open-clip-torch
827
+ # pytorch-fid
828
+ # timm
829
+ tornado==6.5.2
830
+ # via
831
+ # ipykernel
832
+ # jupyter-client
833
+ # jupyter-server
834
+ # jupyterlab
835
+ # notebook
836
+ # terminado
837
+ tqdm==4.67.1
838
+ # via
839
+ # datasets
840
+ # etils
841
+ # gdown
842
+ # huggingface-hub
843
+ # open-clip-torch
844
+ # peft
845
+ # tensorflow-datasets
846
+ # transformers
847
+ traitlets==5.14.3
848
+ # via
849
+ # ipykernel
850
+ # ipython
851
+ # ipywidgets
852
+ # jupyter-client
853
+ # jupyter-console
854
+ # jupyter-core
855
+ # jupyter-events
856
+ # jupyter-server
857
+ # jupyterlab
858
+ # matplotlib-inline
859
+ # nbclient
860
+ # nbconvert
861
+ # nbformat
862
+ transformers==4.57.0
863
+ # via
864
+ # ads-gen (pyproject.toml)
865
+ # dreamsim
866
+ # peft
867
+ triton==3.3.0
868
+ # via
869
+ # ads-gen (pyproject.toml)
870
+ # torch
871
+ typer==0.19.2
872
+ # via gradio
873
+ types-python-dateutil==2.9.0.20251008
874
+ # via arrow
875
+ typing-extensions==4.15.0
876
+ # via
877
+ # aiosignal
878
+ # anyio
879
+ # beautifulsoup4
880
+ # etils
881
+ # fastapi
882
+ # gradio
883
+ # gradio-client
884
+ # grpcio
885
+ # huggingface-hub
886
+ # optree
887
+ # pydantic
888
+ # pydantic-core
889
+ # referencing
890
+ # simple-parsing
891
+ # starlette
892
+ # tensorflow
893
+ # torch
894
+ # typer
895
+ # typing-inspection
896
+ typing-inspection==0.4.2
897
+ # via pydantic
898
+ tzdata==2025.2
899
+ # via pandas
900
+ uri-template==1.3.0
901
+ # via jsonschema
902
+ urllib3==2.5.0
903
+ # via requests
904
+ uvicorn==0.37.0
905
+ # via gradio
906
+ wcwidth==0.2.14
907
+ # via
908
+ # ftfy
909
+ # prompt-toolkit
910
+ webcolors==24.11.1
911
+ # via jsonschema
912
+ webencodings==0.5.1
913
+ # via
914
+ # bleach
915
+ # tinycss2
916
+ websocket-client==1.9.0
917
+ # via jupyter-server
918
+ websockets==15.0.1
919
+ # via gradio-client
920
+ werkzeug==3.1.3
921
+ # via tensorboard
922
+ wheel==0.45.1
923
+ # via astunparse
924
+ widgetsnbextension==4.0.14
925
+ # via ipywidgets
926
+ wrapt==1.17.3
927
+ # via
928
+ # dm-tree
929
+ # tensorflow
930
+ # tensorflow-datasets
931
+ xxhash==3.6.0
932
+ # via datasets
933
+ yarl==1.22.0
934
+ # via aiohttp
935
+ zipp==3.23.0
936
+ # via
937
+ # etils
938
+ # importlib-metadata
uv.lock ADDED
The diff for this file is too large to render. See raw diff