AbstractPhil
/

lune-leco-adapters

Model card Files Files and versions

xet

Community

AbstractPhil committed on Nov 20, 2025

Commit

a59941b

verified ·

1 Parent(s): 3fea588

way better trainer

Browse files

Files changed (1) hide show

flow_leco_trainer.py +425 -396

flow_leco_trainer.py CHANGED Viewed

@@ -1,16 +1,19 @@
 """
-Lune LECO Trainer - Proper Concept Group Implementation
 """
 import os
 import json
 import datetime
 from dataclasses import dataclass, asdict, field
-from typing import List, Literal
 from tqdm.auto import tqdm
-from enum import Enum
 import torch
 import torch.nn.functional as F
 from torch.utils.tensorboard import SummaryWriter
 from safetensors.torch import save_file
@@ -20,100 +23,144 @@ from transformers import CLIPTextModel, CLIPTokenizer
 from huggingface_hub import hf_hub_download
-class ActionType(str, Enum):
-    """LECO action types"""
-    ERASE = "erase"           # sources → empty
-    ENHANCE = "enhance"       # sources → amplified
-    REPLACE = "replace"       # sources → target
-    NEUTRALIZE = "neutralize" # sources → neutral
-@dataclass
-class ConceptGroup:
-    """
-    A group of related concepts to transform together.
-    Training strategy:
-    - Sample from sources: these are the concepts to modify
-    - Transform to target: what they should become
-    - Use neutral as intermediate: optional neutral reference point
-    - Preserve negatives: concepts that should NOT be affected
-    Examples:
-        # Erase multiple anime styles
-        ConceptGroup(
-            sources=["anime", "manga", "cartoon"],
-            target="",
-            negatives=["realistic", "photograph"],
-            weight=1.0
-        )
-        # Replace artists
-        ConceptGroup(
-            sources=["van gogh", "picasso"],
-            target="monet",
-            neutral="painting",
-            negatives=["photograph", "digital art"],
-            weight=1.0
-        )
-        # Neutralize NSFW to safe
-        ConceptGroup(
-            sources=["nsfw", "nude", "explicit"],
-            target="safe",
-            neutral="person",
-            negatives=["portrait", "art", "figure drawing"],
-            weight=2.0
-        )
-    """
-    sources: List[str]                    # Concepts to modify (sampled during training)
-    target: str = ""                      # What to transform to (empty = erase)
-    neutral: str = ""                     # Optional neutral reference point
-    negatives: List[str] = field(default_factory=list)  # Concepts to preserve
-    weight: float = 1.0                   # Group importance
-    preservation_weight: float = 0.5      # How strongly to preserve negatives
 @dataclass
-class LECOConfig:
-    # Model paths
     output_dir: str = "./leco_outputs"
     base_model_repo: str = "AbstractPhil/sd15-flow-lune-flux"
     base_checkpoint: str = "sd15_flow_flux_t2_6_pose_t4_6_port_t1_4_s18765.pt"
-    # HuggingFace upload
-    hf_repo_id: str = "AbstractPhil/lune-leco-adapters"
-    upload_to_hub: bool = False
-    # Training data
-    action: ActionType = ActionType.ERASE
-    concept_groups: List[ConceptGroup] = field(default_factory=list)
-    # LoRA architecture
-    lora_rank: int = 4
     lora_alpha: float = 1.0
-    lora_dropout: float = 0.0
-    training_method: Literal["full", "selfattn", "xattn", "noxattn", "innoxattn"] = "xattn"
-    # Training hyperparameters
     seed: int = 42
-    iterations: int = 1000
-    lr: float = 1e-4
-    # Sampling strategy
-    sources_per_step: int = 2  # How many source concepts to sample per step
-    # Flow-matching parameters
     shift: float = 2.5
     min_timestep: float = 0.0
     max_timestep: float = 1000.0
-    # Resolution
     resolution: int = 512
 def get_target_modules(training_method: str) -> List[str]:
-    """Get layer names to inject LoRA based on training method."""
     attn1 = ["attn1.to_q", "attn1.to_k", "attn1.to_v", "attn1.to_out.0"]
     attn2 = ["attn2.to_q", "attn2.to_k", "attn2.to_v", "attn2.to_out.0"]
@@ -127,8 +174,8 @@ def get_target_modules(training_method: str) -> List[str]:
     return method_map.get(training_method, attn1 + attn2)
-def create_lora_layers(unet: torch.nn.Module, config: LECOConfig):
-    """Create LoRA layers in ComfyUI/A1111 compatible format."""
     target_modules = get_target_modules(config.training_method)
     lora_state = {}
     trainable_params = []
@@ -136,11 +183,13 @@ def create_lora_layers(unet: torch.nn.Module, config: LECOConfig):
     def get_lora_key(module_path: str) -> str:
         return f"lora_unet_{module_path.replace('.', '_')}"
     for name, module in unet.named_modules():
         if not any(target in name for target in target_modules):
             continue
-        if not isinstance(module, torch.nn.Linear):
             continue
         lora_key = get_lora_key(name)
@@ -148,11 +197,11 @@ def create_lora_layers(unet: torch.nn.Module, config: LECOConfig):
         out_dim = module.out_features
         rank = config.lora_rank
-        lora_down = torch.nn.Parameter(torch.zeros(rank, in_dim))
-        lora_up = torch.nn.Parameter(torch.zeros(out_dim, rank))
-        torch.nn.init.kaiming_uniform_(lora_down, a=1.0)
-        torch.nn.init.zeros_(lora_up)
         lora_state[f"{lora_key}.lora_down.weight"] = lora_down
         lora_state[f"{lora_key}.lora_up.weight"] = lora_up
@@ -165,8 +214,8 @@ def create_lora_layers(unet: torch.nn.Module, config: LECOConfig):
     return lora_state, trainable_params
-def apply_lora_hooks(unet: torch.nn.Module, lora_state: dict, scale: float = 1.0) -> list:
-    """Apply LoRA using forward hooks."""
     handles = []
     for key in lora_state:
@@ -201,197 +250,195 @@ def remove_lora_hooks(handles: list):
         handle.remove()
-@torch.no_grad()
-def encode_text(prompt: str, tokenizer, text_encoder, device) -> torch.Tensor:
-    """Encode text to CLIP embeddings"""
-    tokens = tokenizer(
-        prompt,
-        padding="max_length",
-        max_length=tokenizer.model_max_length,
-        truncation=True,
-        return_tensors="pt"
-    ).input_ids.to(device)
-    return text_encoder(tokens)[0]
-def compute_concept_group_loss(
-    unet: torch.nn.Module,
-    lora_state: dict,
-    group: ConceptGroup,
     tokenizer,
     text_encoder,
-    config: LECOConfig,
     device: str = "cuda"
 ):
-    """
-    Compute LECO loss for a concept group.
-    Strategy:
-    1. Sample source concepts from group.sources
-    2. Compute transformation: source → target (using neutral if provided)
-    3. Preserve negatives (ensure LoRA doesn't affect them)
-    The LoRA learns to transform ALL sources to the same target.
-    """
-    import random
-    # Sample source concepts for this step
-    num_sources = min(config.sources_per_step, len(group.sources))
-    sampled_sources = random.sample(group.sources, num_sources)
-    # Sample timestep (shared for this group)
     min_sigma = config.min_timestep / 1000.0
     max_sigma = config.max_timestep / 1000.0
-    sigma = min_sigma + torch.rand(1, device=device) * (max_sigma - min_sigma)
     sigma = (config.shift * sigma) / (1 + (config.shift - 1) * sigma)
     timestep = sigma * 1000.0
     sigma_expanded = sigma.view(1, 1, 1, 1)
-    total_loss = 0
-    metrics = {
-        "source_loss": 0,
-        "preservation_loss": 0,
-        "sources_processed": 0,
-        "negatives_processed": 0
-    }
-    # === SOURCE TRANSFORMATION LOSS ===
-    for source_concept in sampled_sources:
-        noise = torch.randn(1, 4, config.resolution // 8, config.resolution // 8, device=device)
-        noisy_input = sigma_expanded * noise
-        # Encode prompts
-        source_emb = encode_text(source_concept, tokenizer, text_encoder, device)
-        target_emb = encode_text(group.target, tokenizer, text_encoder, device)
-        # Optional: use neutral as intermediate reference
-        if group.neutral:
-            neutral_emb = encode_text(group.neutral, tokenizer, text_encoder, device)
-        else:
-            neutral_emb = None
-        # Compute target direction WITHOUT LoRA
-        with torch.no_grad():
-            pred_source = unet(
-                noisy_input, timestep,
-                encoder_hidden_states=source_emb,
-                return_dict=False
-            )[0]
-            pred_target = unet(
-                noisy_input, timestep,
-                encoder_hidden_states=target_emb,
-                return_dict=False
-            )[0]
-            # Determine transformation direction
-            if group.neutral and neutral_emb is not None:
-                # Use neutral as reference: source → neutral → target
-                pred_neutral = unet(
-                    noisy_input, timestep,
-                    encoder_hidden_states=neutral_emb,
-                    return_dict=False
-                )[0]
-                # Two-step transformation
-                step1 = pred_neutral - pred_source  # source → neutral
-                step2 = pred_target - pred_neutral   # neutral → target
-                target_delta = step1 + step2         # combined transformation
-            else:
-                # Direct transformation: source → target
-                target_delta = pred_target - pred_source
-        # Apply LoRA and measure its effect
-        handles = apply_lora_hooks(unet, lora_state, scale=1.0)
-        try:
-            pred_with_lora = unet(
-                noisy_input, timestep,
-                encoder_hidden_states=source_emb,
-                return_dict=False
-            )[0]
-        finally:
-            remove_lora_hooks(handles)
-        # LoRA contribution
-        lora_delta = pred_with_lora - pred_source
-        # Loss: LoRA should reproduce the transformation
-        source_loss = F.mse_loss(lora_delta, target_delta)
-        total_loss += source_loss * group.weight
-        metrics["source_loss"] += source_loss.item()
-        metrics["sources_processed"] += 1
-    # === PRESERVATION LOSS (negatives should remain unchanged) ===
-    for negative_concept in group.negatives:
-        noise = torch.randn(1, 4, config.resolution // 8, config.resolution // 8, device=device)
-        noisy_input = sigma_expanded * noise
-        negative_emb = encode_text(negative_concept, tokenizer, text_encoder, device)
-        # Baseline without LoRA
-        with torch.no_grad():
-            pred_negative = unet(
-                noisy_input, timestep,
-                encoder_hidden_states=negative_emb,
-                return_dict=False
-            )[0]
-        # With LoRA
-        handles = apply_lora_hooks(unet, lora_state, scale=1.0)
-        try:
-            pred_with_lora = unet(
-                noisy_input, timestep,
-                encoder_hidden_states=negative_emb,
-                return_dict=False
-            )[0]
-        finally:
-            remove_lora_hooks(handles)
-        # Penalize any change
-        preservation_loss = F.mse_loss(pred_with_lora, pred_negative)
-        total_loss += preservation_loss * group.preservation_weight
-        metrics["preservation_loss"] += preservation_loss.item()
-        metrics["negatives_processed"] += 1
-    # Average metrics
-    if metrics["sources_processed"] > 0:
-        metrics["source_loss"] /= metrics["sources_processed"]
-    if metrics["negatives_processed"] > 0:
-        metrics["preservation_loss"] /= metrics["negatives_processed"]
-    metrics["timestep"] = timestep.item()
-    metrics["sigma"] = sigma.item()
     return total_loss, metrics
-def train_leco(config: LECOConfig):
-    """Main training loop with proper concept groups"""
     device = "cuda"
     torch.manual_seed(config.seed)
-    if not config.concept_groups:
-        raise ValueError("No concept groups specified!")
-    # Validate concept groups
-    for group in config.concept_groups:
-        if not group.sources:
-            raise ValueError("Each concept group must have at least one source concept")
-    # Setup output
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    # Create name from first group
-    first_group = config.concept_groups[0]
-    source_names = "_".join([s.replace(" ", "")[:10] for s in first_group.sources[:2]])
-    if len(first_group.sources) > 2:
-        source_names += f"_plus{len(first_group.sources)-2}"
-    run_name = f"{config.action.value}_{source_names}_{timestamp}"
-    output_dir = os.path.join(config.output_dir, run_name)
     os.makedirs(output_dir, exist_ok=True)
     writer = SummaryWriter(log_dir=output_dir, flush_secs=60)
@@ -400,11 +447,33 @@ def train_leco(config: LECOConfig):
         json.dump(asdict(config), f, indent=2)
     print("="*80)
-    print(f"LECO Training: {config.action.value.upper()}")
     print("="*80)
-    # Load model
-    print("\nLoading base model...")
     checkpoint_path = hf_hub_download(
         repo_id=config.base_model_repo,
         filename=config.base_checkpoint,
@@ -420,14 +489,28 @@ def train_leco(config: LECOConfig):
     student_dict = checkpoint["student"]
     cleaned_dict = {k[5:] if k.startswith("unet.") else k: v for k, v in student_dict.items()}
-    unet.load_state_dict(cleaned_dict, strict=False)
     unet = unet.to(device)
     unet.requires_grad_(False)
     unet.eval()
-    print("✓ Loaded UNet")
-    # Load CLIP
-    print("Loading CLIP text encoder...")
     tokenizer = CLIPTokenizer.from_pretrained(
         "runwayml/stable-diffusion-v1-5", subfolder="tokenizer"
     )
@@ -439,77 +522,99 @@ def train_leco(config: LECOConfig):
     text_encoder.eval()
     print("✓ Loaded CLIP")
-    # Create LoRA layers
-    print(f"\nInjecting LoRA (rank={config.lora_rank}, alpha={config.lora_alpha})...")
-    lora_state, trainable_params = create_lora_layers(unet, config)
-    # Move Parameters to device IN-PLACE
     print(f"Moving LoRA parameters to {device}...")
     for param in trainable_params:
         param.data = param.data.to(device)
-    # Move other tensors to device
     for key, value in lora_state.items():
-        if isinstance(value, torch.Tensor) and not isinstance(value, torch.nn.Parameter):
             lora_state[key] = value.to(device)
     optimizer = torch.optim.AdamW(trainable_params, lr=config.lr, weight_decay=0.01)
-    # Print config
     print(f"\nTraining Configuration:")
-    print(f"  Action: {config.action.value}")
-    print(f"  Concept groups: {len(config.concept_groups)}")
-    for i, group in enumerate(config.concept_groups, 1):
-        print(f"\n  Group {i} (weight: {group.weight}):")
-        print(f"    Sources: {', '.join(group.sources)}")
-        print(f"    Target: '{group.target}'" if group.target else "    Target: (erase)")
-        if group.neutral:
-            print(f"    Neutral: '{group.neutral}'")
-        if group.negatives:
-            print(f"    Preserve: {', '.join(group.negatives)}")
     print(f"\n  Iterations: {config.iterations}")
     print(f"  Learning rate: {config.lr}")
-    print(f"  Training method: {config.training_method}")
-    print(f"  Sources per step: {config.sources_per_step}")
     print("="*80 + "\n")
-    # Training loop
     progress = tqdm(range(config.iterations), desc="Training")
     for step in progress:
-        import random
-        # Sample a concept group
-        group = random.choice(config.concept_groups)
-        # Compute loss for this group
-        loss, metrics = compute_concept_group_loss(
-            unet, lora_state, group,
-            tokenizer, text_encoder, config, device
         )
-        # Backprop
         loss.backward()
         grad_norm = torch.nn.utils.clip_grad_norm_(trainable_params, max_norm=1.0)
         optimizer.step()
         optimizer.zero_grad()
-        # Logging
         writer.add_scalar("loss/total", loss.item(), step)
-        writer.add_scalar("loss/source", metrics["source_loss"], step)
-        writer.add_scalar("loss/preservation", metrics["preservation_loss"], step)
         writer.add_scalar("grad_norm", grad_norm.item(), step)
         progress.set_postfix({
             "loss": f"{loss.item():.4f}",
-            "src": f"{metrics['source_loss']:.4f}",
-            "pres": f"{metrics['preservation_loss']:.4f}",
             "grad": f"{grad_norm.item():.3f}"
         })
-        if (step + 1) % 200 == 0 or step == config.iterations - 1:
-            save_checkpoint(lora_state, config, output_dir, step + 1, source_names)
     writer.close()
@@ -521,120 +626,44 @@ def train_leco(config: LECOConfig):
     return output_dir
-def save_checkpoint(lora_state, config, output_dir, step, name_suffix):
-    """Save LoRA in SafeTensors format"""
-    save_dict = {}
-    for key, value in lora_state.items():
-        if isinstance(value, torch.Tensor) and not key.endswith("._module"):
-            save_dict[key] = value.detach().cpu()
-    # Build metadata
-    all_sources = []
-    all_targets = []
-    all_negatives = []
-    for group in config.concept_groups:
-        all_sources.extend(group.sources)
-        if group.target:
-            all_targets.append(group.target)
-        all_negatives.extend(group.negatives)
-    metadata = {
-        "ss_network_module": "networks.lora",
-        "ss_network_dim": str(config.lora_rank),
-        "ss_network_alpha": str(config.lora_alpha),
-        "ss_base_model": "runwayml/stable-diffusion-v1-5",
-        "ss_training_method": config.training_method,
-        "leco_action": config.action.value,
-        "leco_sources": ", ".join(all_sources),
-        "leco_targets": ", ".join(all_targets) if all_targets else "",
-        "leco_negatives": ", ".join(all_negatives),
-        "leco_step": str(step),
-        "leco_num_groups": str(len(config.concept_groups))
-    }
-    filename = f"leco_{name_suffix}_r{config.lora_rank}_s{step}.safetensors"
-    filepath = os.path.join(output_dir, filename)
-    save_file(save_dict, filepath, metadata=metadata)
-    print(f"\n✓ Saved: {filename}")
-# ============================================================================
-# EXAMPLE CONFIGURATIONS
-# ============================================================================
 if __name__ == "__main__":
-    # Example 1: Erase anime styles (multiple sources → empty)
-    config_erase_anime = LECOConfig(
-        action=ActionType.ERASE,
-        concept_groups=[
-            ConceptGroup(
-                sources=["anime", "manga", "cartoon"],
-                target="",  # Erase
-                negatives=["realistic", "photograph", "painting"],
-                weight=1.0
-            )
-        ],
-        iterations=1000,
-        lora_rank=4,
-        training_method="xattn"  # Cross-attention for semantic content
-    )
-    # Example 2: Replace artists (multiple sources → single target)
-    config_replace_artists = LECOConfig(
-        action=ActionType.REPLACE,
-        concept_groups=[
-            ConceptGroup(
-                sources=["van gogh", "picasso", "dali"],
-                target="monet",
-                neutral="painting",  # Use painting as neutral reference
-                negatives=["photograph", "digital art"],
-                weight=1.0
-            )
-        ],
-        iterations=800,
-        lora_rank=8,
-        training_method="xattn"
-    )
-    # Example 3: Neutralize NSFW (multiple sources → safe target)
-    config_nsfw = LECOConfig(
-        action=ActionType.NEUTRALIZE,
-        concept_groups=[
-            ConceptGroup(
-                sources=["nsfw", "nude", "explicit", "naked"],
-                target="clothed",
-                neutral="person",
-                negatives=["portrait", "figure drawing", "classical art", "sculpture"],
-                weight=2.0,
-                preservation_weight=0.8  # Strong preservation
-            )
-        ],
-        iterations=1200,
-        lora_rank=4,
-        training_method="full"
     )
-    # Example 4: Your original request - weird food combos
-    config_food = LECOConfig(
-        action=ActionType.ERASE,
-        concept_groups=[
-            ConceptGroup(
-                sources=["potato chicken sandwich", "taco pizza", "banana sushi"],
-                target="",
-                neutral="food",
-                negatives=["normal sandwiches", "table", "walls", "plates", "restaurant"],
-                weight=1.0,
-                preservation_weight=1.5
-            )
-        ],
-        iterations=1000,
-        lora_rank=4,
         training_method="xattn",
-        sources_per_step=2  # Sample 2 weird foods per training step
     )
-    # Train
-    train_leco(config_erase_anime)

 """
+LECO Attribute Binding Trainer - COMPLETE WITH PROPER FLOW MATCHING
+Complete script with correct flow matching SNR and velocity prediction
 """
 import os
 import json
 import datetime
+import random
 from dataclasses import dataclass, asdict, field
+from typing import List, Tuple
 from tqdm.auto import tqdm
+from itertools import product
 import torch
+import torch.nn as nn
 import torch.nn.functional as F
 from torch.utils.tensorboard import SummaryWriter
 from safetensors.torch import save_file
 from huggingface_hub import hf_hub_download
+# ============================================================================
+# DATA STRUCTURES
+# ============================================================================
@dataclass(frozen=True)
class AttributePair:
    """A specific attribute combination that should stay distinct.

    Instances are frozen (immutable). ``negatives`` is normalized to a tuple
    on construction so that a pair built from a list (or any other iterable)
    remains hashable, and a single bare string is treated as one prompt
    rather than as a sequence of characters.
    """
    attr1: str  # first attribute phrase of the pair
    attr2: str  # second attribute phrase of the pair
    negatives: Tuple[str, ...] = ()  # prompts describing wrong combinations
    weight: float = 1.0  # per-pair multiplier applied to the positive loss

    def __post_init__(self):
        # frozen=True blocks normal attribute assignment, so go through
        # object.__setattr__ to store the normalized value.
        if isinstance(self.negatives, str):
            object.__setattr__(self, "negatives", (self.negatives,))
        elif not isinstance(self.negatives, tuple):
            object.__setattr__(self, "negatives", tuple(self.negatives))
@dataclass
class AttributeBindingConfig:
    """Config for attribute binding training"""
    # Root output directory; the training function creates a timestamped
    # "attribute_binding_<timestamp>" folder inside it.
    output_dir: str = "./leco_outputs"
    # Hugging Face repo id and checkpoint filename of the base model
    # (presumably fetched with hf_hub_download — confirm in the training function).
    base_model_repo: str = "AbstractPhil/sd15-flow-lune-flux"
    base_checkpoint: str = "sd15_flow_flux_t2_6_pose_t4_6_port_t1_4_s18765.pt"
    # NOTE(review): presumably a filename prefix for saved adapters — confirm.
    name_prefix: str = "leco"
    # Pairs to train on; training raises ValueError when this list is empty.
    attribute_pairs: List[AttributePair] = field(default_factory=list)
    # LoRA settings (same field names as LECOConfig).
    lora_rank: int = 8
    lora_alpha: float = 1.0
    training_method: str = "xattn"
    # Seed passed to torch.manual_seed at the start of training.
    seed: int = 42
    # NOTE(review): the training loop is not visible here; iterations,
    # save_every, lr and pairs_per_batch are presumably the step count,
    # checkpoint interval, optimizer learning rate and pairs sampled per
    # step — confirm against the loop.
    iterations: int = 500
    save_every: int = 250
    lr: float = 2e-4
    pairs_per_batch: int = 4
    # Upper bound on how many negatives are sampled for each positive pair.
    negatives_per_positive: int = 2
    # Min-SNR parameters
    use_min_snr: bool = True  # when False the loss uses a weight of 1
    min_snr_gamma: float = 5.0  # cap applied to the SNR before weighting
    # Flow matching parameters
    shift: float = 2.5  # sigma' = shift * sigma / (1 + (shift - 1) * sigma)
    # Timestep range; both bounds are divided by 1000 to get the sigma range.
    min_timestep: float = 0.0
    max_timestep: float = 1000.0
    # Image resolution; the sampled noise uses resolution // 8 per spatial side.
    resolution: int = 512
@dataclass
class LECOConfig:
    """Minimal config for LoRA creation"""
    # Rank of each LoRA pair: lora_down is (rank, in_dim), lora_up is (out_dim, rank).
    lora_rank: int = 4
    # NOTE(review): how alpha scales the LoRA output is not visible here — confirm.
    lora_alpha: float = 1.0
    # Key handed to get_target_modules to choose which attention layers get LoRA.
    training_method: str = "xattn"
+# ============================================================================
+# ATTRIBUTE COMBINATION HELPERS
+# ============================================================================
def extract_color(text: str) -> "str | None":
    """Return the first known color named in ``text``, or ``None``.

    Matching is case-insensitive. A color only counts when it starts a word
    (it is preceded by a non-letter or the start of the string), so words
    that merely end in a color name, such as "haired" or "tired", are not
    reported as "red". Suffixed forms like "golden" or "redhead" still match.

    "First" refers to the order of the internal color list, not to the
    position inside ``text``.
    """
    import re  # function-scope import: leaves the file's import block untouched

    colors = [
        "red", "blue", "green", "yellow", "purple", "orange", "pink",
        "black", "white", "brown", "blonde", "silver", "gold", "cyan",
        "magenta", "teal", "lavender", "gray", "grey", "beige", "navy",
        "maroon", "turquoise", "violet", "indigo", "crimson"
    ]
    text_lower = text.lower()
    for color in colors:
        if re.search(rf"(?<![a-z]){color}", text_lower):
            return color
    return None


def _swap_color(text: str, old: str, new: str) -> str:
    """Replace every word-initial, case-insensitive ``old`` in ``text`` with ``new``."""
    import re

    # Same left-boundary rule as extract_color, so exactly the occurrences
    # that were detected are the ones that get replaced.
    return re.sub(rf"(?<![a-z]){re.escape(old)}", new, text, flags=re.IGNORECASE)


def generate_smart_negatives(attr1: str, attr2: str, all_negatives: List[str] = None) -> List[str]:
    """Automatically generate wrong combinations for an attribute pair.

    When both attributes name different colors, three negatives are built:
    both colors swapped, only ``attr2`` recolored, and only ``attr1``
    recolored. Each entry of ``all_negatives`` is additionally appended to
    the correct combination as ``"attr1, attr2, neg"``.

    Args:
        attr1: First attribute phrase.
        attr2: Second attribute phrase.
        all_negatives: Optional extra negative terms; ``None`` or empty adds none.

    Returns:
        De-duplicated negatives in a stable, insertion-ordered list, so that
        seeded sampling from it is reproducible across processes.
    """
    negatives = []
    color1 = extract_color(attr1)
    color2 = extract_color(attr2)
    if color1 and color2 and color1 != color2:
        swapped_attr1 = _swap_color(attr1, color1, color2)
        swapped_attr2 = _swap_color(attr2, color2, color1)
        negatives.append(f"{swapped_attr1}, {swapped_attr2}")
        negatives.append(f"{attr1}, {swapped_attr2}")
        negatives.append(f"{swapped_attr1}, {attr2}")
    # Add universal negatives to combinations
    if all_negatives:
        for neg in all_negatives:
            negatives.append(f"{attr1}, {attr2}, {neg}")
    # dict.fromkeys removes duplicates while keeping first-seen order;
    # a set would order the result by randomized string hashes.
    return list(dict.fromkeys(negatives))
def create_attribute_combinations(
    pair_attr1: List[str],
    pair_attr2: List[str],
    negatives: List[str] = None,
    weight: float = 1.0,
    auto_generate_negatives: bool = True
) -> List[AttributePair]:
    """Build one AttributePair for every (attr1, attr2) in the cross product.

    With ``auto_generate_negatives`` the negatives of each pair come from
    ``generate_smart_negatives``; otherwise every entry of ``negatives``
    yields two prompts, ``"attr1, neg"`` and ``"neg, attr2"``. All pairs
    share the same ``weight``.
    """
    def _negatives_for(first: str, second: str) -> List[str]:
        if auto_generate_negatives:
            return generate_smart_negatives(first, second, negatives)
        manual = []
        for extra in negatives or []:
            manual.append(f"{first}, {extra}")
            manual.append(f"{extra}, {second}")
        return manual

    return [
        AttributePair(
            attr1=first,
            attr2=second,
            negatives=tuple(_negatives_for(first, second)),
            weight=weight,
        )
        for first, second in product(pair_attr1, pair_attr2)
    ]
def combine_attribute_groups(*groups: List[AttributePair]) -> List[AttributePair]:
    """Flatten any number of pair lists into one new list, keeping their order."""
    return [pair for group in groups for pair in group]
+# ============================================================================
+# LORA UTILITIES
+# ============================================================================
 def get_target_modules(training_method: str) -> List[str]:
+    """Get layer names to inject LoRA"""
     attn1 = ["attn1.to_q", "attn1.to_k", "attn1.to_v", "attn1.to_out.0"]
     attn2 = ["attn2.to_q", "attn2.to_k", "attn2.to_v", "attn2.to_out.0"]
     return method_map.get(training_method, attn1 + attn2)
+def create_lora_layers(unet: nn.Module, config: LECOConfig):
+    """Create LoRA layers"""
     target_modules = get_target_modules(config.training_method)
     lora_state = {}
     trainable_params = []
     def get_lora_key(module_path: str) -> str:
         return f"lora_unet_{module_path.replace('.', '_')}"
+    print(f"Creating LoRA layers (method: {config.training_method})...")
     for name, module in unet.named_modules():
         if not any(target in name for target in target_modules):
             continue
+        if not isinstance(module, nn.Linear):
             continue
         lora_key = get_lora_key(name)
         out_dim = module.out_features
         rank = config.lora_rank
+        lora_down = nn.Parameter(torch.zeros(rank, in_dim))
+        lora_up = nn.Parameter(torch.zeros(out_dim, rank))
+        nn.init.kaiming_uniform_(lora_down, a=1.0)
+        nn.init.zeros_(lora_up)
         lora_state[f"{lora_key}.lora_down.weight"] = lora_down
         lora_state[f"{lora_key}.lora_up.weight"] = lora_up
     return lora_state, trainable_params
+def apply_lora_hooks(unet: nn.Module, lora_state: dict, scale: float = 1.0) -> list:
+    """Apply LoRA using forward hooks"""
     handles = []
     for key in lora_state:
         handle.remove()
+# ============================================================================
+# TRAINING LOSS WITH PROPER FLOW MATCHING
+# ============================================================================
def compute_attribute_binding_loss_batched(
    unet,
    lora_state,
    positive_pairs: List[AttributePair],
    tokenizer,
    text_encoder,
    config: AttributeBindingConfig,
    device: str = "cuda"
):
    """Compute the batched attribute-binding loss for one training step.

    One sigma/timestep and one noise sample are drawn and shared by every
    prompt of the step. The UNet is evaluated without LoRA hooks (baseline,
    under ``no_grad``) and once with them; the change introduced by the LoRA
    is regressed toward a scaled direction relative to the empty-prompt
    ("neutral") prediction:

    - positives ("attr1, attr2"):  target = 0.3 * (baseline - neutral)
    - negatives (sampled per pair): target = 0.2 * (neutral - baseline)

    Args:
        unet: Model called as ``unet(x, t, encoder_hidden_states=..., return_dict=False)``;
            element 0 of the result is used as the prediction.
        lora_state: LoRA state handed to ``apply_lora_hooks``.
        positive_pairs: Pairs whose correct combination is reinforced.
        tokenizer: Callable tokenizer exposing ``model_max_length``.
        text_encoder: Encoder whose first output holds the prompt embeddings.
        config: Supplies timestep range, shift, resolution, Min-SNR settings
            and ``negatives_per_positive``.
        device: Device on which all tensors are created.

    Returns:
        ``(total_loss, metrics)`` where
        ``total_loss = weighted_positive_loss + 0.5 * negative_loss`` and
        ``metrics`` is a dict of Python scalars. When ``positive_pairs`` is
        empty the loss is a constant zero tensor (no graph attached) and the
        metrics carry the same keys with zero values.
    """
    # 1. Sample sigma uniformly inside [min_timestep, max_timestep] / 1000
    min_sigma = config.min_timestep / 1000.0
    max_sigma = config.max_timestep / 1000.0

    sigma = torch.rand(1, device=device)
    sigma = min_sigma + sigma * (max_sigma - min_sigma)  # Constrain to range

    # Apply shift transformation
    sigma = (config.shift * sigma) / (1 + (config.shift - 1) * sigma)
    timestep = sigma * 1000.0
    sigma_expanded = sigma.view(1, 1, 1, 1)

    # 2. Flow matching: x_t = sigma * noise + (1 - sigma) * x_0
    # No clean latents are available, so x_0 is taken as 0 and the
    # interpolation reduces to sigma * noise.
    noise = torch.randn(1, 4, config.resolution // 8, config.resolution // 8, device=device)
    noisy_input = sigma_expanded * noise

    # Build prompts
    positive_prompts = []
    negative_prompts = []
    pair_weights = []

    for pair in positive_pairs:
        correct = f"{pair.attr1}, {pair.attr2}"
        positive_prompts.append(correct)
        pair_weights.append(pair.weight)

        if pair.negatives:
            sampled_negs = random.sample(
                list(pair.negatives),
                min(config.negatives_per_positive, len(pair.negatives))
            )
            negative_prompts.extend(sampled_negs)

    if not positive_prompts:
        # Same key set as the normal return so callers can log unconditionally.
        return torch.tensor(0.0, device=device), {
            "positive_loss": 0, "negative_loss": 0,
            "positive_count": 0, "negative_count": 0,
            "timestep": 0.0, "sigma": 0.0, "snr_weight": 1.0,
            "lora_positive_norm": 0.0, "lora_negative_norm": 0.0
        }

    neutral_prompt = ""
    all_prompts = [neutral_prompt] + positive_prompts + negative_prompts

    text_inputs = tokenizer(
        all_prompts,
        padding="max_length",
        max_length=tokenizer.model_max_length,
        truncation=True,
        return_tensors="pt"
    ).to(device)

    # The embeddings are constants for this loss (only LoRA parameters are
    # optimized), so no autograd graph is needed through the text encoder.
    with torch.no_grad():
        all_embeddings = text_encoder(text_inputs.input_ids)[0]

    # Row 0 is the neutral prompt, then the positives, then the negatives.
    neutral_emb = all_embeddings[0:1]
    positive_embs = all_embeddings[1:1+len(positive_prompts)]
    negative_embs = all_embeddings[1+len(positive_prompts):]

    # The same noisy latent and timestep are reused for every prompt.
    batch_size = len(all_prompts) - 1
    noisy_input_batch = noisy_input.repeat(batch_size, 1, 1, 1)
    timestep_batch = timestep.repeat(batch_size)
    combined_embs = torch.cat([positive_embs, negative_embs], dim=0)

    # Get VELOCITY predictions (baseline, without LoRA)
    with torch.no_grad():
        vel_neutral = unet(
            noisy_input, timestep_batch[0:1],
            encoder_hidden_states=neutral_emb,
            return_dict=False
        )[0]

        vel_baseline = unet(
            noisy_input_batch, timestep_batch,
            encoder_hidden_states=combined_embs,
            return_dict=False
        )[0]

    vel_positive_baseline = vel_baseline[:len(positive_prompts)]
    vel_negative_baseline = vel_baseline[len(positive_prompts):]

    # Same forward pass with LoRA hooks attached; hooks are always removed.
    handles = apply_lora_hooks(unet, lora_state, scale=1.0)
    try:
        vel_with_lora = unet(
            noisy_input_batch, timestep_batch,
            encoder_hidden_states=combined_embs,
            return_dict=False
        )[0]
    finally:
        remove_lora_hooks(handles)

    vel_positive_lora = vel_with_lora[:len(positive_prompts)]
    vel_negative_lora = vel_with_lora[len(positive_prompts):]

    # 3. Compute FLOW MATCHING SNR (not DDPM)
    snr_weight = 1.0
    if config.use_min_snr:
        # Flow matching SNR: ((1 - sigma)^2) / (sigma^2)
        sigma_scalar = sigma.squeeze()
        sigma_sq = sigma_scalar ** 2
        snr = ((1 - sigma_scalar) ** 2) / (sigma_sq + 1e-8)

        # When sigma is (or rounds to) 1 the SNR is exactly 0 and
        # min(snr, gamma) / snr would be 0 / 0 = NaN, poisoning the loss.
        # Clamping the divisor keeps the ratio at its limit value of 1 and
        # leaves every snr >= 1e-8 unchanged.
        snr_safe = snr.clamp(min=1e-8)

        # Min-SNR clamping
        snr_clamped = torch.minimum(snr_safe, torch.tensor(config.min_snr_gamma, device=device))
        snr_weight_tensor = snr_clamped / snr_safe

        # Velocity prediction adjustment: divide by (SNR + 1)
        # NOTE(review): common Min-SNR formulations for velocity prediction
        # use min(snr, gamma) / (snr + 1) without the extra division by snr
        # above — confirm the combined weighting is intended.
        snr_weight_tensor = snr_weight_tensor / (snr + 1)
        snr_weight = snr_weight_tensor.item()
    else:
        snr_weight_tensor = torch.ones(1, device=device)

    # Compute losses
    vel_neutral_expanded = vel_neutral.expand_as(vel_positive_baseline)
    target_positive_direction = vel_positive_baseline - vel_neutral_expanded
    lora_positive_delta = vel_positive_lora - vel_positive_baseline

    # Per-sample MSE so each pair can carry its own weight.
    positive_loss_per_sample = F.mse_loss(
        lora_positive_delta,
        target_positive_direction * 0.3,
        reduction='none'
    ).mean(dim=(1,2,3))

    # Apply both pair weights and SNR weights
    pair_weights_tensor = torch.tensor(pair_weights, device=device)
    weighted_positive_loss = (positive_loss_per_sample * pair_weights_tensor * snr_weight_tensor).mean()

    negative_loss = torch.tensor(0.0, device=device)
    lora_negative_norm = 0.0

    if len(negative_prompts) > 0:
        vel_neutral_expanded_neg = vel_neutral.expand_as(vel_negative_baseline)
        # Direction from the negative prompt's prediction toward the neutral one.
        target_negative_direction = vel_neutral_expanded_neg - vel_negative_baseline
        lora_negative_delta = vel_negative_lora - vel_negative_baseline

        negative_loss = F.mse_loss(lora_negative_delta, target_negative_direction * 0.2, reduction='mean')
        negative_loss = negative_loss * snr_weight_tensor
        lora_negative_norm = lora_negative_delta.norm().item()

    total_loss = weighted_positive_loss + negative_loss * 0.5

    metrics = {
        "positive_loss": weighted_positive_loss.item(),
        "negative_loss": negative_loss.item() if isinstance(negative_loss, torch.Tensor) else 0.0,
        "positive_count": len(positive_prompts),
        "negative_count": len(negative_prompts),
        "timestep": timestep.item(),
        "sigma": sigma.item(),
        "snr_weight": snr_weight,
        "lora_positive_norm": lora_positive_delta.norm().item(),
        "lora_negative_norm": lora_negative_norm
    }

    return total_loss, metrics
+# ============================================================================
+# TRAINING FUNCTION
+# ============================================================================
+def train_attribute_binding(config: AttributeBindingConfig):
+    """Fast training for attribute binding with Min-SNR"""
     # Overview (as far as this diff view shows):
     #   1. seed torch, validate config.attribute_pairs, create a timestamped output dir
     #   2. compare the loaded checkpoint's output against the stock SD1.5 UNet
     #   3. create LoRA layers and optimise them for config.iterations steps
     #   4. periodically write the LoRA tensors to .safetensors; return output_dir
     # NOTE(review): this text is a rendered diff; some statements of the function
     # are not visible here, so the notes below cover only the lines that are shown.
     # Device is hard-coded; no CPU fallback appears in the visible code.
     device = "cuda"
     # Seeds torch only. NOTE(review): the `random.sample` call in the training loop
     # uses Python's `random`; no `random.seed` is visible here - confirm reproducibility.
     torch.manual_seed(config.seed)
+    if not config.attribute_pairs:
+        raise ValueError("No attribute pairs specified!")
     # Count of pairs whose `negatives` attribute is truthy (informational print only).
+    pairs_with_negatives = sum(1 for p in config.attribute_pairs if p.negatives)
+    print(f"Pairs with explicit negatives: {pairs_with_negatives}/{len(config.attribute_pairs)}")
     # Each run writes into its own second-resolution timestamped subdirectory.
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    output_dir = os.path.join(config.output_dir, f"attribute_binding_{timestamp}")
     os.makedirs(output_dir, exist_ok=True)
     writer = SummaryWriter(log_dir=output_dir, flush_secs=60)
     # NOTE(review): the statement that binds `f` (presumably a `with open(...)`) is
     # not visible in this diff view.
         json.dump(asdict(config), f, indent=2)
     print("="*80)
+    print("ATTRIBUTE BINDING TRAINING")
+    if config.use_min_snr:
+        print(f"Using Min-SNR Weighting (gamma={config.min_snr_gamma})")
     print("="*80)
+    # VERIFY UNET LOADING
     # A float32 SD1.5 UNet is loaded only to produce a reference output; it is
     # deleted and the CUDA cache emptied before the Lune checkpoint is loaded.
+    print("\nVerifying UNet loading...")
+    print("Loading base SD1.5 UNet for comparison...")
+    unet_base = UNet2DConditionModel.from_pretrained(
+        "runwayml/stable-diffusion-v1-5",
+        subfolder="unet",
+        torch_dtype=torch.float32
+    ).to(device)
+    # Create test inputs
     # Random tensors; the same three inputs are fed to both UNets below so the
     # two outputs can be compared directly.
+    test_latents = torch.randn(1, 4, 64, 64, device=device)
+    test_timestep = torch.tensor([500], device=device)
+    test_encoder = torch.randn(1, 77, 768, device=device)
+    with torch.no_grad():
+        baseline_out = unet_base(test_latents, test_timestep, encoder_hidden_states=test_encoder, return_dict=False)[0]
+    print(f"Baseline output norm: {baseline_out.norm().item():.6f}")
+    del unet_base
+    torch.cuda.empty_cache()
+    print("\nLoading Lune flow-matching model...")
     # NOTE(review): the end of this call and the code that creates `checkpoint`
     # and `unet` are not visible in this diff view.
     checkpoint_path = hf_hub_download(
         repo_id=config.base_model_repo,
         filename=config.base_checkpoint,
     student_dict = checkpoint["student"]
     # Drop a leading "unet." (5 characters) from state-dict keys; other keys pass through.
     cleaned_dict = {k[5:] if k.startswith("unet.") else k: v for k, v in student_dict.items()}
     # strict=False: key mismatches are only counted and printed, not raised.
+    missing, unexpected = unet.load_state_dict(cleaned_dict, strict=False)
+    print(f"Missing keys: {len(missing)}, Unexpected keys: {len(unexpected)}")
     # The base UNet is frozen and put in eval mode; only the LoRA params are optimised.
     unet = unet.to(device)
     unet.requires_grad_(False)
     unet.eval()
+    # Verify Lune loaded correctly
+    with torch.no_grad():
+        lune_out = unet(test_latents, test_timestep, encoder_hidden_states=test_encoder, return_dict=False)[0]
+    print(f"Lune output norm: {lune_out.norm().item():.6f}")
     # Mean absolute difference to the SD1.5 reference; below 1e-4 only a warning
     # is printed - training continues either way.
+    diff = (lune_out - baseline_out).abs().mean().item()
+    print(f"Difference from baseline: {diff:.6f}")
+    if diff < 1e-4:
+        print("⚠️  WARNING: Outputs are nearly identical - checkpoint may not have loaded!")
+    else:
+        print("✓ Lune checkpoint loaded correctly (outputs differ)")
+    print("\nLoading CLIP...")
     # NOTE(review): the construction of `text_encoder` is not visible in this diff view.
     tokenizer = CLIPTokenizer.from_pretrained(
         "runwayml/stable-diffusion-v1-5", subfolder="tokenizer"
     )
     text_encoder.eval()
     print("✓ Loaded CLIP")
+    print(f"\nCreating LoRA (rank={config.lora_rank})...")
     # Only rank, alpha and training_method are forwarded to LECOConfig here.
+    leco_config = LECOConfig(
+        lora_rank=config.lora_rank,
+        lora_alpha=config.lora_alpha,
+        training_method=config.training_method
+    )
+    lora_state, trainable_params = create_lora_layers(unet, leco_config)
     print(f"Moving LoRA parameters to {device}...")
     # Parameters are moved through `.data`, so the Parameter objects themselves are kept.
     for param in trainable_params:
         param.data = param.data.to(device)
     # Plain tensors (not nn.Parameter) stored in lora_state are replaced by device copies.
     # NOTE(review): `nn` must be importable at module level - its import is not visible here.
     for key, value in lora_state.items():
+        if isinstance(value, torch.Tensor) and not isinstance(value, nn.Parameter):
             lora_state[key] = value.to(device)
     optimizer = torch.optim.AdamW(trainable_params, lr=config.lr, weight_decay=0.01)
     print(f"\nTraining Configuration:")
+    print(f"  Attribute pairs: {len(config.attribute_pairs)}")
     # Only the first three pairs are listed; the remainder is summarised as a count.
+    for i, pair in enumerate(config.attribute_pairs[:3], 1):
+        print(f"    {i}. {pair.attr1} + {pair.attr2} (weight: {pair.weight})")
+        if pair.negatives:
+            print(f"       Negatives: {len(pair.negatives)} total")
+    if len(config.attribute_pairs) > 3:
+        print(f"    ... and {len(config.attribute_pairs)-3} more")
     print(f"\n  Iterations: {config.iterations}")
+    print(f"  Pairs per batch: {config.pairs_per_batch}")
+    print(f"  Negatives per positive: {config.negatives_per_positive}")
     print(f"  Learning rate: {config.lr}")
     print("="*80 + "\n")
     progress = tqdm(range(config.iterations), desc="Training")
     for step in progress:
         # Sample without replacement; the count is capped at the number of available pairs.
+        sampled_pairs = random.sample(
+            config.attribute_pairs,
+            min(config.pairs_per_batch, len(config.attribute_pairs))
+        )
+        loss, metrics = compute_attribute_binding_loss_batched(
+            unet, lora_state,
+            sampled_pairs,
+            tokenizer, text_encoder,
+            config,
+            device
         )
         # One optimisation step: backprop, clip the total grad norm to 1.0, update, reset grads.
         loss.backward()
         grad_norm = torch.nn.utils.clip_grad_norm_(trainable_params, max_norm=1.0)
         optimizer.step()
         optimizer.zero_grad()
         writer.add_scalar("loss/total", loss.item(), step)
+        writer.add_scalar("loss/positive", metrics["positive_loss"], step)
+        writer.add_scalar("loss/negative", metrics["negative_loss"], step)
         writer.add_scalar("grad_norm", grad_norm.item(), step)
+        writer.add_scalar("snr_weight", metrics["snr_weight"], step)
         progress.set_postfix({
             "loss": f"{loss.item():.4f}",
+            "pos": f"{metrics['positive_loss']:.3f}",
+            "neg": f"{metrics['negative_loss']:.3f}",
+            "snr": f"{metrics['snr_weight']:.2f}",
             "grad": f"{grad_norm.item():.3f}"
         })
         # Checkpoint every `save_every` steps and always on the final step.
+        if (step + 1) % config.save_every == 0 or step == config.iterations - 1:
+            save_dict = {}
             # Keep tensor entries only, skipping keys that end in "._module";
             # tensors are detached and copied to CPU before saving.
+            for key, value in lora_state.items():
+                if isinstance(value, torch.Tensor) and not key.endswith("._module"):
+                    save_dict[key] = value.detach().cpu()
             # All metadata values are strings (numbers and booleans go through str()).
+            metadata = {
+                "ss_network_module": "networks.lora",
+                "ss_network_dim": str(config.lora_rank),
+                "ss_network_alpha": str(config.lora_alpha),
+                "ss_training_method": config.training_method,
+                "leco_action": "attribute_binding",
+                "leco_num_pairs": str(len(config.attribute_pairs)),
+                "leco_step": str(step + 1),
+                "leco_min_snr": str(config.use_min_snr),
+                "leco_min_snr_gamma": str(config.min_snr_gamma)
+            }
+            filename = f"{config.name_prefix}_r{config.lora_rank}_s{step+1}.safetensors"
+            filepath = os.path.join(output_dir, filename)
+            save_file(save_dict, filepath, metadata=metadata)
+            print(f"\n✓ Saved: {filename}")
     writer.close()
     # Returns the timestamped run directory that holds the config, logs and checkpoints.
     return output_dir
 if __name__ == "__main__":
     # Example entry point: builds hair-colour / shirt-colour attribute pairs and
     # starts a training run with the configuration below.
+    # Example 1: Hair + Clothes colors
     # NOTE(review): this is a one-element list - all terms sit in a single
     # comma-separated string. Confirm that one combined negative prompt is intended.
+    universal_negs = ["ugly, duplicate, morbid, mutilated, blurry, fuzzy, out of frame, gross"]
+    hair_colors = ["red hair", "blue hair", "green hair"]
+    clothes = ["red shirt", "blue shirt", "green shirt"]
     # create_attribute_combinations is defined outside this view; presumably it
     # combines every attr1 with every attr2 - confirm against its definition.
+    hair_clothes_pairs = create_attribute_combinations(
+        pair_attr1=hair_colors,
+        pair_attr2=clothes,
+        negatives=universal_negs,
+        weight=1.0,
+        auto_generate_negatives=True
     )
+    print(f"Generated {len(hair_clothes_pairs)} hair+clothes pairs")
+    # Training config
     # Fields not listed here fall back to whatever defaults AttributeBindingConfig
     # declares (its definition is outside this view).
+    config = AttributeBindingConfig(
+        name_prefix="color_clothes_test",
+        attribute_pairs=hair_clothes_pairs,
+        iterations=5000,
+        lora_rank=16,
+        lr=2e-4,
+        pairs_per_batch=4,
+        negatives_per_positive=3,
         training_method="xattn",
+        save_every=250,
+        # Flow matching parameters
+        shift=2.5,
+        min_timestep=0.0,
+        max_timestep=1000.0,
+        # Min-SNR enabled
+        use_min_snr=True,
+        min_snr_gamma=5.0
     )
     # The returned output directory is not used here.
+    train_attribute_binding(config)