AbstractPhil
/

lune-leco-adapters

Model card Files Files and versions

xet

Community

AbstractPhil committed on Nov 20, 2025

Commit

3fea588

verified ·

1 Parent(s): f1b7957

Update flow_leco_trainer.py

Browse files

Files changed (1) hide show

flow_leco_trainer.py +309 -199

flow_leco_trainer.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Lune LECO Trainer - Fixed
 """
 import os
@@ -22,36 +22,56 @@ from huggingface_hub import hf_hub_download
 class ActionType(str, Enum):
     """LECO action types"""
-    ERASE = "erase"
-    ENHANCE = "enhance"
-    REPLACE = "replace"
-    SUPPRESS = "suppress"
 @dataclass
-class ConceptPair:
     """
-    Single concept transformation pair.
-    The LoRA learns: pred(concept) - pred(anchor)
     Examples:
-        Erase: ConceptPair("anime style", "")
-        Enhance: ConceptPair("masterpiece", "")
-        Replace: ConceptPair("van gogh", "monet")
-        Suppress: ConceptPair("nsfw", "sfw")
     """
-    concept: str
-    anchor: str = ""
-    weight: float = 1.0
-    inference_weight: float = -1.0
-@dataclass
-class PreservationSet:
-    """Prompts that should remain unchanged"""
-    prompts: List[str] = field(default_factory=list)
-    weight: float = 0.3
 @dataclass
@@ -61,28 +81,29 @@ class LECOConfig:
     base_model_repo: str = "AbstractPhil/sd15-flow-lune-flux"
     base_checkpoint: str = "sd15_flow_flux_t2_6_pose_t4_6_port_t1_4_s18765.pt"
-    # HuggingFace
     hf_repo_id: str = "AbstractPhil/lune-leco-adapters"
     upload_to_hub: bool = False
     # Training data
     action: ActionType = ActionType.ERASE
-    concept_pairs: List[ConceptPair] = field(default_factory=list)
-    preservation: PreservationSet = field(default_factory=PreservationSet)
     # LoRA architecture
     lora_rank: int = 4
     lora_alpha: float = 1.0
     lora_dropout: float = 0.0
-    training_method: Literal["full", "selfattn", "xattn", "noxattn", "innoxattn"] = "full"
-    # Training
     seed: int = 42
     iterations: int = 1000
     lr: float = 1e-4
-    pairs_per_step: int = 1
-    # Flow-matching
     shift: float = 2.5
     min_timestep: float = 0.0
     max_timestep: float = 1000.0
@@ -92,7 +113,7 @@ class LECOConfig:
 def get_target_modules(training_method: str) -> List[str]:
-    """Get layer names for LoRA injection"""
     attn1 = ["attn1.to_q", "attn1.to_k", "attn1.to_v", "attn1.to_out.0"]
     attn2 = ["attn2.to_q", "attn2.to_k", "attn2.to_v", "attn2.to_out.0"]
@@ -107,7 +128,7 @@ def get_target_modules(training_method: str) -> List[str]:
 def create_lora_layers(unet: torch.nn.Module, config: LECOConfig):
-    """Create LoRA layers in ComfyUI/A1111 format"""
     target_modules = get_target_modules(config.training_method)
     lora_state = {}
     trainable_params = []
@@ -115,9 +136,6 @@ def create_lora_layers(unet: torch.nn.Module, config: LECOConfig):
     def get_lora_key(module_path: str) -> str:
         return f"lora_unet_{module_path.replace('.', '_')}"
-    print(f"Creating LoRA layers (method: {config.training_method})...")
-    layer_count = 0
     for name, module in unet.named_modules():
         if not any(target in name for target in target_modules):
             continue
@@ -130,9 +148,6 @@ def create_lora_layers(unet: torch.nn.Module, config: LECOConfig):
         out_dim = module.out_features
         rank = config.lora_rank
-        # LoRA matrices
-        # down: [rank, in_features]
-        # up: [out_features, rank]
         lora_down = torch.nn.Parameter(torch.zeros(rank, in_dim))
         lora_up = torch.nn.Parameter(torch.zeros(out_dim, rank))
@@ -145,21 +160,13 @@ def create_lora_layers(unet: torch.nn.Module, config: LECOConfig):
         lora_state[f"{lora_key}._module"] = module
         trainable_params.extend([lora_down, lora_up])
-        layer_count += 1
-    print(f"✓ Created {layer_count} LoRA layers ({len(trainable_params)} parameters)")
     return lora_state, trainable_params
 def apply_lora_hooks(unet: torch.nn.Module, lora_state: dict, scale: float = 1.0) -> list:
-    """
-    Apply LoRA using forward hooks.
-    LoRA computation: out = out + scale * (x @ down.T @ up.T)
-    Using F.linear: F.linear(x, W) computes x @ W.T
-    So: F.linear(F.linear(x, down), up) gives x @ down.T @ up.T ✓
-    """
     handles = []
     for key in lora_state:
@@ -178,9 +185,6 @@ def apply_lora_hooks(unet: torch.nn.Module, lora_state: dict, scale: float = 1.0
         def make_hook(down, up, s):
             def forward_hook(mod, inp, out):
                 x = inp[0]
-                # F.linear handles transpose internally
-                # down is [rank, in_features], F.linear does x @ down.T
-                # up is [out_features, rank], F.linear does result @ up.T
                 lora_out = F.linear(F.linear(x, down), up)
                 return out + lora_out * s
             return forward_hook
@@ -211,145 +215,182 @@ def encode_text(prompt: str, tokenizer, text_encoder, device) -> torch.Tensor:
     return text_encoder(tokens)[0]
-def compute_leco_loss(
     unet: torch.nn.Module,
     lora_state: dict,
-    pair: ConceptPair,
     tokenizer,
     text_encoder,
     config: LECOConfig,
     device: str = "cuda"
 ):
     """
-    Compute LECO loss for a concept pair.
-    Trains LoRA to reproduce: pred(concept) - pred(anchor)
-    """
-    # Sample timestep
-    min_sigma = config.min_timestep / 1000.0
-    max_sigma = config.max_timestep / 1000.0
-    sigma = min_sigma + torch.rand(1, device=device) * (max_sigma - min_sigma)
-    sigma = (config.shift * sigma) / (1 + (config.shift - 1) * sigma)
-    timestep = sigma * 1000.0
-    sigma = sigma.view(1, 1, 1, 1)
-    # Random noise
-    noise = torch.randn(1, 4, config.resolution // 8, config.resolution // 8, device=device)
-    noisy_input = sigma * noise
-    # Encode prompts
-    concept_emb = encode_text(pair.concept, tokenizer, text_encoder, device)
-    anchor_emb = encode_text(pair.anchor, tokenizer, text_encoder, device)
-    # Compute target direction (without LoRA)
-    with torch.no_grad():
-        pred_concept = unet(
-            noisy_input, timestep,
-            encoder_hidden_states=concept_emb,
-            return_dict=False
-        )[0]
-        pred_anchor = unet(
-            noisy_input, timestep,
-            encoder_hidden_states=anchor_emb,
-            return_dict=False
-        )[0]
-        target_delta = pred_concept - pred_anchor
-    # Apply LoRA and get its contribution
-    handles = apply_lora_hooks(unet, lora_state, scale=1.0)
-    try:
-        pred_with_lora = unet(
-            noisy_input, timestep,
-            encoder_hidden_states=concept_emb,
-            return_dict=False
-        )[0]
-        lora_delta = pred_with_lora - pred_concept
-        loss = F.mse_loss(lora_delta, target_delta)
-    finally:
-        remove_lora_hooks(handles)
-    return loss, {
-        "timestep": timestep.item(),
-        "sigma": sigma.item(),
-        "target_norm": target_delta.norm().item(),
-        "lora_norm": lora_delta.norm().item()
-    }
-def compute_preservation_loss(
-    unet: torch.nn.Module,
-    lora_state: dict,
-    preservation: PreservationSet,
-    tokenizer,
-    text_encoder,
-    config: LECOConfig,
-    device: str = "cuda"
-):
-    """Penalize LoRA changes to preservation prompts"""
-    if not preservation.prompts:
-        return 0.0, {}
     min_sigma = config.min_timestep / 1000.0
     max_sigma = config.max_timestep / 1000.0
     sigma = min_sigma + torch.rand(1, device=device) * (max_sigma - min_sigma)
     sigma = (config.shift * sigma) / (1 + (config.shift - 1) * sigma)
     timestep = sigma * 1000.0
-    sigma = sigma.view(1, 1, 1, 1)
     total_loss = 0
-    for prompt in preservation.prompts:
         noise = torch.randn(1, 4, config.resolution // 8, config.resolution // 8, device=device)
-        noisy_input = sigma * noise
-        prompt_emb = encode_text(prompt, tokenizer, text_encoder, device)
         with torch.no_grad():
-            pred_base = unet(
                 noisy_input, timestep,
-                encoder_hidden_states=prompt_emb,
                 return_dict=False
             )[0]
         handles = apply_lora_hooks(unet, lora_state, scale=1.0)
         try:
             pred_with_lora = unet(
                 noisy_input, timestep,
-                encoder_hidden_states=prompt_emb,
                 return_dict=False
             )[0]
         finally:
             remove_lora_hooks(handles)
-        total_loss += F.mse_loss(pred_with_lora, pred_base)
-    avg_loss = total_loss / len(preservation.prompts)
-    return avg_loss, {"count": len(preservation.prompts), "avg": avg_loss.item()}
 def train_leco(config: LECOConfig):
-    """Main training loop"""
     device = "cuda"
     torch.manual_seed(config.seed)
-    if not config.concept_pairs:
-        raise ValueError("No concept pairs specified!")
     # Setup output
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
-    concept_names = "_".join([
-        p.concept.replace(" ", "")[:12]
-        for p in config.concept_pairs[:2]
-    ])
-    if len(config.concept_pairs) > 2:
-        concept_names += f"_plus{len(config.concept_pairs)-2}"
-    run_name = f"{config.action.value}_{concept_names}_{timestamp}"
     output_dir = os.path.join(config.output_dir, run_name)
     os.makedirs(output_dir, exist_ok=True)
@@ -398,30 +439,39 @@ def train_leco(config: LECOConfig):
     text_encoder.eval()
     print("✓ Loaded CLIP")
-    # Create LoRA
     print(f"\nInjecting LoRA (rank={config.lora_rank}, alpha={config.lora_alpha})...")
     lora_state, trainable_params = create_lora_layers(unet, config)
-    for key in lora_state:
-        if isinstance(lora_state[key], torch.Tensor):
-            lora_state[key] = lora_state[key].to(device)
     optimizer = torch.optim.AdamW(trainable_params, lr=config.lr, weight_decay=0.01)
     # Print config
     print(f"\nTraining Configuration:")
     print(f"  Action: {config.action.value}")
-    print(f"  Concept pairs: {len(config.concept_pairs)}")
-    for i, pair in enumerate(config.concept_pairs, 1):
-        anchor_str = f"→ '{pair.anchor}'" if pair.anchor else "(none)"
-        print(f"    {i}. '{pair.concept}' {anchor_str} (weight: {pair.weight})")
-    if config.preservation.prompts:
-        print(f"  Preservation: {len(config.preservation.prompts)} prompts")
     print(f"\n  Iterations: {config.iterations}")
     print(f"  Learning rate: {config.lr}")
     print(f"  Training method: {config.training_method}")
     print("="*80 + "\n")
     # Training loop
@@ -429,50 +479,37 @@ def train_leco(config: LECOConfig):
     for step in progress:
         import random
-        if config.pairs_per_step >= len(config.concept_pairs):
-            active_pairs = config.concept_pairs
-        else:
-            active_pairs = random.sample(config.concept_pairs, config.pairs_per_step)
-        total_loss = 0
-        all_metrics = []
-        for pair in active_pairs:
-            loss, metrics = compute_leco_loss(
-                unet, lora_state, pair,
-                tokenizer, text_encoder, config, device
-            )
-            total_loss += loss * pair.weight
-            all_metrics.append(metrics)
-        if config.preservation.prompts:
-            pres_loss, pres_metrics = compute_preservation_loss(
-                unet, lora_state, config.preservation,
-                tokenizer, text_encoder, config, device
-            )
-            total_loss += pres_loss * config.preservation.weight
-        else:
-            pres_loss = 0
-        total_loss.backward()
         grad_norm = torch.nn.utils.clip_grad_norm_(trainable_params, max_norm=1.0)
         optimizer.step()
         optimizer.zero_grad()
         # Logging
-        writer.add_scalar("loss/total", total_loss.item(), step)
-        writer.add_scalar("loss/preservation", pres_loss if isinstance(pres_loss, (float, int)) else pres_loss.item(), step)
         writer.add_scalar("grad_norm", grad_norm.item(), step)
-        avg_target = sum(m["target_norm"] for m in all_metrics) / len(all_metrics)
         progress.set_postfix({
-            "loss": f"{total_loss.item():.4f}",
-            "grad": f"{grad_norm.item():.3f}",
-            "target": f"{avg_target:.3f}"
         })
         if (step + 1) % 200 == 0 or step == config.iterations - 1:
-            save_checkpoint(lora_state, config, output_dir, step + 1, concept_names)
     writer.close()
@@ -492,8 +529,15 @@ def save_checkpoint(lora_state, config, output_dir, step, name_suffix):
         if isinstance(value, torch.Tensor) and not key.endswith("._module"):
             save_dict[key] = value.detach().cpu()
-    concepts_str = ", ".join([p.concept for p in config.concept_pairs])
-    anchors_str = ", ".join([p.anchor for p in config.concept_pairs if p.anchor])
     metadata = {
         "ss_network_module": "networks.lora",
@@ -502,9 +546,11 @@ def save_checkpoint(lora_state, config, output_dir, step, name_suffix):
         "ss_base_model": "runwayml/stable-diffusion-v1-5",
         "ss_training_method": config.training_method,
         "leco_action": config.action.value,
-        "leco_concepts": concepts_str,
-        "leco_anchors": anchors_str,
-        "leco_step": str(step)
     }
     filename = f"leco_{name_suffix}_r{config.lora_rank}_s{step}.safetensors"
@@ -514,17 +560,81 @@ def save_checkpoint(lora_state, config, output_dir, step, name_suffix):
     print(f"\n✓ Saved: {filename}")
 if __name__ == "__main__":
-    config = LECOConfig(
-        action=ActionType.ENHANCE,
-        concept_pairs=[
-            ConceptPair("masterpiece", "", weight=1.0),
-            ConceptPair("best quality", "", weight=1.0),
-            ConceptPair("highly detailed", "", weight=0.8),
         ],
-        iterations=600,
         lora_rank=4,
-        training_method="selfattn"
     )
-    train_leco(config)

 """
+Lune LECO Trainer - Proper Concept Group Implementation
 """
 import os
 class ActionType(str, Enum):
     """LECO action types"""
+    ERASE = "erase"           # sources → empty
+    ENHANCE = "enhance"       # sources → amplified
+    REPLACE = "replace"       # sources → target
+    NEUTRALIZE = "neutralize" # sources → neutral
 @dataclass
+class ConceptGroup:
     """
+    A group of related concepts to transform together.
+    Training strategy:
+    - Sample from sources: these are the concepts to modify
+    - Transform to target: what they should become
+    - Use neutral as intermediate: optional neutral reference point
+    - Preserve negatives: concepts that should NOT be affected
     Examples:
+        # Erase multiple anime styles
+        ConceptGroup(
+            sources=["anime", "manga", "cartoon"],
+            target="",
+            negatives=["realistic", "photograph"],
+            weight=1.0
+        )
+        # Replace artists
+        ConceptGroup(
+            sources=["van gogh", "picasso"],
+            target="monet",
+            neutral="painting",
+            negatives=["photograph", "digital art"],
+            weight=1.0
+        )
+        # Neutralize NSFW to safe
+        ConceptGroup(
+            sources=["nsfw", "nude", "explicit"],
+            target="safe",
+            neutral="person",
+            negatives=["portrait", "art", "figure drawing"],
+            weight=2.0
+        )
     """
+    sources: List[str]                    # Concepts to modify (sampled during training)
+    target: str = ""                      # What to transform to (empty = erase)
+    neutral: str = ""                     # Optional neutral reference point
+    negatives: List[str] = field(default_factory=list)  # Concepts to preserve
+    weight: float = 1.0                   # Group importance
+    preservation_weight: float = 0.5      # How strongly to preserve negatives
 @dataclass
     base_model_repo: str = "AbstractPhil/sd15-flow-lune-flux"
     base_checkpoint: str = "sd15_flow_flux_t2_6_pose_t4_6_port_t1_4_s18765.pt"
+    # HuggingFace upload
     hf_repo_id: str = "AbstractPhil/lune-leco-adapters"
     upload_to_hub: bool = False
     # Training data
     action: ActionType = ActionType.ERASE
+    concept_groups: List[ConceptGroup] = field(default_factory=list)
     # LoRA architecture
     lora_rank: int = 4
     lora_alpha: float = 1.0
     lora_dropout: float = 0.0
+    training_method: Literal["full", "selfattn", "xattn", "noxattn", "innoxattn"] = "xattn"
+    # Training hyperparameters
     seed: int = 42
     iterations: int = 1000
     lr: float = 1e-4
+    # Sampling strategy
+    sources_per_step: int = 2  # How many source concepts to sample per step
+    # Flow-matching parameters
     shift: float = 2.5
     min_timestep: float = 0.0
     max_timestep: float = 1000.0
 def get_target_modules(training_method: str) -> List[str]:
+    """Get layer names to inject LoRA based on training method."""
     attn1 = ["attn1.to_q", "attn1.to_k", "attn1.to_v", "attn1.to_out.0"]
     attn2 = ["attn2.to_q", "attn2.to_k", "attn2.to_v", "attn2.to_out.0"]
 def create_lora_layers(unet: torch.nn.Module, config: LECOConfig):
+    """Create LoRA layers in ComfyUI/A1111 compatible format."""
     target_modules = get_target_modules(config.training_method)
     lora_state = {}
     trainable_params = []
     def get_lora_key(module_path: str) -> str:
         return f"lora_unet_{module_path.replace('.', '_')}"
     for name, module in unet.named_modules():
         if not any(target in name for target in target_modules):
             continue
         out_dim = module.out_features
         rank = config.lora_rank
         lora_down = torch.nn.Parameter(torch.zeros(rank, in_dim))
         lora_up = torch.nn.Parameter(torch.zeros(out_dim, rank))
         lora_state[f"{lora_key}._module"] = module
         trainable_params.extend([lora_down, lora_up])
+    print(f"✓ Created {len(trainable_params)//2} LoRA layers ({len(trainable_params)} parameters)")
     return lora_state, trainable_params
 def apply_lora_hooks(unet: torch.nn.Module, lora_state: dict, scale: float = 1.0) -> list:
+    """Apply LoRA using forward hooks."""
     handles = []
     for key in lora_state:
         def make_hook(down, up, s):
             def forward_hook(mod, inp, out):
                 x = inp[0]
                 lora_out = F.linear(F.linear(x, down), up)
                 return out + lora_out * s
             return forward_hook
     return text_encoder(tokens)[0]
+def compute_concept_group_loss(
     unet: torch.nn.Module,
     lora_state: dict,
+    group: ConceptGroup,
     tokenizer,
     text_encoder,
     config: LECOConfig,
     device: str = "cuda"
 ):
     """
+    Compute LECO loss for a concept group.
+    Strategy:
+    1. Sample source concepts from group.sources
+    2. Compute transformation: source → target (using neutral if provided)
+    3. Preserve negatives (ensure LoRA doesn't affect them)
+    The LoRA learns to transform ALL sources to the same target.
+    """
+    import random
+    # Sample source concepts for this step
+    num_sources = min(config.sources_per_step, len(group.sources))
+    sampled_sources = random.sample(group.sources, num_sources)
+    # Sample timestep (shared for this group)
     min_sigma = config.min_timestep / 1000.0
     max_sigma = config.max_timestep / 1000.0
     sigma = min_sigma + torch.rand(1, device=device) * (max_sigma - min_sigma)
     sigma = (config.shift * sigma) / (1 + (config.shift - 1) * sigma)
     timestep = sigma * 1000.0
+    sigma_expanded = sigma.view(1, 1, 1, 1)
     total_loss = 0
+    metrics = {
+        "source_loss": 0,
+        "preservation_loss": 0,
+        "sources_processed": 0,
+        "negatives_processed": 0
+    }
+    # === SOURCE TRANSFORMATION LOSS ===
+    for source_concept in sampled_sources:
         noise = torch.randn(1, 4, config.resolution // 8, config.resolution // 8, device=device)
+        noisy_input = sigma_expanded * noise
+        # Encode prompts
+        source_emb = encode_text(source_concept, tokenizer, text_encoder, device)
+        target_emb = encode_text(group.target, tokenizer, text_encoder, device)
+        # Optional: use neutral as intermediate reference
+        if group.neutral:
+            neutral_emb = encode_text(group.neutral, tokenizer, text_encoder, device)
+        else:
+            neutral_emb = None
+        # Compute target direction WITHOUT LoRA
         with torch.no_grad():
+            pred_source = unet(
+                noisy_input, timestep,
+                encoder_hidden_states=source_emb,
+                return_dict=False
+            )[0]
+            pred_target = unet(
                 noisy_input, timestep,
+                encoder_hidden_states=target_emb,
                 return_dict=False
             )[0]
+            # Determine transformation direction
+            if group.neutral and neutral_emb is not None:
+                # Use neutral as reference: source → neutral → target
+                pred_neutral = unet(
+                    noisy_input, timestep,
+                    encoder_hidden_states=neutral_emb,
+                    return_dict=False
+                )[0]
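+                # Two-step transformation.
+                # NOTE(review): step1 + step2 telescopes to pred_target - pred_source, so this matches the direct branch and pred_neutral cancels out.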
+                step1 = pred_neutral - pred_source  # source → neutral
+                step2 = pred_target - pred_neutral   # neutral → target
+                target_delta = step1 + step2         # combined transformation
+            else:
+                # Direct transformation: source → target
+                target_delta = pred_target - pred_source
+        # Apply LoRA and measure its effect
         handles = apply_lora_hooks(unet, lora_state, scale=1.0)
         try:
             pred_with_lora = unet(
                 noisy_input, timestep,
+                encoder_hidden_states=source_emb,
                 return_dict=False
             )[0]
         finally:
             remove_lora_hooks(handles)
+        # LoRA contribution
+        lora_delta = pred_with_lora - pred_source
+        # Loss: LoRA should reproduce the transformation
+        source_loss = F.mse_loss(lora_delta, target_delta)
+        total_loss += source_loss * group.weight
+        metrics["source_loss"] += source_loss.item()
+        metrics["sources_processed"] += 1
+    # === PRESERVATION LOSS (negatives should remain unchanged) ===
+    for negative_concept in group.negatives:
+        noise = torch.randn(1, 4, config.resolution // 8, config.resolution // 8, device=device)
+        noisy_input = sigma_expanded * noise
+        negative_emb = encode_text(negative_concept, tokenizer, text_encoder, device)
+        # Baseline without LoRA
+        with torch.no_grad():
+            pred_negative = unet(
+                noisy_input, timestep,
+                encoder_hidden_states=negative_emb,
+                return_dict=False
+            )[0]
+        # With LoRA
+        handles = apply_lora_hooks(unet, lora_state, scale=1.0)
+        try:
+            pred_with_lora = unet(
+                noisy_input, timestep,
+                encoder_hidden_states=negative_emb,
+                return_dict=False
+            )[0]
+        finally:
+            remove_lora_hooks(handles)
+        # Penalize any change
+        preservation_loss = F.mse_loss(pred_with_lora, pred_negative)
+        total_loss += preservation_loss * group.preservation_weight
+        metrics["preservation_loss"] += preservation_loss.item()
+        metrics["negatives_processed"] += 1
+    # Average metrics
+    if metrics["sources_processed"] > 0:
+        metrics["source_loss"] /= metrics["sources_processed"]
+    if metrics["negatives_processed"] > 0:
+        metrics["preservation_loss"] /= metrics["negatives_processed"]
+    metrics["timestep"] = timestep.item()
+    metrics["sigma"] = sigma.item()
+    return total_loss, metrics
 def train_leco(config: LECOConfig):
+    """Main training loop with proper concept groups"""
     device = "cuda"
     torch.manual_seed(config.seed)
+    if not config.concept_groups:
+        raise ValueError("No concept groups specified!")
+    # Validate concept groups
+    for group in config.concept_groups:
+        if not group.sources:
+            raise ValueError("Each concept group must have at least one source concept")
     # Setup output
     timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    # Create name from first group
+    first_group = config.concept_groups[0]
+    source_names = "_".join([s.replace(" ", "")[:10] for s in first_group.sources[:2]])
+    if len(first_group.sources) > 2:
+        source_names += f"_plus{len(first_group.sources)-2}"
+    run_name = f"{config.action.value}_{source_names}_{timestamp}"
     output_dir = os.path.join(config.output_dir, run_name)
     os.makedirs(output_dir, exist_ok=True)
     text_encoder.eval()
     print("✓ Loaded CLIP")
+    # Create LoRA layers
     print(f"\nInjecting LoRA (rank={config.lora_rank}, alpha={config.lora_alpha})...")
     lora_state, trainable_params = create_lora_layers(unet, config)
+    # Move Parameters to device IN-PLACE
+    print(f"Moving LoRA parameters to {device}...")
+    for param in trainable_params:
+        param.data = param.data.to(device)
+    # Move other tensors to device
+    for key, value in lora_state.items():
+        if isinstance(value, torch.Tensor) and not isinstance(value, torch.nn.Parameter):
+            lora_state[key] = value.to(device)
     optimizer = torch.optim.AdamW(trainable_params, lr=config.lr, weight_decay=0.01)
     # Print config
     print(f"\nTraining Configuration:")
     print(f"  Action: {config.action.value}")
+    print(f"  Concept groups: {len(config.concept_groups)}")
+    for i, group in enumerate(config.concept_groups, 1):
+        print(f"\n  Group {i} (weight: {group.weight}):")
+        print(f"    Sources: {', '.join(group.sources)}")
+        print(f"    Target: '{group.target}'" if group.target else "    Target: (erase)")
+        if group.neutral:
+            print(f"    Neutral: '{group.neutral}'")
+        if group.negatives:
+            print(f"    Preserve: {', '.join(group.negatives)}")
     print(f"\n  Iterations: {config.iterations}")
     print(f"  Learning rate: {config.lr}")
     print(f"  Training method: {config.training_method}")
+    print(f"  Sources per step: {config.sources_per_step}")
     print("="*80 + "\n")
     # Training loop
     for step in progress:
         import random
+        # Sample a concept group
+        group = random.choice(config.concept_groups)
+        # Compute loss for this group
+        loss, metrics = compute_concept_group_loss(
+            unet, lora_state, group,
+            tokenizer, text_encoder, config, device
+        )
+        # Backprop
+        loss.backward()
         grad_norm = torch.nn.utils.clip_grad_norm_(trainable_params, max_norm=1.0)
         optimizer.step()
         optimizer.zero_grad()
         # Logging
+        writer.add_scalar("loss/total", loss.item(), step)
+        writer.add_scalar("loss/source", metrics["source_loss"], step)
+        writer.add_scalar("loss/preservation", metrics["preservation_loss"], step)
         writer.add_scalar("grad_norm", grad_norm.item(), step)
         progress.set_postfix({
+            "loss": f"{loss.item():.4f}",
+            "src": f"{metrics['source_loss']:.4f}",
+            "pres": f"{metrics['preservation_loss']:.4f}",
+            "grad": f"{grad_norm.item():.3f}"
         })
         if (step + 1) % 200 == 0 or step == config.iterations - 1:
+            save_checkpoint(lora_state, config, output_dir, step + 1, source_names)
     writer.close()
         if isinstance(value, torch.Tensor) and not key.endswith("._module"):
             save_dict[key] = value.detach().cpu()
+    # Build metadata
+    all_sources = []
+    all_targets = []
+    all_negatives = []
+    for group in config.concept_groups:
+        all_sources.extend(group.sources)
+        if group.target:
+            all_targets.append(group.target)
+        all_negatives.extend(group.negatives)
     metadata = {
         "ss_network_module": "networks.lora",
         "ss_base_model": "runwayml/stable-diffusion-v1-5",
         "ss_training_method": config.training_method,
         "leco_action": config.action.value,
+        "leco_sources": ", ".join(all_sources),
+        "leco_targets": ", ".join(all_targets) if all_targets else "",
+        "leco_negatives": ", ".join(all_negatives),
+        "leco_step": str(step),
+        "leco_num_groups": str(len(config.concept_groups))
     }
     filename = f"leco_{name_suffix}_r{config.lora_rank}_s{step}.safetensors"
     print(f"\n✓ Saved: {filename}")
+# ============================================================================
+# EXAMPLE CONFIGURATIONS
+# ============================================================================
 if __name__ == "__main__":
+    # Example 1: Erase anime styles (multiple sources → empty)
+    config_erase_anime = LECOConfig(
+        action=ActionType.ERASE,
+        concept_groups=[
+            ConceptGroup(
+                sources=["anime", "manga", "cartoon"],
+                target="",  # Erase
+                negatives=["realistic", "photograph", "painting"],
+                weight=1.0
+            )
+        ],
+        iterations=1000,
+        lora_rank=4,
+        training_method="xattn"  # Cross-attention for semantic content
+    )
+    # Example 2: Replace artists (multiple sources → single target)
+    config_replace_artists = LECOConfig(
+        action=ActionType.REPLACE,
+        concept_groups=[
+            ConceptGroup(
+                sources=["van gogh", "picasso", "dali"],
+                target="monet",
+                neutral="painting",  # Use painting as neutral reference
+                negatives=["photograph", "digital art"],
+                weight=1.0
+            )
+        ],
+        iterations=800,
+        lora_rank=8,
+        training_method="xattn"
+    )
+    # Example 3: Neutralize NSFW (multiple sources → safe target)
+    config_nsfw = LECOConfig(
+        action=ActionType.NEUTRALIZE,
+        concept_groups=[
+            ConceptGroup(
+                sources=["nsfw", "nude", "explicit", "naked"],
+                target="clothed",
+                neutral="person",
+                negatives=["portrait", "figure drawing", "classical art", "sculpture"],
+                weight=2.0,
+                preservation_weight=0.8  # Strong preservation
+            )
+        ],
+        iterations=1200,
+        lora_rank=4,
+        training_method="full"
+    )
+    # Example 4: Erase weird food combos (multiple sources → empty, with a neutral reference)
+    config_food = LECOConfig(
+        action=ActionType.ERASE,
+        concept_groups=[
+            ConceptGroup(
+                sources=["potato chicken sandwich", "taco pizza", "banana sushi"],
+                target="",
+                neutral="food",
+                negatives=["normal sandwiches", "table", "walls", "plates", "restaurant"],
+                weight=1.0,
+                preservation_weight=1.5
+            )
         ],
+        iterations=1000,
         lora_rank=4,
+        training_method="xattn",
+        sources_per_step=2  # Sample 2 weird foods per training step
     )
+    # Train
+    train_leco(config_erase_anime)