Update app.py

app.py CHANGED
@@ -9,6 +9,11 @@ Supports Illustrious XL, standard SDXL, and SD1.5 variants.
 Lyra VAE Versions:
 - v1: SD1.5 (768 dim CLIP + T5-base) - geofractal.model.vae.vae_lyra
 - v2: SDXL/Illustrious (768 CLIP-L + 1280 CLIP-G + 2048 T5-XL) - geofractal.model.vae.vae_lyra_v2
+
+Features:
+- Lazy loading: T5 and Lyra only download when first used
+- Multiple schedulers: Euler Ancestral, Euler, DPM++ 2M SDE, DPM++ 2M
+- Integrated loader module for automatic version detection
 """
 
 import os
@@ -17,7 +22,7 @@ import torch
 import gradio as gr
 import numpy as np
 from PIL import Image
-from typing import Optional, Dict, Tuple
+from typing import Optional, Dict, Tuple, Union
 import spaces
 from safetensors.torch import load_file as load_safetensors
 
@@ -38,35 +43,29 @@ from transformers import (
 )
 from huggingface_hub import hf_hub_download
 
-#
-try:
-    from geofractal.model.vae.vae_lyra import MultiModalVAE as _LyraV1, MultiModalVAEConfig as _LyraV1Config
-    LyraV1 = _LyraV1
-    LyraV1Config = _LyraV1Config
-    LYRA_V1_AVAILABLE = True
-except ImportError:
-    print("⚠️ Lyra VAE v1 not available")
-
-try:
-    from geofractal.model.vae.vae_lyra_v2 import MultiModalVAE as _LyraV2, MultiModalVAEConfig as _LyraV2Config
-    LyraV2 = _LyraV2
-    LyraV2Config = _LyraV2Config
-    LYRA_V2_AVAILABLE = True
-except ImportError:
-    print("⚠️ Lyra VAE v2 not available")
+# Import Lyra VAE v1 (SD1.5) from geofractal
+try:
+    from geofractal.model.vae.vae_lyra import MultiModalVAE as LyraV1, MultiModalVAEConfig as LyraV1Config
+    LYRA_V1_AVAILABLE = True
+except ImportError:
+    print("⚠️ Lyra VAE v1 not available")
+    LYRA_V1_AVAILABLE = False
+
+# Import Lyra VAE v2 (SDXL/Illustrious) from geofractal
+try:
+    from geofractal.model.vae.vae_lyra_v2 import MultiModalVAE as LyraV2, MultiModalVAEConfig as LyraV2Config
+    LYRA_V2_AVAILABLE = True
+except ImportError:
+    print("⚠️ Lyra VAE v2 not available")
+    LYRA_V2_AVAILABLE = False
+
+# Import Lyra loader module
+try:
+    from geofractal.model.vae.load_lyra import load_vae_lyra, load_lyra_illustrious
+    LYRA_LOADER_AVAILABLE = True
+except ImportError:
+    print("⚠️ Lyra loader module not available, using fallback")
+    LYRA_LOADER_AVAILABLE = False
 
 
 # ============================================================================
@@ -76,66 +75,32 @@ def _load_lyra_imports():
 ARCH_SD15 = "sd15"
 ARCH_SDXL = "sdxl"
 
-# Scheduler
+# Scheduler names
 SCHEDULER_EULER_A = "Euler Ancestral"
 SCHEDULER_EULER = "Euler"
 SCHEDULER_DPM_2M_SDE = "DPM++ 2M SDE"
 SCHEDULER_DPM_2M = "DPM++ 2M"
 
-
-def get_scheduler(scheduler_name: str, config_path: str = "stabilityai/stable-diffusion-xl-base-1.0"):
-    """Create scheduler by name."""
-
-    if scheduler_name == SCHEDULER_EULER_A:
-        return EulerAncestralDiscreteScheduler.from_pretrained(
-            config_path, subfolder="scheduler"
-        )
-    elif scheduler_name == SCHEDULER_EULER:
-        return EulerDiscreteScheduler.from_pretrained(
-            config_path, subfolder="scheduler"
-        )
-    elif scheduler_name == SCHEDULER_DPM_2M_SDE:
-        return DPMSolverSDEScheduler.from_pretrained(
-            config_path, subfolder="scheduler",
-            algorithm_type="sde-dpmsolver++",
-            solver_order=2,
-        )
-    elif scheduler_name == SCHEDULER_DPM_2M:
-        return DPMSolverMultistepScheduler.from_pretrained(
-            config_path, subfolder="scheduler",
-            algorithm_type="dpmsolver++",
-            solver_order=2,
-        )
-    else:
-        # Default to Euler Ancestral
-        return EulerAncestralDiscreteScheduler.from_pretrained(
-            config_path, subfolder="scheduler"
-        )
+SCHEDULER_CHOICES = [
+    SCHEDULER_EULER_A,
+    SCHEDULER_EULER,
+    SCHEDULER_DPM_2M_SDE,
+    SCHEDULER_DPM_2M,
+]
 
+# ComfyUI key prefixes for SDXL single-file checkpoints
+COMFYUI_UNET_PREFIX = "model.diffusion_model."
+COMFYUI_CLIP_L_PREFIX = "conditioner.embedders.0.transformer."
+COMFYUI_CLIP_G_PREFIX = "conditioner.embedders.1.model."
+COMFYUI_VAE_PREFIX = "first_stage_model."
 
-#
-def get_clip_hidden_state(
-    model_output,
-    clip_skip: int = 1,
-    output_hidden_states: bool = True
-) -> torch.Tensor:
-    """Extract hidden state with clip_skip support."""
-    if clip_skip == 1 or not output_hidden_states:
-        return model_output.last_hidden_state
-
-    if hasattr(model_output, 'hidden_states') and model_output.hidden_states is not None:
-        return model_output.hidden_states[-clip_skip]
-
-    return model_output.last_hidden_state
+# Lyra repos
+LYRA_ILLUSTRIOUS_REPO = "AbstractPhil/vae-lyra-xl-adaptive-cantor-illustrious"
+LYRA_SD15_REPO = "AbstractPhil/vae-lyra"
+
+# T5 model - use flan-t5-xl (what Lyra was trained on)
+T5_XL_MODEL = "google/flan-t5-xl"
+T5_BASE_MODEL = "google/flan-t5-base"
 
 
 # ============================================================================
@@ -143,168 +108,281 @@ def get_clip_hidden_state(
 # ============================================================================
 
 class LazyT5Encoder:
-    """Lazy loader for T5 encoder - only loads when first accessed."""
+    """Lazy loader for T5 encoder - only downloads/loads when first accessed."""
 
-    def __init__(self, model_name: str =
+    def __init__(self, model_name: str = T5_XL_MODEL, device: str = "cuda", dtype=torch.float16):
         self.model_name = model_name
         self.device = device
+        self.dtype = dtype
         self._encoder = None
         self._tokenizer = None
+        self._loaded = False
 
     @property
-    def encoder(self):
+    def encoder(self) -> T5EncoderModel:
         if self._encoder is None:
-            print(f"📥
+            print(f"📥 Lazy loading T5 encoder: {self.model_name}...")
             self._encoder = T5EncoderModel.from_pretrained(
                 self.model_name,
-                torch_dtype=
+                torch_dtype=self.dtype
             ).to(self.device)
             self._encoder.eval()
-            print("✓ T5 encoder loaded")
+            print(f"✓ T5 encoder loaded ({sum(p.numel() for p in self._encoder.parameters())/1e6:.1f}M params)")
+            self._loaded = True
         return self._encoder
 
     @property
-    def tokenizer(self):
+    def tokenizer(self) -> T5Tokenizer:
         if self._tokenizer is None:
            print(f"📥 Loading T5 tokenizer: {self.model_name}...")
            self._tokenizer = T5Tokenizer.from_pretrained(self.model_name)
            print("✓ T5 tokenizer loaded")
         return self._tokenizer
 
+    @property
+    def is_loaded(self) -> bool:
+        return self._loaded
+
+    def unload(self):
+        """Free VRAM by unloading the encoder."""
+        if self._encoder is not None:
+            del self._encoder
+            self._encoder = None
+            self._loaded = False
+            torch.cuda.empty_cache()
+            print("🗑️ T5 encoder unloaded")
 
 class LazyLyraModel:
-    """Lazy loader for Lyra VAE - only loads when first accessed."""
+    """Lazy loader for Lyra VAE - only downloads/loads when first accessed."""
 
-    def __init__(
+    def __init__(
+        self,
+        repo_id: str = LYRA_ILLUSTRIOUS_REPO,
+        device: str = "cuda",
+        checkpoint: Optional[str] = None
+    ):
         self.repo_id = repo_id
         self.device = device
-        self.
+        self.checkpoint = checkpoint
         self._model = None
+        self._info = None
+        self._loaded = False
 
     @property
     def model(self):
         if self._model is None:
-
+            print(f"📥 Lazy loading Lyra VAE: {self.repo_id}...")
 
-            if
+            if LYRA_LOADER_AVAILABLE:
+                # Use the loader module
+                self._model, self._info = load_vae_lyra(
+                    self.repo_id,
+                    checkpoint=self.checkpoint,
+                    device=self.device,
+                    return_info=True
+                )
             else:
-
+                # Fallback to manual loading
+                self._model = self._load_fallback()
+                self._info = {"repo_id": self.repo_id, "version": "v2"}
+
+            self._model.eval()
+            self._loaded = True
+            print(f"✓ Lyra VAE loaded")
         return self._model
 
+    @property
+    def info(self) -> Optional[Dict]:
+        if self._info is None and self._model is not None:
+            return {"repo_id": self.repo_id}
+        return self._info
+
+    @property
+    def is_loaded(self) -> bool:
+        return self._loaded
+
+    def _load_fallback(self):
+        """Fallback loading if loader module not available."""
         if not LYRA_V2_AVAILABLE:
-
-            return None
+            raise ImportError("Lyra VAE v2 not available")
 
+        config_path = hf_hub_download(
+            repo_id=self.repo_id,
+            filename="config.json",
+            repo_type="model"
+        )
 
+        with open(config_path, 'r') as f:
+            config_dict = json.load(f)
+
+        # Find checkpoint
+        from huggingface_hub import list_repo_files
+        import re
+
+        repo_files = list_repo_files(self.repo_id, repo_type="model")
+        checkpoint_files = [f for f in repo_files if f.endswith('.safetensors') or f.endswith('.pt')]
+
+        # Prefer weights/ folder
+        weights_files = [f for f in checkpoint_files if f.startswith('weights/')]
+        if weights_files:
+            checkpoint_file = sorted(weights_files)[-1]  # Latest
+        elif checkpoint_files:
+            checkpoint_file = checkpoint_files[0]
+        else:
+            raise FileNotFoundError(f"No checkpoint found in {self.repo_id}")
-            print(f"    ✓ Using: {checkpoint_filename}")
-
-            checkpoint_path = hf_hub_download(
-                repo_id=self.repo_id,
-                filename=checkpoint_filename,
-                repo_type="model"
-            )
-
+
+        checkpoint_path = hf_hub_download(
+            repo_id=self.repo_id,
+            filename=checkpoint_file,
+            repo_type="model"
+        )
+
+        # Load weights
+        if checkpoint_file.endswith('.safetensors'):
+            state_dict = load_safetensors(checkpoint_path, device="cpu")
+        else:
             checkpoint = torch.load(checkpoint_path, map_location="cpu")
-
-            vae_config = LyraV2Config(
-                modality_dims=config_dict.get('modality_dims', {
-                    "clip_l": 768, "clip_g": 1280,
-                    "t5_xl_l": 2048, "t5_xl_g": 2048
-                }),
-                modality_seq_lens=config_dict.get('modality_seq_lens', {
-                    "clip_l": 77, "clip_g": 77,
-                    "t5_xl_l": 512, "t5_xl_g": 512
-                }),
-                binding_config=config_dict.get('binding_config', {
-                    "clip_l": {"t5_xl_l": 0.3},
-                    "clip_g": {"t5_xl_g": 0.3},
-                    "t5_xl_l": {},
-                    "t5_xl_g": {}
-                }),
-                latent_dim=config_dict.get('latent_dim', 2048),
-                seq_len=config_dict.get('seq_len', 77),
-                encoder_layers=config_dict.get('encoder_layers', 3),
-                decoder_layers=config_dict.get('decoder_layers', 3),
-                hidden_dim=config_dict.get('hidden_dim', 2048),
-                dropout=config_dict.get('dropout', 0.1),
-                fusion_strategy=config_dict.get('fusion_strategy', 'adaptive_cantor'),
-                fusion_heads=config_dict.get('fusion_heads', 8),
-                fusion_dropout=config_dict.get('fusion_dropout', 0.1),
-                cantor_depth=config_dict.get('cantor_depth', 8),
-                cantor_local_window=config_dict.get('cantor_local_window', 3),
-                alpha_init=config_dict.get('alpha_init', 1.0),
-                beta_init=config_dict.get('beta_init', 0.3),
-            )
-
-            lyra_model = LyraV2(vae_config)
-
             state_dict = checkpoint.get('model_state_dict', checkpoint)
-            missing, unexpected = lyra_model.load_state_dict(state_dict, strict=False)
-
-            if missing:
-                print(f"    ⚠️ Missing keys: {len(missing)}")
-            if unexpected:
-                print(f"    ⚠️ Unexpected keys: {len(unexpected)}")
-
-            lyra_model.to(self.device)
-            lyra_model.eval()
-
-            total_params = sum(p.numel() for p in lyra_model.parameters())
-            print(f"✅ Lyra VAE v2 loaded ({total_params/1e6:.1f}M params)")
-
-            return lyra_model
-
-        except Exception as e:
-            print(f"❌ Failed to load Lyra VAE v2: {e}")
-            import traceback
-            traceback.print_exc()
-            return None
-
-    def _load_v1(self):
-        if not LYRA_V1_AVAILABLE:
-            print("⚠️ Lyra VAE v1 not available")
-            return None
 
-    #
+        # Build config
+        vae_config = LyraV2Config(
+            modality_dims=config_dict.get('modality_dims'),
+            modality_seq_lens=config_dict.get('modality_seq_lens'),
+            binding_config=config_dict.get('binding_config'),
+            latent_dim=config_dict.get('latent_dim', 2048),
+            hidden_dim=config_dict.get('hidden_dim', 2048),
+            fusion_strategy=config_dict.get('fusion_strategy', 'adaptive_cantor'),
+        )
+
+        model = LyraV2(vae_config)
+        model.load_state_dict(state_dict, strict=False)
+        model.to(self.device)
+
+        return model
+
+    def unload(self):
+        """Free VRAM by unloading the model."""
+        if self._model is not None:
+            del self._model
+            self._model = None
+            self._info = None
+            self._loaded = False
+            torch.cuda.empty_cache()
+            print("🗑️ Lyra VAE unloaded")
-
-        return self._model is not None
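
Note: both loaders above use the same lazy-property pattern — construction is free, and the heavy download happens on first attribute access. A minimal standalone sketch of that pattern (the `LazyResource` name is illustrative, not part of app.py):

class LazyResource:
    """Defer an expensive load until the first attribute access."""
    def __init__(self, loader_fn):
        self._loader_fn = loader_fn   # called at most once, on demand
        self._value = None

    @property
    def value(self):
        if self._value is None:       # first access triggers the load
            self._value = self._loader_fn()
        return self._value

# With the classes in this diff, the same flow would be roughly:
#   t5 = LazyT5Encoder()   # cheap: nothing is downloaded yet
#   enc = t5.encoder       # first access downloads and loads the model
#   t5.unload()            # frees VRAM again
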
+
+# ============================================================================
+# SCHEDULER FACTORY
+# ============================================================================
+
+def get_scheduler(
+    scheduler_name: str,
+    config_source: str = "stabilityai/stable-diffusion-xl-base-1.0",
+    is_sdxl: bool = True
+):
+    """Create scheduler by name.
+
+    Args:
+        scheduler_name: One of SCHEDULER_CHOICES
+        config_source: HF repo to load scheduler config from
+        is_sdxl: Whether this is for SDXL (affects some defaults)
+
+    Returns:
+        Configured scheduler instance
+    """
+    subfolder = "scheduler"
+
+    if scheduler_name == SCHEDULER_EULER_A:
+        return EulerAncestralDiscreteScheduler.from_pretrained(
+            config_source,
+            subfolder=subfolder
+        )
+
+    elif scheduler_name == SCHEDULER_EULER:
+        return EulerDiscreteScheduler.from_pretrained(
+            config_source,
+            subfolder=subfolder
+        )
+
+    elif scheduler_name == SCHEDULER_DPM_2M_SDE:
+        # DPM++ 2M SDE - good for detailed images
+        return DPMSolverSDEScheduler.from_pretrained(
+            config_source,
+            subfolder=subfolder,
+            algorithm_type="sde-dpmsolver++",
+            solver_order=2,
+            use_karras_sigmas=True,
+        )
+
+    elif scheduler_name == SCHEDULER_DPM_2M:
+        # DPM++ 2M - fast and quality
+        return DPMSolverMultistepScheduler.from_pretrained(
+            config_source,
+            subfolder=subfolder,
+            algorithm_type="dpmsolver++",
+            solver_order=2,
+            use_karras_sigmas=True,
+        )
+
+    else:
+        print(f"⚠️ Unknown scheduler '{scheduler_name}', defaulting to Euler Ancestral")
+        return EulerAncestralDiscreteScheduler.from_pretrained(
+            config_source,
+            subfolder=subfolder
+        )
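
Note: with the factory above, switching samplers is config-only; no model reload is required. A usage sketch under that assumption (`from_pretrained` fetches the scheduler config from the default `config_source` repo on first call):

sched = get_scheduler(SCHEDULER_DPM_2M_SDE)   # Karras-sigma SDE sampler
sched.set_timesteps(20)                       # standard diffusers scheduler API
print(type(sched).__name__, len(sched.timesteps))
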
+
+
+# ============================================================================
+# UTILITIES
+# ============================================================================
+
+def extract_comfyui_components(state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
+    """Extract UNet, CLIP-L, CLIP-G, and VAE from ComfyUI single-file checkpoint."""
+
+    components = {
+        "unet": {},
+        "clip_l": {},
+        "clip_g": {},
+        "vae": {}
+    }
+
+    for key, value in state_dict.items():
+        if key.startswith(COMFYUI_UNET_PREFIX):
+            new_key = key[len(COMFYUI_UNET_PREFIX):]
+            components["unet"][new_key] = value
+        elif key.startswith(COMFYUI_CLIP_L_PREFIX):
+            new_key = key[len(COMFYUI_CLIP_L_PREFIX):]
+            components["clip_l"][new_key] = value
+        elif key.startswith(COMFYUI_CLIP_G_PREFIX):
+            new_key = key[len(COMFYUI_CLIP_G_PREFIX):]
+            components["clip_g"][new_key] = value
+        elif key.startswith(COMFYUI_VAE_PREFIX):
+            new_key = key[len(COMFYUI_VAE_PREFIX):]
+            components["vae"][new_key] = value
+
+    print(f"  Extracted components:")
+    print(f"    UNet: {len(components['unet'])} keys")
+    print(f"    CLIP-L: {len(components['clip_l'])} keys")
+    print(f"    CLIP-G: {len(components['clip_g'])} keys")
+    print(f"    VAE: {len(components['vae'])} keys")
+
+    return components
+
+
+def get_clip_hidden_state(
+    model_output,
+    clip_skip: int = 1,
+    output_hidden_states: bool = True
+) -> torch.Tensor:
+    """Extract hidden state with clip_skip support."""
+    if clip_skip == 1 or not output_hidden_states:
+        return model_output.last_hidden_state
+
+    if hasattr(model_output, 'hidden_states') and model_output.hidden_states is not None:
+        return model_output.hidden_states[-clip_skip]
+
+    return model_output.last_hidden_state
 
 
 # ============================================================================
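
Note: `get_clip_hidden_state` indexes `hidden_states` from the end, so `clip_skip=1` selects the final layer and `clip_skip=2` the penultimate one. A toy check of that indexing (the tensors are stand-ins for real CLIP outputs):

import torch

hidden_states = [torch.full((1, 77, 768), float(i)) for i in range(12)]  # 12 fake layers
clip_skip = 2
picked = hidden_states[-clip_skip]       # second-to-last layer
assert picked[0, 0, 0].item() == 10.0    # layer index 10 of 0..11
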
@@ -312,7 +390,10 @@ class LazyLyraModel:
 # ============================================================================
 
 class SDXLFlowMatchingPipeline:
-    """Pipeline for SDXL-based flow-matching inference with dual CLIP encoders.
+    """Pipeline for SDXL-based flow-matching inference with dual CLIP encoders.
+
+    Uses lazy loading for T5 and Lyra - they're only downloaded when actually used.
+    """
 
     def __init__(
         self,
@@ -337,7 +418,7 @@ class SDXLFlowMatchingPipeline:
         self.scheduler = scheduler
         self.device = device
 
-        # Lazy loaders
+        # Lazy loaders for Lyra components
         self.t5_loader = t5_loader
         self.lyra_loader = lyra_loader
 
@@ -345,23 +426,41 @@ class SDXLFlowMatchingPipeline:
         self.clip_skip = clip_skip
         self.vae_scale_factor = 0.13025
         self.arch = ARCH_SDXL
+
+        # Track current scheduler name for UI
+        self._scheduler_name = SCHEDULER_EULER_A
 
     def set_scheduler(self, scheduler_name: str):
-        """Switch scheduler."""
-        self.
+        """Switch scheduler without reloading model."""
+        if scheduler_name != self._scheduler_name:
+            self.scheduler = get_scheduler(
+                scheduler_name,
+                config_source="stabilityai/stable-diffusion-xl-base-1.0",
+                is_sdxl=True
+            )
+            self._scheduler_name = scheduler_name
+            print(f"✓ Scheduler changed to: {scheduler_name}")
 
     @property
-    def t5_encoder(self):
+    def t5_encoder(self) -> Optional[T5EncoderModel]:
+        """Access T5 encoder (triggers lazy load if needed)."""
         return self.t5_loader.encoder if self.t5_loader else None
 
     @property
-    def t5_tokenizer(self):
+    def t5_tokenizer(self) -> Optional[T5Tokenizer]:
+        """Access T5 tokenizer (triggers lazy load if needed)."""
         return self.t5_loader.tokenizer if self.t5_loader else None
 
     @property
     def lyra_model(self):
+        """Access Lyra model (triggers lazy load if needed)."""
         return self.lyra_loader.model if self.lyra_loader else None
 
+    @property
+    def lyra_available(self) -> bool:
+        """Check if Lyra components are configured (not necessarily loaded)."""
+        return self.t5_loader is not None and self.lyra_loader is not None
+
     def encode_prompt(
         self,
         prompt: str,
@@ -406,6 +505,7 @@ class SDXLFlowMatchingPipeline:
         prompt_embeds_g = get_clip_hidden_state(clip_g_output, clip_skip, output_hidden_states)
         pooled_prompt_embeds = clip_g_output.text_embeds
 
+        # Concatenate CLIP-L and CLIP-G embeddings
         prompt_embeds = torch.cat([prompt_embeds_l, prompt_embeds_g], dim=-1)
 
         # Negative prompt
@@ -457,17 +557,24 @@ class SDXLFlowMatchingPipeline:
         t5_summary: str = "",
         lyra_strength: float = 0.3
     ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
-        """Encode prompts using Lyra VAE v2 fusion (CLIP + T5).
+        """Encode prompts using Lyra VAE v2 fusion (CLIP + T5).
 
-
-
+        This triggers lazy loading of T5 and Lyra if not already loaded.
+        """
+        if not self.lyra_available:
+            raise ValueError("Lyra VAE components not configured")
+
+        # Access properties triggers lazy load
+        t5_encoder = self.t5_encoder
+        t5_tokenizer = self.t5_tokenizer
+        lyra_model = self.lyra_model
 
         # Get standard CLIP embeddings first
         prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt(
             prompt, negative_prompt, clip_skip
         )
 
-        # Format T5 input
+        # Format T5 input with pilcrow separator (¶)
         SUMMARY_SEPARATOR = "¶"
         if t5_summary.strip():
             t5_prompt = f"{prompt} {SUMMARY_SEPARATOR} {t5_summary}"
@@ -475,7 +582,7 @@ class SDXLFlowMatchingPipeline:
             t5_prompt = f"{prompt} {SUMMARY_SEPARATOR} {prompt}"
 
         # Get T5 embeddings
-        t5_inputs =
+        t5_inputs = t5_tokenizer(
             t5_prompt,
             max_length=512,
             padding='max_length',
@@ -484,9 +591,11 @@ class SDXLFlowMatchingPipeline:
         ).to(self.device)
 
         with torch.no_grad():
-            t5_embeds =
+            t5_embeds = t5_encoder(**t5_inputs).last_hidden_state
 
         clip_l_dim = 768
+        clip_g_dim = 1280
+
         clip_l_embeds = prompt_embeds[..., :clip_l_dim]
         clip_g_embeds = prompt_embeds[..., clip_l_dim:]
 
@@ -497,7 +606,7 @@ class SDXLFlowMatchingPipeline:
             't5_xl_l': t5_embeds.float(),
             't5_xl_g': t5_embeds.float()
         }
-        reconstructions, mu, logvar, _ =
+        reconstructions, mu, logvar, _ = lyra_model(
             modality_inputs,
             target_modalities=['clip_l', 'clip_g']
         )
@@ -505,7 +614,7 @@ class SDXLFlowMatchingPipeline:
         lyra_clip_l = reconstructions['clip_l'].to(prompt_embeds.dtype)
         lyra_clip_g = reconstructions['clip_g'].to(prompt_embeds.dtype)
 
-        # Normalize
+        # Normalize reconstructions to match input statistics
        clip_l_std_ratio = lyra_clip_l.std() / (clip_l_embeds.std() + 1e-8)
        clip_g_std_ratio = lyra_clip_g.std() / (clip_g_embeds.std() + 1e-8)
 
@@ -517,14 +626,60 @@ class SDXLFlowMatchingPipeline:
         lyra_clip_g = (lyra_clip_g - lyra_clip_g.mean()) / (lyra_clip_g.std() + 1e-8)
         lyra_clip_g = lyra_clip_g * clip_g_embeds.std() + clip_g_embeds.mean()
 
-        # Blend
+        # Blend original CLIP with Lyra reconstruction
         fused_clip_l = (1 - lyra_strength) * clip_l_embeds + lyra_strength * lyra_clip_l
         fused_clip_g = (1 - lyra_strength) * clip_g_embeds + lyra_strength * lyra_clip_g
 
         prompt_embeds_fused = torch.cat([fused_clip_l, fused_clip_g], dim=-1)
 
-        #
-
+        # Process negative prompt
+        if negative_prompt:
+            neg_strength = lyra_strength * 0.5  # Less aggressive for negative
+
+            t5_neg_prompt = f"{negative_prompt} {SUMMARY_SEPARATOR} {negative_prompt}"
+            t5_inputs_neg = t5_tokenizer(
+                t5_neg_prompt,
+                max_length=512,
+                padding='max_length',
+                truncation=True,
+                return_tensors='pt'
+            ).to(self.device)
+
+            with torch.no_grad():
+                t5_embeds_neg = t5_encoder(**t5_inputs_neg).last_hidden_state
+
+            neg_clip_l = negative_prompt_embeds[..., :clip_l_dim]
+            neg_clip_g = negative_prompt_embeds[..., clip_l_dim:]
+
+            modality_inputs_neg = {
+                'clip_l': neg_clip_l.float(),
+                'clip_g': neg_clip_g.float(),
+                't5_xl_l': t5_embeds_neg.float(),
+                't5_xl_g': t5_embeds_neg.float()
+            }
+            recon_neg, _, _, _ = lyra_model(modality_inputs_neg, target_modalities=['clip_l', 'clip_g'])
+
+            lyra_neg_l = recon_neg['clip_l'].to(negative_prompt_embeds.dtype)
+            lyra_neg_g = recon_neg['clip_g'].to(negative_prompt_embeds.dtype)
+
+            # Normalize
+            neg_l_ratio = lyra_neg_l.std() / (neg_clip_l.std() + 1e-8)
+            neg_g_ratio = lyra_neg_g.std() / (neg_clip_g.std() + 1e-8)
+            if neg_l_ratio > 2.0 or neg_l_ratio < 0.5:
+                lyra_neg_l = (lyra_neg_l - lyra_neg_l.mean()) / (lyra_neg_l.std() + 1e-8)
+                lyra_neg_l = lyra_neg_l * neg_clip_l.std() + neg_clip_l.mean()
+            if neg_g_ratio > 2.0 or neg_g_ratio < 0.5:
+                lyra_neg_g = (lyra_neg_g - lyra_neg_g.mean()) / (lyra_neg_g.std() + 1e-8)
+                lyra_neg_g = lyra_neg_g * neg_clip_g.std() + neg_clip_g.mean()
+
+            fused_neg_l = (1 - neg_strength) * neg_clip_l + neg_strength * lyra_neg_l
+            fused_neg_g = (1 - neg_strength) * neg_clip_g + neg_strength * lyra_neg_g
+
+            negative_prompt_embeds_fused = torch.cat([fused_neg_l, fused_neg_g], dim=-1)
+        else:
+            negative_prompt_embeds_fused = torch.zeros_like(prompt_embeds_fused)
+
+        return prompt_embeds_fused, negative_prompt_embeds_fused, pooled, negative_pooled
 
     def _get_add_time_ids(
         self,
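
Note: the fusion above is two independent steps — re-standardize the Lyra reconstruction to the CLIP embedding's global statistics (app.py only does this when the std ratio leaves [0.5, 2.0]), then linearly interpolate. A standalone sketch of the same math:

import torch

def match_stats(x: torch.Tensor, ref: torch.Tensor) -> torch.Tensor:
    """Shift/scale x so its global mean/std match ref."""
    x = (x - x.mean()) / (x.std() + 1e-8)
    return x * ref.std() + ref.mean()

def blend(clip: torch.Tensor, lyra: torch.Tensor, strength: float) -> torch.Tensor:
    """Lerp: strength=0 keeps CLIP unchanged, strength=1 is pure Lyra."""
    return (1 - strength) * clip + strength * lyra

clip = torch.randn(1, 77, 768)
lyra = 5.0 * torch.randn(1, 77, 768) + 3.0   # badly scaled reconstruction
fused = blend(clip, match_stats(lyra, clip), strength=0.3)
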
@@ -545,11 +700,14 @@ class SDXLFlowMatchingPipeline:
         negative_prompt: str = "",
         height: int = 1024,
         width: int = 1024,
-        num_inference_steps: int =
-        guidance_scale: float = 7.
+        num_inference_steps: int = 20,
+        guidance_scale: float = 7.5,
+        shift: float = 0.0,
+        use_flow_matching: bool = False,
+        prediction_type: str = "epsilon",
         seed: Optional[int] = None,
         use_lyra: bool = False,
-        clip_skip: int =
+        clip_skip: int = 1,
         t5_summary: str = "",
         lyra_strength: float = 1.0,
         progress_callback=None
@@ -561,8 +719,8 @@ class SDXLFlowMatchingPipeline:
         else:
             generator = None
 
-        # Encode prompts
-        if use_lyra and self.
+        # Encode prompts (Lyra triggers lazy load only if use_lyra=True)
+        if use_lyra and self.lyra_available:
             prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt_lyra(
                 prompt, negative_prompt, clip_skip, t5_summary, lyra_strength
             )
@@ -587,9 +745,10 @@ class SDXLFlowMatchingPipeline:
         self.scheduler.set_timesteps(num_inference_steps, device=self.device)
         timesteps = self.scheduler.timesteps
 
-
+        if not use_flow_matching:
+            latents = latents * self.scheduler.init_noise_sigma
 
-        #
+        # Prepare added time embeddings for SDXL
         original_size = (height, width)
         target_size = (height, width)
         crops_coords_top_left = (0, 0)
@@ -605,7 +764,14 @@ class SDXLFlowMatchingPipeline:
             progress_callback(i, num_inference_steps, f"Step {i+1}/{num_inference_steps}")
 
             latent_model_input = torch.cat([latents] * 2) if guidance_scale > 1.0 else latents
-
+
+            if use_flow_matching and shift > 0:
+                sigma = t.float() / 1000.0
+                sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
+                scaling = torch.sqrt(1 + sigma_shifted ** 2)
+                latent_model_input = latent_model_input / scaling
+            else:
+                latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
 
             timestep = t.expand(latent_model_input.shape[0])
 
@@ -635,7 +801,22 @@ class SDXLFlowMatchingPipeline:
                 noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
                 noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
 
-
+            if use_flow_matching:
+                sigma = t.float() / 1000.0
+                sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
+
+                if prediction_type == "v_prediction":
+                    v_pred = noise_pred
+                    alpha_t = torch.sqrt(1 - sigma_shifted ** 2)
+                    sigma_t = sigma_shifted
+                    noise_pred = alpha_t * v_pred + sigma_t * latents
+
+                dt = -1.0 / num_inference_steps
+                latents = latents + dt * noise_pred
+            else:
+                latents = self.scheduler.step(
+                    noise_pred, t, latents, return_dict=False
+                )[0]
 
         # Decode
         latents = latents / self.vae_scale_factor
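
Note: both new flow-matching branches use the timestep warp σ' = s·σ / (1 + (s − 1)·σ), which for s > 1 biases sampling toward high-noise steps, and the update is a plain Euler step with dt = −1/N. A sketch under those assumptions (the velocity here is a stand-in for the UNet prediction):

import torch

def shift_sigma(sigma: torch.Tensor, shift: float) -> torch.Tensor:
    """Warped noise level; identity when shift == 1.0."""
    return (shift * sigma) / (1 + (shift - 1) * sigma)

num_steps = 20
dt = -1.0 / num_steps
x = torch.randn(1, 4, 64, 64)                      # latent-shaped noise
for t in torch.linspace(1.0, 1.0 / num_steps, num_steps):
    sigma = shift_sigma(t, shift=2.5)
    v = -x * sigma                                 # stand-in for model output
    x = x + dt * v                                 # Euler step toward t=0
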
@@ -651,12 +832,310 @@ class SDXLFlowMatchingPipeline:
         return image
 
 
+# ============================================================================
+# SD1.5 PIPELINE
+# ============================================================================
+
+class SD15FlowMatchingPipeline:
+    """Pipeline for SD1.5-based flow-matching inference."""
+
+    def __init__(
+        self,
+        vae: AutoencoderKL,
+        text_encoder: CLIPTextModel,
+        tokenizer: CLIPTokenizer,
+        unet: UNet2DConditionModel,
+        scheduler,
+        device: str = "cuda",
+        t5_loader: Optional[LazyT5Encoder] = None,
+        lyra_loader: Optional[LazyLyraModel] = None,
+    ):
+        self.vae = vae
+        self.text_encoder = text_encoder
+        self.tokenizer = tokenizer
+        self.unet = unet
+        self.scheduler = scheduler
+        self.device = device
+
+        self.t5_loader = t5_loader
+        self.lyra_loader = lyra_loader
+
+        self.vae_scale_factor = 0.18215
+        self.arch = ARCH_SD15
+        self.is_lune_model = False
+
+    @property
+    def t5_encoder(self):
+        return self.t5_loader.encoder if self.t5_loader else None
+
+    @property
+    def t5_tokenizer(self):
+        return self.t5_loader.tokenizer if self.t5_loader else None
+
+    @property
+    def lyra_model(self):
+        return self.lyra_loader.model if self.lyra_loader else None
+
+    @property
+    def lyra_available(self) -> bool:
+        return self.t5_loader is not None and self.lyra_loader is not None
+
+    def encode_prompt(self, prompt: str, negative_prompt: str = ""):
+        """Encode text prompts to embeddings."""
+        text_inputs = self.tokenizer(
+            prompt,
+            padding="max_length",
+            max_length=self.tokenizer.model_max_length,
+            truncation=True,
+            return_tensors="pt",
+        )
+        text_input_ids = text_inputs.input_ids.to(self.device)
+
+        with torch.no_grad():
+            prompt_embeds = self.text_encoder(text_input_ids)[0]
+
+        if negative_prompt:
+            uncond_inputs = self.tokenizer(
+                negative_prompt,
+                padding="max_length",
+                max_length=self.tokenizer.model_max_length,
+                truncation=True,
+                return_tensors="pt",
+            )
+            uncond_input_ids = uncond_inputs.input_ids.to(self.device)
+
+            with torch.no_grad():
+                negative_prompt_embeds = self.text_encoder(uncond_input_ids)[0]
+        else:
+            negative_prompt_embeds = torch.zeros_like(prompt_embeds)
+
+        return prompt_embeds, negative_prompt_embeds
+
+    def encode_prompt_lyra(self, prompt: str, negative_prompt: str = ""):
+        """Encode using Lyra VAE (CLIP + T5 fusion)."""
+        if not self.lyra_available:
+            raise ValueError("Lyra VAE components not configured")
+
+        t5_encoder = self.t5_encoder
+        t5_tokenizer = self.t5_tokenizer
+        lyra_model = self.lyra_model
+
+        # CLIP
+        text_inputs = self.tokenizer(
+            prompt,
+            padding="max_length",
+            max_length=self.tokenizer.model_max_length,
+            truncation=True,
+            return_tensors="pt",
+        )
+        text_input_ids = text_inputs.input_ids.to(self.device)
+
+        with torch.no_grad():
+            clip_embeds = self.text_encoder(text_input_ids)[0]
+
+        # T5
+        t5_inputs = t5_tokenizer(
+            prompt,
+            max_length=77,
+            padding='max_length',
+            truncation=True,
+            return_tensors='pt'
+        ).to(self.device)
+
+        with torch.no_grad():
+            t5_embeds = t5_encoder(**t5_inputs).last_hidden_state
+
+        # Fuse
+        modality_inputs = {'clip': clip_embeds, 't5': t5_embeds}
+
+        with torch.no_grad():
+            reconstructions, mu, logvar = lyra_model(
+                modality_inputs,
+                target_modalities=['clip']
+            )
+        prompt_embeds = reconstructions['clip']
+
+        # Negative
+        if negative_prompt:
+            uncond_inputs = self.tokenizer(
+                negative_prompt,
+                padding="max_length",
+                max_length=self.tokenizer.model_max_length,
+                truncation=True,
+                return_tensors="pt",
+            )
+            uncond_input_ids = uncond_inputs.input_ids.to(self.device)
+
+            with torch.no_grad():
+                clip_embeds_uncond = self.text_encoder(uncond_input_ids)[0]
+
+            t5_inputs_uncond = t5_tokenizer(
+                negative_prompt,
+                max_length=77,
+                padding='max_length',
+                truncation=True,
+                return_tensors='pt'
+            ).to(self.device)
+
+            with torch.no_grad():
+                t5_embeds_uncond = t5_encoder(**t5_inputs_uncond).last_hidden_state
+
+            modality_inputs_uncond = {'clip': clip_embeds_uncond, 't5': t5_embeds_uncond}
+
+            with torch.no_grad():
+                reconstructions_uncond, _, _ = lyra_model(
+                    modality_inputs_uncond,
+                    target_modalities=['clip']
+                )
+            negative_prompt_embeds = reconstructions_uncond['clip']
+        else:
+            negative_prompt_embeds = torch.zeros_like(prompt_embeds)
+
+        return prompt_embeds, negative_prompt_embeds
+
+    @torch.no_grad()
+    def __call__(
+        self,
+        prompt: str,
+        negative_prompt: str = "",
+        height: int = 512,
+        width: int = 512,
+        num_inference_steps: int = 20,
+        guidance_scale: float = 7.5,
+        shift: float = 2.5,
+        use_flow_matching: bool = True,
+        prediction_type: str = "epsilon",
+        seed: Optional[int] = None,
+        use_lyra: bool = False,
+        clip_skip: int = 1,
+        t5_summary: str = "",
+        lyra_strength: float = 1.0,
+        progress_callback=None
+    ):
+        """Generate image."""
 
 # ============================================================================
 # MODEL LOADERS
 # ============================================================================
 
 def load_illustrious_xl(
-    repo_id: str = "AbstractPhil/
     filename: str = "illustriousXL_v01.safetensors",
     device: str = "cuda"
 ) -> Tuple[UNet2DConditionModel, AutoencoderKL, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer, CLIPTokenizer]:
@@ -668,7 +1147,7 @@ def load_illustrious_xl(
     checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model")
     print(f"✓ Downloaded: {checkpoint_path}")
 
-    print("📦 Loading
     pipe = StableDiffusionXLPipeline.from_single_file(
         checkpoint_path,
         torch_dtype=torch.float16,
@@ -686,6 +1165,51 @@ def load_illustrious_xl(
     torch.cuda.empty_cache()
 
     print("✅ Illustrious XL loaded!")
 
     return unet, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2
 
@@ -694,60 +1218,111 @@ def load_illustrious_xl(
 # PIPELINE INITIALIZATION
 # ============================================================================
 
-def
-
-    """Initialize SDXL pipeline with lazy T5/Lyra loading."""
 
     print(f"🚀 Initializing {model_choice} pipeline...")
 
-
     else:
-        #
-
-        "
-        torch_dtype=torch.
     )
-
-
 
-    print("✅ Pipeline initialized (T5
     return pipeline
 
 
@@ -757,20 +1332,15 @@ def initialize_sdxl_pipeline(
 
 CURRENT_PIPELINE = None
 CURRENT_MODEL = None
-CURRENT_SCHEDULER = None
 
 
-def get_pipeline(model_choice: str
     """Get or create pipeline for selected model."""
-    global CURRENT_PIPELINE, CURRENT_MODEL
 
     if CURRENT_PIPELINE is None or CURRENT_MODEL != model_choice:
-        CURRENT_PIPELINE =
         CURRENT_MODEL = model_choice
-        CURRENT_SCHEDULER = scheduler_name
-    elif CURRENT_SCHEDULER != scheduler_name:
-        CURRENT_PIPELINE.set_scheduler(scheduler_name)
-        CURRENT_SCHEDULER = scheduler_name
 
     return CURRENT_PIPELINE
 
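
Note: `get_pipeline` is a module-level memo — rebuild only when the model choice changes, mutate only the scheduler otherwise. The same idea in a self-contained form (names here are illustrative):

_CACHE = {"key": None, "value": None}

def get_cached(key: str, build):
    """Rebuild the cached value only when the key changes."""
    if _CACHE["key"] != key:
        _CACHE["value"] = build(key)   # expensive path: full reload
        _CACHE["key"] = key
    return _CACHE["value"]             # cheap path: reuse
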
@@ -779,18 +1349,36 @@ def get_pipeline(model_choice: str, scheduler_name: str = SCHEDULER_EULER_A):
 # INFERENCE
 # ============================================================================
 
-
 def generate_image(
     prompt: str,
     t5_summary: str,
     negative_prompt: str,
     model_choice: str,
-
     clip_skip: int,
     num_steps: int,
     cfg_scale: float,
     width: int,
     height: int,
     use_lyra: bool,
     lyra_strength: float,
     seed: int,
@@ -806,9 +1394,18 @@ def generate_image(
         progress((step + 1) / total, desc=desc)
 
     try:
-        pipeline = get_pipeline(model_choice
 
-
         progress(0.05, desc="Generating...")
 
         image = pipeline(
@@ -818,6 +1415,9 @@ def generate_image(
             width=width,
             num_inference_steps=num_steps,
             guidance_scale=cfg_scale,
             seed=seed,
             use_lyra=False,
             clip_skip=clip_skip,
@@ -828,6 +1428,7 @@ def generate_image(
             return image, None, seed
 
         else:
             progress(0.05, desc="Generating standard...")
 
             image_standard = pipeline(
@@ -837,13 +1438,16 @@ def generate_image(
                 width=width,
                 num_inference_steps=num_steps,
                 guidance_scale=cfg_scale,
                 seed=seed,
                 use_lyra=False,
                 clip_skip=clip_skip,
                 progress_callback=lambda s, t, d: progress(0.05 + (s/t) * 0.45, desc=d)
             )
 
-            progress(0.5, desc="
 
             image_lyra = pipeline(
                 prompt=prompt,
@@ -852,6 +1456,9 @@ def generate_image(
                 width=width,
                 num_inference_steps=num_steps,
                 guidance_scale=cfg_scale,
                 seed=seed,
                 use_lyra=True,
                 clip_skip=clip_skip,
@@ -879,93 +1486,217 @@ def create_demo():
 
     with gr.Blocks() as demo:
         gr.Markdown("""
-        # 🌙 Lyra/
 
        **Geometric crystalline diffusion** by [AbstractPhil](https://huggingface.co/AbstractPhil)
 
        | Model | Architecture | Lyra Version | Best For |
        |-------|-------------|--------------|----------|
        | **Illustrious XL** | SDXL | v2 (T5-XL) | Anime/illustration, high detail |
        | **SDXL Base** | SDXL | v2 (T5-XL) | Photorealistic, general purpose |
 
-        **Lyra VAE
-        T5 and Lyra only load when you enable the Lyra checkbox!
        """)
 
        with gr.Row():
            with gr.Column(scale=1):
                prompt = gr.TextArea(
-                    label="Prompt",
                    value="masterpiece, best quality, 1girl, blue hair, school uniform, cherry blossoms, detailed background",
                    lines=3
                )
 
                t5_summary = gr.TextArea(
-                    label="T5 Summary (for Lyra)",
-                    value="A beautiful anime girl with flowing blue hair wearing a school uniform, surrounded by delicate pink cherry blossoms",
                    lines=2,
-                    info="
                )
 
                negative_prompt = gr.TextArea(
                    label="Negative Prompt",
-                    value="lowres, bad anatomy, bad hands, text, error, worst quality, low quality",
                    lines=2
                )
 
-
-
-
 
                clip_skip = gr.Slider(
                    label="CLIP Skip",
-                    minimum=1,
-
                )
 
                use_lyra = gr.Checkbox(
-                    label="Enable Lyra VAE (
                    value=False,
-                    info="
                )
 
                lyra_strength = gr.Slider(
                    label="Lyra Blend Strength",
-                    minimum=0.0,
-
                )
 
                with gr.Accordion("Generation Settings", open=True):
-                    num_steps = gr.Slider(
-
 
                    with gr.Row():
-                        width = gr.Slider(
-
 
-                    seed = gr.Slider(
-
 
                generate_btn = gr.Button("🎨 Generate", variant="primary", size="lg")
 
            with gr.Column(scale=1):
                with gr.Row():
-                    output_image_standard = gr.Image(
-
 
                output_seed = gr.Number(label="Seed", precision=0)
 
        # Event handlers
        def on_lyra_toggle(enabled):
            if enabled:
                return {
                    output_image_standard: gr.update(visible=True, label="Standard"),
 
@@ -977,6 +1708,12 @@ def create_demo():
                    output_image_lyra: gr.update(visible=False)
                }
 
        use_lyra.change(
            fn=on_lyra_toggle,
            inputs=[use_lyra],
 
@@ -986,9 +1723,9 @@ def create_demo():
        generate_btn.click(
            fn=generate_image,
            inputs=[
-                prompt, t5_summary, negative_prompt, model_choice,
-
-                use_lyra, lyra_strength, seed, randomize_seed
            ],
            outputs=[output_image_standard, output_image_lyra, output_seed]
        )
| 9 |
Lyra VAE Versions:
|
| 10 |
- v1: SD1.5 (768 dim CLIP + T5-base) - geofractal.model.vae.vae_lyra
|
| 11 |
- v2: SDXL/Illustrious (768 CLIP-L + 1280 CLIP-G + 2048 T5-XL) - geofractal.model.vae.vae_lyra_v2
|
| 12 |
+
|
| 13 |
+
Features:
|
| 14 |
+
- Lazy loading: T5 and Lyra only download when first used
|
| 15 |
+
- Multiple schedulers: Euler Ancestral, Euler, DPM++ 2M SDE, DPM++ 2M
|
| 16 |
+
- Integrated loader module for automatic version detection
|
| 17 |
"""
|
| 18 |
|
| 19 |
import os
|
|
|
|
| 22 |
import gradio as gr
|
| 23 |
import numpy as np
|
| 24 |
from PIL import Image
|
| 25 |
+
from typing import Optional, Dict, Tuple, Union
|
| 26 |
import spaces
|
| 27 |
from safetensors.torch import load_file as load_safetensors
|
| 28 |
|
|
|
|
| 43 |
)
|
| 44 |
from huggingface_hub import hf_hub_download
|
| 45 |
|
| 46 |
+
# Import Lyra VAE v1 (SD1.5) from geofractal
|
| 47 |
+
try:
|
| 48 |
+
from geofractal.model.vae.vae_lyra import MultiModalVAE as LyraV1, MultiModalVAEConfig as LyraV1Config
|
| 49 |
+
LYRA_V1_AVAILABLE = True
|
| 50 |
+
except ImportError:
|
| 51 |
+
print("⚠️ Lyra VAE v1 not available")
|
| 52 |
+
LYRA_V1_AVAILABLE = False
|
| 53 |
|
| 54 |
+
# Import Lyra VAE v2 (SDXL/Illustrious) from geofractal
|
| 55 |
+
try:
|
| 56 |
+
from geofractal.model.vae.vae_lyra_v2 import MultiModalVAE as LyraV2, MultiModalVAEConfig as LyraV2Config
|
| 57 |
+
LYRA_V2_AVAILABLE = True
|
| 58 |
+
except ImportError:
|
| 59 |
+
print("⚠️ Lyra VAE v2 not available")
|
| 60 |
+
LYRA_V2_AVAILABLE = False
|
| 61 |
|
| 62 |
+
# Import Lyra loader module
|
| 63 |
+
try:
|
| 64 |
+
from geofractal.model.vae.load_lyra import load_vae_lyra, load_lyra_illustrious
|
| 65 |
+
LYRA_LOADER_AVAILABLE = True
|
| 66 |
+
except ImportError:
|
| 67 |
+
print("⚠️ Lyra loader module not available, using fallback")
|
| 68 |
+
LYRA_LOADER_AVAILABLE = False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
|
| 70 |
|
| 71 |
# ============================================================================
|
|
|
|
| 75 |
ARCH_SD15 = "sd15"
|
| 76 |
ARCH_SDXL = "sdxl"
|
| 77 |
|
| 78 |
+
# Scheduler names
|
| 79 |
SCHEDULER_EULER_A = "Euler Ancestral"
|
| 80 |
SCHEDULER_EULER = "Euler"
|
| 81 |
SCHEDULER_DPM_2M_SDE = "DPM++ 2M SDE"
|
| 82 |
SCHEDULER_DPM_2M = "DPM++ 2M"
|
| 83 |
|
| 84 |
+
SCHEDULER_CHOICES = [
|
| 85 |
+
SCHEDULER_EULER_A,
|
| 86 |
+
SCHEDULER_EULER,
|
| 87 |
+
SCHEDULER_DPM_2M_SDE,
|
| 88 |
+
SCHEDULER_DPM_2M,
|
| 89 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
+
# ComfyUI key prefixes for SDXL single-file checkpoints
|
| 92 |
+
COMFYUI_UNET_PREFIX = "model.diffusion_model."
|
| 93 |
+
COMFYUI_CLIP_L_PREFIX = "conditioner.embedders.0.transformer."
|
| 94 |
+
COMFYUI_CLIP_G_PREFIX = "conditioner.embedders.1.model."
|
| 95 |
+
COMFYUI_VAE_PREFIX = "first_stage_model."
|
| 96 |
|
| 97 |
+
# Lyra repos
|
| 98 |
+
LYRA_ILLUSTRIOUS_REPO = "AbstractPhil/vae-lyra-xl-adaptive-cantor-illustrious"
|
| 99 |
+
LYRA_SD15_REPO = "AbstractPhil/vae-lyra"
|
| 100 |
|
| 101 |
+
# T5 model - use flan-t5-xl (what Lyra was trained on)
|
| 102 |
+
T5_XL_MODEL = "google/flan-t5-xl"
|
| 103 |
+
T5_BASE_MODEL = "google/flan-t5-base"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
|
| 105 |
|
| 106 |
# ============================================================================
|
|
|
|
| 108 |
# ============================================================================
|
| 109 |
|
| 110 |
class LazyT5Encoder:
|
| 111 |
+
"""Lazy loader for T5 encoder - only downloads/loads when first accessed."""
|
| 112 |
|
| 113 |
+
def __init__(self, model_name: str = T5_XL_MODEL, device: str = "cuda", dtype=torch.float16):
|
| 114 |
self.model_name = model_name
|
| 115 |
self.device = device
|
| 116 |
+
self.dtype = dtype
|
| 117 |
self._encoder = None
|
| 118 |
self._tokenizer = None
|
| 119 |
+
self._loaded = False
|
| 120 |
|
| 121 |
@property
|
| 122 |
+
def encoder(self) -> T5EncoderModel:
|
| 123 |
if self._encoder is None:
|
| 124 |
+
print(f"📥 Lazy loading T5 encoder: {self.model_name}...")
|
| 125 |
self._encoder = T5EncoderModel.from_pretrained(
|
| 126 |
self.model_name,
|
| 127 |
+
torch_dtype=self.dtype
|
| 128 |
).to(self.device)
|
| 129 |
self._encoder.eval()
|
| 130 |
+
print(f"✓ T5 encoder loaded ({sum(p.numel() for p in self._encoder.parameters())/1e6:.1f}M params)")
|
| 131 |
+
self._loaded = True
|
| 132 |
return self._encoder
|
| 133 |
|
| 134 |
@property
|
| 135 |
+
def tokenizer(self) -> T5Tokenizer:
|
| 136 |
if self._tokenizer is None:
|
| 137 |
print(f"📥 Loading T5 tokenizer: {self.model_name}...")
|
| 138 |
self._tokenizer = T5Tokenizer.from_pretrained(self.model_name)
|
| 139 |
print("✓ T5 tokenizer loaded")
|
| 140 |
return self._tokenizer
|
| 141 |
|
| 142 |
+
@property
|
| 143 |
+
def is_loaded(self) -> bool:
|
| 144 |
+
return self._loaded
|
| 145 |
+
|
| 146 |
+
def unload(self):
|
| 147 |
+
"""Free VRAM by unloading the encoder."""
|
| 148 |
+
if self._encoder is not None:
|
| 149 |
+
del self._encoder
|
| 150 |
+
self._encoder = None
|
| 151 |
+
self._loaded = False
|
| 152 |
+
torch.cuda.empty_cache()
|
| 153 |
+
print("🗑️ T5 encoder unloaded")
|
| 154 |
|
| 155 |
|
| 156 |
class LazyLyraModel:
|
| 157 |
+
"""Lazy loader for Lyra VAE - only downloads/loads when first accessed."""
|
| 158 |
|
| 159 |
+
def __init__(
|
| 160 |
+
self,
|
| 161 |
+
repo_id: str = LYRA_ILLUSTRIOUS_REPO,
|
| 162 |
+
device: str = "cuda",
|
| 163 |
+
checkpoint: Optional[str] = None
|
| 164 |
+
):
|
| 165 |
self.repo_id = repo_id
|
| 166 |
self.device = device
|
| 167 |
+
self.checkpoint = checkpoint
|
| 168 |
self._model = None
|
| 169 |
+
self._info = None
|
| 170 |
+
self._loaded = False
|
| 171 |
|
| 172 |
@property
|
| 173 |
def model(self):
|
| 174 |
if self._model is None:
|
| 175 |
+
print(f"📥 Lazy loading Lyra VAE: {self.repo_id}...")
|
| 176 |
|
| 177 |
+
if LYRA_LOADER_AVAILABLE:
|
| 178 |
+
# Use the loader module
|
| 179 |
+
self._model, self._info = load_vae_lyra(
|
| 180 |
+
self.repo_id,
|
| 181 |
+
checkpoint=self.checkpoint,
|
| 182 |
+
device=self.device,
|
| 183 |
+
return_info=True
|
| 184 |
+
)
|
| 185 |
else:
|
| 186 |
+
# Fallback to manual loading
|
| 187 |
+
self._model = self._load_fallback()
|
| 188 |
+
self._info = {"repo_id": self.repo_id, "version": "v2"}
|
| 189 |
+
|
| 190 |
+
self._model.eval()
|
| 191 |
+
self._loaded = True
|
| 192 |
+
print(f"✓ Lyra VAE loaded")
|
| 193 |
return self._model
|
| 194 |
|
| 195 |
+
@property
|
| 196 |
+
def info(self) -> Optional[Dict]:
|
| 197 |
+
if self._info is None and self._model is not None:
|
| 198 |
+
return {"repo_id": self.repo_id}
|
| 199 |
+
return self._info
|
| 200 |
+
|
| 201 |
+
@property
|
| 202 |
+
def is_loaded(self) -> bool:
|
| 203 |
+
return self._loaded
|
| 204 |
+
|
| 205 |
+
def _load_fallback(self):
|
| 206 |
+
"""Fallback loading if loader module not available."""
|
| 207 |
if not LYRA_V2_AVAILABLE:
|
| 208 |
+
raise ImportError("Lyra VAE v2 not available")
|
|
|
|
| 209 |
|
| 210 |
+
config_path = hf_hub_download(
|
| 211 |
+
repo_id=self.repo_id,
|
| 212 |
+
filename="config.json",
|
| 213 |
+
repo_type="model"
|
| 214 |
+
)
|
| 215 |
|
| 216 |
+
with open(config_path, 'r') as f:
|
| 217 |
+
config_dict = json.load(f)
|
| 218 |
+
|
| 219 |
+
# Find checkpoint
|
| 220 |
+
from huggingface_hub import list_repo_files
|
| 221 |
+
import re
|
| 222 |
+
|
| 223 |
+
repo_files = list_repo_files(self.repo_id, repo_type="model")
|
| 224 |
+
checkpoint_files = [f for f in repo_files if f.endswith('.safetensors') or f.endswith('.pt')]
|
| 225 |
+
|
| 226 |
+
# Prefer weights/ folder
|
| 227 |
+
weights_files = [f for f in checkpoint_files if f.startswith('weights/')]
|
| 228 |
+
if weights_files:
|
| 229 |
+
checkpoint_file = sorted(weights_files)[-1] # Latest
|
| 230 |
+
elif checkpoint_files:
|
| 231 |
+
checkpoint_file = checkpoint_files[0]
|
| 232 |
+
else:
|
| 233 |
+
raise FileNotFoundError(f"No checkpoint found in {self.repo_id}")
|
| 234 |
+
|
| 235 |
+
checkpoint_path = hf_hub_download(
|
| 236 |
+
repo_id=self.repo_id,
|
| 237 |
+
filename=checkpoint_file,
|
| 238 |
+
repo_type="model"
|
| 239 |
+
)
|
| 240 |
+
|
| 241 |
+
# Load weights
|
| 242 |
+
if checkpoint_file.endswith('.safetensors'):
|
| 243 |
+
state_dict = load_safetensors(checkpoint_path, device="cpu")
|
| 244 |
+
else:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
checkpoint = torch.load(checkpoint_path, map_location="cpu")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
state_dict = checkpoint.get('model_state_dict', checkpoint)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 247 |
|
| 248 |
+
# Build config
|
| 249 |
+
vae_config = LyraV2Config(
|
| 250 |
+
modality_dims=config_dict.get('modality_dims'),
|
| 251 |
+
modality_seq_lens=config_dict.get('modality_seq_lens'),
|
| 252 |
+
binding_config=config_dict.get('binding_config'),
|
| 253 |
+
latent_dim=config_dict.get('latent_dim', 2048),
|
| 254 |
+
hidden_dim=config_dict.get('hidden_dim', 2048),
|
| 255 |
+
fusion_strategy=config_dict.get('fusion_strategy', 'adaptive_cantor'),
|
| 256 |
+
)
|
| 257 |
+
|
| 258 |
+
model = LyraV2(vae_config)
|
| 259 |
+
model.load_state_dict(state_dict, strict=False)
|
| 260 |
+
model.to(self.device)
|
| 261 |
+
|
| 262 |
+
return model
|
| 263 |
+
|
| 264 |
+
def unload(self):
|
| 265 |
+
"""Free VRAM by unloading the model."""
|
| 266 |
+
if self._model is not None:
|
| 267 |
+
del self._model
|
| 268 |
+
self._model = None
|
| 269 |
+
self._info = None
|
| 270 |
+
self._loaded = False
|
| 271 |
+
torch.cuda.empty_cache()
|
| 272 |
+
print("🗑️ Lyra VAE unloaded")
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
# ============================================================================
|
| 276 |
+
# SCHEDULER FACTORY
|
| 277 |
+
# ============================================================================
|
| 278 |
+
|
| 279 |
+
def get_scheduler(
|
| 280 |
+
scheduler_name: str,
|
| 281 |
+
config_source: str = "stabilityai/stable-diffusion-xl-base-1.0",
|
| 282 |
+
is_sdxl: bool = True
|
| 283 |
+
):
|
| 284 |
+
"""Create scheduler by name.
|
| 285 |
+
|
| 286 |
+
Args:
|
| 287 |
+
scheduler_name: One of SCHEDULER_CHOICES
|
| 288 |
+
config_source: HF repo to load scheduler config from
|
| 289 |
+
is_sdxl: Whether this is for SDXL (affects some defaults)
|
| 290 |
+
|
| 291 |
+
Returns:
|
| 292 |
+
Configured scheduler instance
|
| 293 |
+
"""
|
| 294 |
+
subfolder = "scheduler"
|
| 295 |
+
|
| 296 |
+
if scheduler_name == SCHEDULER_EULER_A:
|
| 297 |
+
return EulerAncestralDiscreteScheduler.from_pretrained(
|
| 298 |
+
config_source,
|
| 299 |
+
subfolder=subfolder
|
| 300 |
+
)
|
| 301 |
+
|
| 302 |
+
elif scheduler_name == SCHEDULER_EULER:
|
| 303 |
+
return EulerDiscreteScheduler.from_pretrained(
|
| 304 |
+
config_source,
|
| 305 |
+
subfolder=subfolder
|
| 306 |
+
)
|
| 307 |
+
|
| 308 |
+
elif scheduler_name == SCHEDULER_DPM_2M_SDE:
|
| 309 |
+
# DPM++ 2M SDE - good for detailed images
|
| 310 |
+
return DPMSolverSDEScheduler.from_pretrained(
|
| 311 |
+
config_source,
|
| 312 |
+
subfolder=subfolder,
|
| 313 |
+
algorithm_type="sde-dpmsolver++",
|
| 314 |
+
solver_order=2,
|
| 315 |
+
use_karras_sigmas=True,
|
| 316 |
+
)
|
| 317 |
+
|
| 318 |
+
elif scheduler_name == SCHEDULER_DPM_2M:
|
| 319 |
+
# DPM++ 2M - fast and quality
|
| 320 |
+
return DPMSolverMultistepScheduler.from_pretrained(
|
| 321 |
+
config_source,
|
| 322 |
+
subfolder=subfolder,
|
| 323 |
+
algorithm_type="dpmsolver++",
|
| 324 |
+
solver_order=2,
|
| 325 |
+
use_karras_sigmas=True,
|
| 326 |
+
)
|
| 327 |
+
|
| 328 |
+
else:
|
| 329 |
+
print(f"⚠️ Unknown scheduler '{scheduler_name}', defaulting to Euler Ancestral")
|
| 330 |
+
return EulerAncestralDiscreteScheduler.from_pretrained(
|
| 331 |
+
config_source,
|
| 332 |
+
subfolder=subfolder
|
| 333 |
+
)
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
# ============================================================================
|
| 337 |
+
# UTILITIES
|
| 338 |
+
# ============================================================================
|
| 339 |
+
|
| 340 |
+
def extract_comfyui_components(state_dict: Dict[str, torch.Tensor]) -> Dict[str, Dict[str, torch.Tensor]]:
|
| 341 |
+
"""Extract UNet, CLIP-L, CLIP-G, and VAE from ComfyUI single-file checkpoint."""
|
| 342 |
+
|
| 343 |
+
components = {
|
| 344 |
+
"unet": {},
|
| 345 |
+
"clip_l": {},
|
| 346 |
+
"clip_g": {},
|
| 347 |
+
"vae": {}
|
| 348 |
+
}
|
| 349 |
+
|
| 350 |
+
for key, value in state_dict.items():
|
| 351 |
+
if key.startswith(COMFYUI_UNET_PREFIX):
|
| 352 |
+
new_key = key[len(COMFYUI_UNET_PREFIX):]
|
| 353 |
+
components["unet"][new_key] = value
|
| 354 |
+
elif key.startswith(COMFYUI_CLIP_L_PREFIX):
|
| 355 |
+
new_key = key[len(COMFYUI_CLIP_L_PREFIX):]
|
| 356 |
+
components["clip_l"][new_key] = value
|
| 357 |
+
elif key.startswith(COMFYUI_CLIP_G_PREFIX):
|
| 358 |
+
new_key = key[len(COMFYUI_CLIP_G_PREFIX):]
|
| 359 |
+
components["clip_g"][new_key] = value
|
| 360 |
+
elif key.startswith(COMFYUI_VAE_PREFIX):
|
| 361 |
+
new_key = key[len(COMFYUI_VAE_PREFIX):]
|
| 362 |
+
components["vae"][new_key] = value
|
| 363 |
+
|
| 364 |
+
print(f" Extracted components:")
|
| 365 |
+
print(f" UNet: {len(components['unet'])} keys")
|
| 366 |
+
print(f" CLIP-L: {len(components['clip_l'])} keys")
|
| 367 |
+
print(f" CLIP-G: {len(components['clip_g'])} keys")
|
| 368 |
+
print(f" VAE: {len(components['vae'])} keys")
|
| 369 |
+
|
| 370 |
+
return components
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
def get_clip_hidden_state(
|
| 374 |
+
model_output,
|
| 375 |
+
clip_skip: int = 1,
|
| 376 |
+
output_hidden_states: bool = True
|
| 377 |
+
) -> torch.Tensor:
|
| 378 |
+
"""Extract hidden state with clip_skip support."""
|
| 379 |
+
if clip_skip == 1 or not output_hidden_states:
|
| 380 |
+
return model_output.last_hidden_state
|
| 381 |
+
|
| 382 |
+
if hasattr(model_output, 'hidden_states') and model_output.hidden_states is not None:
|
| 383 |
+
return model_output.hidden_states[-clip_skip]
|
| 384 |
|
| 385 |
+
return model_output.last_hidden_state
|
|
|
|
| 386 |
|
| 387 |
|
| 388 |
# ============================================================================
|
|
|
|
| 390 |
# ============================================================================
|
| 391 |
|
| 392 |
class SDXLFlowMatchingPipeline:
|
| 393 |
+
"""Pipeline for SDXL-based flow-matching inference with dual CLIP encoders.
|
| 394 |
+
|
| 395 |
+
Uses lazy loading for T5 and Lyra - they're only downloaded when actually used.
|
| 396 |
+
"""
|
| 397 |
|
| 398 |
def __init__(
|
| 399 |
self,
|
|
|
|
| 418 |
self.scheduler = scheduler
|
| 419 |
self.device = device
|
| 420 |
|
| 421 |
+
# Lazy loaders for Lyra components
|
| 422 |
self.t5_loader = t5_loader
|
| 423 |
self.lyra_loader = lyra_loader
|
| 424 |
|
|
|
|
| 426 |
self.clip_skip = clip_skip
|
| 427 |
self.vae_scale_factor = 0.13025
|
| 428 |
self.arch = ARCH_SDXL
|
| 429 |
+
|
| 430 |
+
# Track current scheduler name for UI
|
| 431 |
+
self._scheduler_name = SCHEDULER_EULER_A
|
| 432 |
|
| 433 |
def set_scheduler(self, scheduler_name: str):
|
| 434 |
+
"""Switch scheduler without reloading model."""
|
| 435 |
+
if scheduler_name != self._scheduler_name:
|
| 436 |
+
self.scheduler = get_scheduler(
|
| 437 |
+
scheduler_name,
|
| 438 |
+
config_source="stabilityai/stable-diffusion-xl-base-1.0",
|
| 439 |
+
is_sdxl=True
|
| 440 |
+
)
|
| 441 |
+
self._scheduler_name = scheduler_name
|
| 442 |
+
print(f"✓ Scheduler changed to: {scheduler_name}")
|
| 443 |
|
| 444 |
@property
|
| 445 |
+
def t5_encoder(self) -> Optional[T5EncoderModel]:
|
| 446 |
+
"""Access T5 encoder (triggers lazy load if needed)."""
|
| 447 |
return self.t5_loader.encoder if self.t5_loader else None
|
| 448 |
|
| 449 |
@property
|
| 450 |
+
def t5_tokenizer(self) -> Optional[T5Tokenizer]:
|
| 451 |
+
"""Access T5 tokenizer (triggers lazy load if needed)."""
|
| 452 |
return self.t5_loader.tokenizer if self.t5_loader else None
|
| 453 |
|
| 454 |
@property
|
| 455 |
def lyra_model(self):
|
| 456 |
+
"""Access Lyra model (triggers lazy load if needed)."""
|
| 457 |
return self.lyra_loader.model if self.lyra_loader else None
|
| 458 |
|
| 459 |
+
@property
|
| 460 |
+
def lyra_available(self) -> bool:
|
| 461 |
+
"""Check if Lyra components are configured (not necessarily loaded)."""
|
| 462 |
+
return self.t5_loader is not None and self.lyra_loader is not None
|
| 463 |
+
|
| 464 |
def encode_prompt(
|
| 465 |
self,
|
| 466 |
prompt: str,
|
|
|
|
| 505 |
prompt_embeds_g = get_clip_hidden_state(clip_g_output, clip_skip, output_hidden_states)
|
| 506 |
pooled_prompt_embeds = clip_g_output.text_embeds
|
| 507 |
|
| 508 |
+
# Concatenate CLIP-L and CLIP-G embeddings
|
| 509 |
prompt_embeds = torch.cat([prompt_embeds_l, prompt_embeds_g], dim=-1)
|
| 510 |
|
| 511 |
# Negative prompt
|
|
|
|
| 557 |
t5_summary: str = "",
|
| 558 |
lyra_strength: float = 0.3
|
| 559 |
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
|
| 560 |
+
"""Encode prompts using Lyra VAE v2 fusion (CLIP + T5).
|
| 561 |
|
| 562 |
+
This triggers lazy loading of T5 and Lyra if not already loaded.
|
| 563 |
+
"""
|
| 564 |
+
if not self.lyra_available:
|
| 565 |
+
raise ValueError("Lyra VAE components not configured")
|
| 566 |
+
|
| 567 |
+
# Access properties triggers lazy load
|
| 568 |
+
t5_encoder = self.t5_encoder
|
| 569 |
+
t5_tokenizer = self.t5_tokenizer
|
| 570 |
+
lyra_model = self.lyra_model
|
| 571 |
|
| 572 |
# Get standard CLIP embeddings first
|
| 573 |
prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt(
|
| 574 |
prompt, negative_prompt, clip_skip
|
| 575 |
)
|
| 576 |
|
| 577 |
+
# Format T5 input with pilcrow separator (¶)
|
| 578 |
SUMMARY_SEPARATOR = "¶"
|
| 579 |
if t5_summary.strip():
|
| 580 |
t5_prompt = f"{prompt} {SUMMARY_SEPARATOR} {t5_summary}"
|
|
|
|
| 582 |
t5_prompt = f"{prompt} {SUMMARY_SEPARATOR} {prompt}"
|
| 583 |
|
| 584 |
# Get T5 embeddings
|
| 585 |
+
t5_inputs = t5_tokenizer(
|
| 586 |
t5_prompt,
|
| 587 |
max_length=512,
|
| 588 |
padding='max_length',
|
|
|
|
| 591 |
).to(self.device)
|
| 592 |
|
| 593 |
with torch.no_grad():
|
| 594 |
+
t5_embeds = t5_encoder(**t5_inputs).last_hidden_state
|
| 595 |
|
| 596 |
clip_l_dim = 768
|
| 597 |
+
clip_g_dim = 1280
|
| 598 |
+
|
| 599 |
clip_l_embeds = prompt_embeds[..., :clip_l_dim]
|
| 600 |
clip_g_embeds = prompt_embeds[..., clip_l_dim:]
|
| 601 |
|
|
|
|
| 606 |
't5_xl_l': t5_embeds.float(),
|
| 607 |
't5_xl_g': t5_embeds.float()
|
| 608 |
}
|
| 609 |
+
reconstructions, mu, logvar, _ = lyra_model(
|
| 610 |
modality_inputs,
|
| 611 |
target_modalities=['clip_l', 'clip_g']
|
| 612 |
)
|
|
|
|
| 614 |
lyra_clip_l = reconstructions['clip_l'].to(prompt_embeds.dtype)
|
| 615 |
lyra_clip_g = reconstructions['clip_g'].to(prompt_embeds.dtype)
|
| 616 |
|
| 617 |
+
# Normalize reconstructions to match input statistics
|
| 618 |
clip_l_std_ratio = lyra_clip_l.std() / (clip_l_embeds.std() + 1e-8)
|
| 619 |
clip_g_std_ratio = lyra_clip_g.std() / (clip_g_embeds.std() + 1e-8)
|
| 620 |
|
|
|
|
| 626 |
lyra_clip_g = (lyra_clip_g - lyra_clip_g.mean()) / (lyra_clip_g.std() + 1e-8)
|
| 627 |
lyra_clip_g = lyra_clip_g * clip_g_embeds.std() + clip_g_embeds.mean()
|
| 628 |
|
| 629 |
+
# Blend original CLIP with Lyra reconstruction
|
| 630 |
fused_clip_l = (1 - lyra_strength) * clip_l_embeds + lyra_strength * lyra_clip_l
|
| 631 |
fused_clip_g = (1 - lyra_strength) * clip_g_embeds + lyra_strength * lyra_clip_g
|
| 632 |
|
| 633 |
prompt_embeds_fused = torch.cat([fused_clip_l, fused_clip_g], dim=-1)
|
| 634 |
|
| 635 |
+
# Process negative prompt
|
| 636 |
+
if negative_prompt:
|
| 637 |
+
neg_strength = lyra_strength * 0.5 # Less aggressive for negative
|
| 638 |
+
|
| 639 |
+
t5_neg_prompt = f"{negative_prompt} {SUMMARY_SEPARATOR} {negative_prompt}"
|
| 640 |
+
t5_inputs_neg = t5_tokenizer(
|
| 641 |
+
t5_neg_prompt,
|
| 642 |
+
max_length=512,
|
| 643 |
+
padding='max_length',
|
| 644 |
+
truncation=True,
|
| 645 |
+
return_tensors='pt'
|
| 646 |
+
).to(self.device)
|
| 647 |
+
|
| 648 |
+
with torch.no_grad():
|
| 649 |
+
t5_embeds_neg = t5_encoder(**t5_inputs_neg).last_hidden_state
|
| 650 |
+
|
| 651 |
+
neg_clip_l = negative_prompt_embeds[..., :clip_l_dim]
|
| 652 |
+
neg_clip_g = negative_prompt_embeds[..., clip_l_dim:]
|
| 653 |
+
|
| 654 |
+
modality_inputs_neg = {
|
| 655 |
+
'clip_l': neg_clip_l.float(),
|
| 656 |
+
'clip_g': neg_clip_g.float(),
|
| 657 |
+
't5_xl_l': t5_embeds_neg.float(),
|
| 658 |
+
't5_xl_g': t5_embeds_neg.float()
|
| 659 |
+
}
|
| 660 |
+
recon_neg, _, _, _ = lyra_model(modality_inputs_neg, target_modalities=['clip_l', 'clip_g'])
|
| 661 |
+
|
| 662 |
+
lyra_neg_l = recon_neg['clip_l'].to(negative_prompt_embeds.dtype)
|
| 663 |
+
lyra_neg_g = recon_neg['clip_g'].to(negative_prompt_embeds.dtype)
|
| 664 |
+
|
| 665 |
+
# Normalize
|
| 666 |
+
neg_l_ratio = lyra_neg_l.std() / (neg_clip_l.std() + 1e-8)
|
| 667 |
+
neg_g_ratio = lyra_neg_g.std() / (neg_clip_g.std() + 1e-8)
|
| 668 |
+
if neg_l_ratio > 2.0 or neg_l_ratio < 0.5:
|
| 669 |
+
lyra_neg_l = (lyra_neg_l - lyra_neg_l.mean()) / (lyra_neg_l.std() + 1e-8)
|
| 670 |
+
lyra_neg_l = lyra_neg_l * neg_clip_l.std() + neg_clip_l.mean()
|
| 671 |
+
if neg_g_ratio > 2.0 or neg_g_ratio < 0.5:
|
| 672 |
+
lyra_neg_g = (lyra_neg_g - lyra_neg_g.mean()) / (lyra_neg_g.std() + 1e-8)
|
| 673 |
+
lyra_neg_g = lyra_neg_g * neg_clip_g.std() + neg_clip_g.mean()
|
| 674 |
+
|
| 675 |
+
fused_neg_l = (1 - neg_strength) * neg_clip_l + neg_strength * lyra_neg_l
|
| 676 |
+
fused_neg_g = (1 - neg_strength) * neg_clip_g + neg_strength * lyra_neg_g
|
| 677 |
+
|
| 678 |
+
negative_prompt_embeds_fused = torch.cat([fused_neg_l, fused_neg_g], dim=-1)
|
| 679 |
+
else:
|
| 680 |
+
negative_prompt_embeds_fused = torch.zeros_like(prompt_embeds_fused)
|
| 681 |
+
|
| 682 |
+
return prompt_embeds_fused, negative_prompt_embeds_fused, pooled, negative_pooled
|
| 683 |
|
| 684 |
def _get_add_time_ids(
|
| 685 |
self,
|
|
|
|
| 700 |
negative_prompt: str = "",
|
| 701 |
height: int = 1024,
|
| 702 |
width: int = 1024,
|
| 703 |
+
num_inference_steps: int = 20,
|
| 704 |
+
guidance_scale: float = 7.5,
|
| 705 |
+
shift: float = 0.0,
|
| 706 |
+
use_flow_matching: bool = False,
|
| 707 |
+
prediction_type: str = "epsilon",
|
| 708 |
seed: Optional[int] = None,
|
| 709 |
use_lyra: bool = False,
|
| 710 |
+
clip_skip: int = 1,
|
| 711 |
t5_summary: str = "",
|
| 712 |
lyra_strength: float = 1.0,
|
| 713 |
progress_callback=None
|
|
|
|
| 719 |
else:
|
| 720 |
generator = None
|
| 721 |
|
| 722 |
+
# Encode prompts (Lyra triggers lazy load only if use_lyra=True)
|
| 723 |
+
if use_lyra and self.lyra_available:
|
| 724 |
prompt_embeds, negative_prompt_embeds, pooled, negative_pooled = self.encode_prompt_lyra(
|
| 725 |
prompt, negative_prompt, clip_skip, t5_summary, lyra_strength
|
| 726 |
)
|
|
|
|
| 745 |
self.scheduler.set_timesteps(num_inference_steps, device=self.device)
|
| 746 |
timesteps = self.scheduler.timesteps
|
| 747 |
|
| 748 |
+
if not use_flow_matching:
|
| 749 |
+
latents = latents * self.scheduler.init_noise_sigma
|
| 750 |
|
| 751 |
+
# Prepare added time embeddings for SDXL
|
| 752 |
original_size = (height, width)
|
| 753 |
target_size = (height, width)
|
| 754 |
crops_coords_top_left = (0, 0)
|
|
|
|
| 764 |
progress_callback(i, num_inference_steps, f"Step {i+1}/{num_inference_steps}")
|
| 765 |
|
| 766 |
latent_model_input = torch.cat([latents] * 2) if guidance_scale > 1.0 else latents
|
| 767 |
+
|
| 768 |
+
if use_flow_matching and shift > 0:
|
| 769 |
+
sigma = t.float() / 1000.0
|
| 770 |
+
sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
|
| 771 |
+
scaling = torch.sqrt(1 + sigma_shifted ** 2)
|
| 772 |
+
latent_model_input = latent_model_input / scaling
|
| 773 |
+
else:
|
| 774 |
+
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
|
| 775 |
|
| 776 |
timestep = t.expand(latent_model_input.shape[0])
|
| 777 |
|
|
|
|
| 801 |
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
| 802 |
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
| 803 |
|
| 804 |
+
if use_flow_matching:
|
| 805 |
+
sigma = t.float() / 1000.0
|
| 806 |
+
sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
|
| 807 |
+
|
| 808 |
+
if prediction_type == "v_prediction":
|
| 809 |
+
v_pred = noise_pred
|
| 810 |
+
alpha_t = torch.sqrt(1 - sigma_shifted ** 2)
|
| 811 |
+
sigma_t = sigma_shifted
|
| 812 |
+
noise_pred = alpha_t * v_pred + sigma_t * latents
|
| 813 |
+
|
| 814 |
+
dt = -1.0 / num_inference_steps
|
| 815 |
+
latents = latents + dt * noise_pred
|
| 816 |
+
else:
|
| 817 |
+
latents = self.scheduler.step(
|
| 818 |
+
noise_pred, t, latents, return_dict=False
|
| 819 |
+
)[0]
|
| 820 |
|
| 821 |
# Decode
|
| 822 |
latents = latents / self.vae_scale_factor
|
|
|
|
| 832 |
return image
|
| 833 |
|
| 834 |
|
| 835 |
+
# ============================================================================
|
| 836 |
+
# SD1.5 PIPELINE
|
| 837 |
+
# ============================================================================
|
| 838 |
+
|
| 839 |
+
class SD15FlowMatchingPipeline:
|
| 840 |
+
"""Pipeline for SD1.5-based flow-matching inference."""
|
| 841 |
+
|
| 842 |
+
def __init__(
|
| 843 |
+
self,
|
| 844 |
+
vae: AutoencoderKL,
|
| 845 |
+
text_encoder: CLIPTextModel,
|
| 846 |
+
tokenizer: CLIPTokenizer,
|
| 847 |
+
unet: UNet2DConditionModel,
|
| 848 |
+
scheduler,
|
| 849 |
+
device: str = "cuda",
|
| 850 |
+
t5_loader: Optional[LazyT5Encoder] = None,
|
| 851 |
+
lyra_loader: Optional[LazyLyraModel] = None,
|
| 852 |
+
):
|
| 853 |
+
self.vae = vae
|
| 854 |
+
self.text_encoder = text_encoder
|
| 855 |
+
self.tokenizer = tokenizer
|
| 856 |
+
self.unet = unet
|
| 857 |
+
self.scheduler = scheduler
|
| 858 |
+
self.device = device
|
| 859 |
+
|
| 860 |
+
self.t5_loader = t5_loader
|
| 861 |
+
self.lyra_loader = lyra_loader
|
| 862 |
+
|
| 863 |
+
self.vae_scale_factor = 0.18215
|
| 864 |
+
self.arch = ARCH_SD15
|
| 865 |
+
self.is_lune_model = False
|
| 866 |
+
|
| 867 |
+
@property
|
| 868 |
+
def t5_encoder(self):
|
| 869 |
+
return self.t5_loader.encoder if self.t5_loader else None
|
| 870 |
+
|
| 871 |
+
@property
|
| 872 |
+
def t5_tokenizer(self):
|
| 873 |
+
return self.t5_loader.tokenizer if self.t5_loader else None
|
| 874 |
+
|
| 875 |
+
@property
|
| 876 |
+
def lyra_model(self):
|
| 877 |
+
return self.lyra_loader.model if self.lyra_loader else None
|
| 878 |
+
|
| 879 |
+
@property
|
| 880 |
+
def lyra_available(self) -> bool:
|
| 881 |
+
return self.t5_loader is not None and self.lyra_loader is not None
|
| 882 |
+
|
| 883 |
+
def encode_prompt(self, prompt: str, negative_prompt: str = ""):
|
| 884 |
+
"""Encode text prompts to embeddings."""
|
| 885 |
+
text_inputs = self.tokenizer(
|
| 886 |
+
prompt,
|
| 887 |
+
padding="max_length",
|
| 888 |
+
max_length=self.tokenizer.model_max_length,
|
| 889 |
+
truncation=True,
|
| 890 |
+
return_tensors="pt",
|
| 891 |
+
)
|
| 892 |
+
text_input_ids = text_inputs.input_ids.to(self.device)
|
| 893 |
+
|
| 894 |
+
with torch.no_grad():
|
| 895 |
+
prompt_embeds = self.text_encoder(text_input_ids)[0]
|
| 896 |
+
|
| 897 |
+
if negative_prompt:
|
| 898 |
+
uncond_inputs = self.tokenizer(
|
| 899 |
+
negative_prompt,
|
| 900 |
+
padding="max_length",
|
| 901 |
+
max_length=self.tokenizer.model_max_length,
|
| 902 |
+
truncation=True,
|
| 903 |
+
return_tensors="pt",
|
| 904 |
+
)
|
| 905 |
+
uncond_input_ids = uncond_inputs.input_ids.to(self.device)
|
| 906 |
+
|
| 907 |
+
with torch.no_grad():
|
| 908 |
+
negative_prompt_embeds = self.text_encoder(uncond_input_ids)[0]
|
| 909 |
+
else:
|
| 910 |
+
negative_prompt_embeds = torch.zeros_like(prompt_embeds)
|
| 911 |
+
|
| 912 |
+
return prompt_embeds, negative_prompt_embeds
|
| 913 |
+
|
| 914 |
+
def encode_prompt_lyra(self, prompt: str, negative_prompt: str = ""):
|
| 915 |
+
"""Encode using Lyra VAE (CLIP + T5 fusion)."""
|
| 916 |
+
if not self.lyra_available:
|
| 917 |
+
raise ValueError("Lyra VAE components not configured")
|
| 918 |
+
|
| 919 |
+
t5_encoder = self.t5_encoder
|
| 920 |
+
t5_tokenizer = self.t5_tokenizer
|
| 921 |
+
lyra_model = self.lyra_model
|
| 922 |
+
|
| 923 |
+
# CLIP
|
| 924 |
+
text_inputs = self.tokenizer(
|
| 925 |
+
prompt,
|
| 926 |
+
padding="max_length",
|
| 927 |
+
max_length=self.tokenizer.model_max_length,
|
| 928 |
+
truncation=True,
|
| 929 |
+
return_tensors="pt",
|
| 930 |
+
)
|
| 931 |
+
text_input_ids = text_inputs.input_ids.to(self.device)
|
| 932 |
+
|
| 933 |
+
with torch.no_grad():
|
| 934 |
+
clip_embeds = self.text_encoder(text_input_ids)[0]
|
| 935 |
+
|
| 936 |
+
# T5
|
| 937 |
+
t5_inputs = t5_tokenizer(
|
| 938 |
+
prompt,
|
| 939 |
+
max_length=77,
|
| 940 |
+
padding='max_length',
|
| 941 |
+
truncation=True,
|
| 942 |
+
return_tensors='pt'
|
| 943 |
+
).to(self.device)
|
| 944 |
+
|
| 945 |
+
with torch.no_grad():
|
| 946 |
+
t5_embeds = t5_encoder(**t5_inputs).last_hidden_state
|
| 947 |
+
|
| 948 |
+
# Fuse
|
| 949 |
+
modality_inputs = {'clip': clip_embeds, 't5': t5_embeds}
|
| 950 |
+
|
| 951 |
+
with torch.no_grad():
|
| 952 |
+
reconstructions, mu, logvar = lyra_model(
|
| 953 |
+
modality_inputs,
|
| 954 |
+
target_modalities=['clip']
|
| 955 |
+
)
|
| 956 |
+
prompt_embeds = reconstructions['clip']
|
| 957 |
+
|
| 958 |
+
# Negative
|
| 959 |
+
if negative_prompt:
|
| 960 |
+
uncond_inputs = self.tokenizer(
|
| 961 |
+
negative_prompt,
|
| 962 |
+
padding="max_length",
|
| 963 |
+
max_length=self.tokenizer.model_max_length,
|
| 964 |
+
truncation=True,
|
| 965 |
+
return_tensors="pt",
|
| 966 |
+
)
|
| 967 |
+
uncond_input_ids = uncond_inputs.input_ids.to(self.device)
|
| 968 |
+
|
| 969 |
+
with torch.no_grad():
|
| 970 |
+
clip_embeds_uncond = self.text_encoder(uncond_input_ids)[0]
|
| 971 |
+
|
| 972 |
+
t5_inputs_uncond = t5_tokenizer(
|
| 973 |
+
negative_prompt,
|
| 974 |
+
max_length=77,
|
| 975 |
+
padding='max_length',
|
| 976 |
+
truncation=True,
|
| 977 |
+
return_tensors='pt'
|
| 978 |
+
).to(self.device)
|
| 979 |
+
|
| 980 |
+
with torch.no_grad():
|
| 981 |
+
t5_embeds_uncond = t5_encoder(**t5_inputs_uncond).last_hidden_state
|
| 982 |
+
|
| 983 |
+
modality_inputs_uncond = {'clip': clip_embeds_uncond, 't5': t5_embeds_uncond}
|
| 984 |
+
|
| 985 |
+
with torch.no_grad():
|
| 986 |
+
reconstructions_uncond, _, _ = lyra_model(
|
| 987 |
+
modality_inputs_uncond,
|
| 988 |
+
target_modalities=['clip']
|
| 989 |
+
)
|
| 990 |
+
negative_prompt_embeds = reconstructions_uncond['clip']
|
| 991 |
+
else:
|
| 992 |
+
negative_prompt_embeds = torch.zeros_like(prompt_embeds)
|
| 993 |
+
|
| 994 |
+
return prompt_embeds, negative_prompt_embeds
|
| 995 |
+
|
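# `lyra_strength` is threaded through the UI but its application is not shown
# in this diff. If it is a linear blend (an assumption, suggested by the
# slider's "0.0 = pure CLIP, 1.0 = pure Lyra" hint), it would look like:
import torch
def blend_embeddings(clip_embeds: torch.Tensor,
                     lyra_embeds: torch.Tensor,
                     strength: float) -> torch.Tensor:
    # strength=0.0 -> pure CLIP, 1.0 -> pure Lyra, >1.0 extrapolates past it
    return clip_embeds + strength * (lyra_embeds - clip_embeds)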
| 996 |
+
@torch.no_grad()
|
| 997 |
+
def __call__(
|
| 998 |
+
self,
|
| 999 |
+
prompt: str,
|
| 1000 |
+
negative_prompt: str = "",
|
| 1001 |
+
height: int = 512,
|
| 1002 |
+
width: int = 512,
|
| 1003 |
+
num_inference_steps: int = 20,
|
| 1004 |
+
guidance_scale: float = 7.5,
|
| 1005 |
+
shift: float = 2.5,
|
| 1006 |
+
use_flow_matching: bool = True,
|
| 1007 |
+
prediction_type: str = "epsilon",
|
| 1008 |
+
seed: Optional[int] = None,
|
| 1009 |
+
use_lyra: bool = False,
|
| 1010 |
+
clip_skip: int = 1,
|
| 1011 |
+
t5_summary: str = "",
|
| 1012 |
+
lyra_strength: float = 1.0,
|
| 1013 |
+
progress_callback=None
|
| 1014 |
+
):
|
| 1015 |
+
"""Generate image."""
|
| 1016 |
+
|
| 1017 |
+
if seed is not None:
|
| 1018 |
+
generator = torch.Generator(device=self.device).manual_seed(seed)
|
| 1019 |
+
else:
|
| 1020 |
+
generator = None
|
| 1021 |
+
|
| 1022 |
+
if use_lyra and self.lyra_available:
|
| 1023 |
+
prompt_embeds, negative_prompt_embeds = self.encode_prompt_lyra(prompt, negative_prompt)
|
| 1024 |
+
else:
|
| 1025 |
+
prompt_embeds, negative_prompt_embeds = self.encode_prompt(prompt, negative_prompt)
|
| 1026 |
+
|
| 1027 |
+
latent_channels = 4
|
| 1028 |
+
latent_height = height // 8
|
| 1029 |
+
latent_width = width // 8
|
| 1030 |
+
|
| 1031 |
+
latents = torch.randn(
|
| 1032 |
+
(1, latent_channels, latent_height, latent_width),
|
| 1033 |
+
generator=generator,
|
| 1034 |
+
device=self.device,
|
| 1035 |
+
dtype=torch.float32
|
| 1036 |
+
)
|
| 1037 |
+
|
| 1038 |
+
self.scheduler.set_timesteps(num_inference_steps, device=self.device)
|
| 1039 |
+
timesteps = self.scheduler.timesteps
|
| 1040 |
+
|
| 1041 |
+
if not use_flow_matching:
|
| 1042 |
+
latents = latents * self.scheduler.init_noise_sigma
|
| 1043 |
+
|
| 1044 |
+
for i, t in enumerate(timesteps):
|
| 1045 |
+
if progress_callback:
|
| 1046 |
+
progress_callback(i, num_inference_steps, f"Step {i+1}/{num_inference_steps}")
|
| 1047 |
+
|
| 1048 |
+
latent_model_input = torch.cat([latents] * 2) if guidance_scale > 1.0 else latents
|
| 1049 |
+
|
| 1050 |
+
if use_flow_matching and shift > 0:
|
| 1051 |
+
sigma = t.float() / 1000.0  # normalize timestep, assuming 1000 train timesteps
|
| 1052 |
+
sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
|
| 1053 |
+
scaling = torch.sqrt(1 + sigma_shifted ** 2)
|
| 1054 |
+
latent_model_input = latent_model_input / scaling
|
| 1055 |
+
else:
|
| 1056 |
+
latent_model_input = self.scheduler.scale_model_input(latent_model_input, t)
|
| 1057 |
+
|
| 1058 |
+
timestep = t.expand(latent_model_input.shape[0])
|
| 1059 |
+
text_embeds = torch.cat([negative_prompt_embeds, prompt_embeds]) if guidance_scale > 1.0 else prompt_embeds
|
| 1060 |
+
|
| 1061 |
+
noise_pred = self.unet(
|
| 1062 |
+
latent_model_input,
|
| 1063 |
+
timestep,
|
| 1064 |
+
encoder_hidden_states=text_embeds,
|
| 1065 |
+
return_dict=False
|
| 1066 |
+
)[0]
|
| 1067 |
+
|
| 1068 |
+
if guidance_scale > 1.0:
|
| 1069 |
+
noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
|
| 1070 |
+
noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
|
| 1071 |
+
|
| 1072 |
+
if use_flow_matching:
|
| 1073 |
+
sigma = t.float() / 1000.0
|
| 1074 |
+
sigma_shifted = (shift * sigma) / (1 + (shift - 1) * sigma)
|
| 1075 |
+
|
| 1076 |
+
if prediction_type == "v_prediction":
|
| 1077 |
+
v_pred = noise_pred
|
| 1078 |
+
alpha_t = torch.sqrt(1 - sigma_shifted ** 2)
|
| 1079 |
+
sigma_t = sigma_shifted
|
| 1080 |
+
noise_pred = alpha_t * v_pred + sigma_t * latents
|
| 1081 |
+
|
| 1082 |
+
dt = -1.0 / num_inference_steps
|
| 1083 |
+
latents = latents + dt * noise_pred
|
| 1084 |
+
else:
|
| 1085 |
+
latents = self.scheduler.step(noise_pred, t, latents, return_dict=False)[0]
|
| 1086 |
+
|
| 1087 |
+
latents = latents / self.vae_scale_factor
|
| 1088 |
+
|
| 1089 |
+
if self.is_lune_model:
|
| 1090 |
+
latents = latents * 5.52
|
| 1091 |
+
|
| 1092 |
+
with torch.no_grad():
|
| 1093 |
+
image = self.vae.decode(latents).sample
|
| 1094 |
+
|
| 1095 |
+
image = (image / 2 + 0.5).clamp(0, 1)
|
| 1096 |
+
image = image.cpu().permute(0, 2, 3, 1).float().numpy()
|
| 1097 |
+
image = (image * 255).round().astype("uint8")
|
| 1098 |
+
image = Image.fromarray(image[0])
|
| 1099 |
+
|
| 1100 |
+
return image
|
| 1101 |
+
|
| 1102 |
+
|
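# Sanity check for the timestep shift both pipelines apply:
# sigma' = shift * sigma / (1 + (shift - 1) * sigma). It fixes the endpoints
# 0 and 1, is the identity at shift=1, and pushes intermediate sigmas toward 1
# (more steps spent at high noise) when shift > 1.
def shift_sigma(sigma: float, shift: float) -> float:
    return (shift * sigma) / (1 + (shift - 1) * sigma)
assert shift_sigma(0.0, 2.5) == 0.0
assert shift_sigma(1.0, 2.5) == 1.0
assert abs(shift_sigma(0.5, 1.0) - 0.5) < 1e-12   # shift=1 is the identity
assert shift_sigma(0.5, 2.5) > 0.5                # shift>1 raises mid sigmas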
| 1103 |
# ============================================================================
|
| 1104 |
# MODEL LOADERS
|
| 1105 |
# ============================================================================
|
| 1106 |
|
| 1107 |
+
def load_lune_checkpoint(repo_id: str, filename: str, device: str = "cuda"):
|
| 1108 |
+
"""Load Lune checkpoint from .pt file."""
|
| 1109 |
+
print(f"📥 Downloading: {repo_id}/{filename}")
|
| 1110 |
+
|
| 1111 |
+
checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model")
|
| 1112 |
+
checkpoint = torch.load(checkpoint_path, map_location="cpu")
|
| 1113 |
+
|
| 1114 |
+
print(f"🏗️ Initializing SD1.5 UNet...")
|
| 1115 |
+
unet = UNet2DConditionModel.from_pretrained(
|
| 1116 |
+
"runwayml/stable-diffusion-v1-5",
|
| 1117 |
+
subfolder="unet",
|
| 1118 |
+
torch_dtype=torch.float32
|
| 1119 |
+
)
|
| 1120 |
+
|
| 1121 |
+
student_state_dict = checkpoint["student"]
|
| 1122 |
+
cleaned_dict = {}
|
| 1123 |
+
for key, value in student_state_dict.items():
|
| 1124 |
+
if key.startswith("unet."):
|
| 1125 |
+
cleaned_dict[key[5:]] = value
|
| 1126 |
+
else:
|
| 1127 |
+
cleaned_dict[key] = value
|
| 1128 |
+
|
| 1129 |
+
unet.load_state_dict(cleaned_dict, strict=False)  # strict=False tolerates missing/unexpected keys
|
| 1130 |
+
|
| 1131 |
+
step = checkpoint.get("gstep", "unknown")
|
| 1132 |
+
print(f"✅ Loaded Lune from step {step}")
|
| 1133 |
+
|
| 1134 |
+
return unet.to(device)
|
| 1135 |
+
|
| 1136 |
+
|
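# Tiny illustration of the "unet." prefix stripping in load_lune_checkpoint
# above (dummy keys; the real checkpoint stores a {"student": ..., "gstep": ...}
# dict, as read by that function):
state = {"unet.conv_in.weight": 0, "time_embedding.linear_1.weight": 1}
cleaned = {(k[5:] if k.startswith("unet.") else k): v for k, v in state.items()}
assert set(cleaned) == {"conv_in.weight", "time_embedding.linear_1.weight"}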
| 1137 |
def load_illustrious_xl(
|
| 1138 |
+
repo_id: str = "AbstractPhil/vae-lyra-xl-adaptive-cantor-illustrious",
|
| 1139 |
filename: str = "illustriousXL_v01.safetensors",
|
| 1140 |
device: str = "cuda"
|
| 1141 |
) -> Tuple[UNet2DConditionModel, AutoencoderKL, CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer, CLIPTokenizer]:
|
|
|
|
| 1147 |
checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="model")
|
| 1148 |
print(f"✓ Downloaded: {checkpoint_path}")
|
| 1149 |
|
| 1150 |
+
print("📦 Loading with StableDiffusionXLPipeline.from_single_file()...")
|
| 1151 |
pipe = StableDiffusionXLPipeline.from_single_file(
|
| 1152 |
checkpoint_path,
|
| 1153 |
torch_dtype=torch.float16,
|
|
|
|
| 1165 |
torch.cuda.empty_cache()
|
| 1166 |
|
| 1167 |
print("✅ Illustrious XL loaded!")
|
| 1168 |
+
print(f" UNet params: {sum(p.numel() for p in unet.parameters()):,}")
|
| 1169 |
+
print(f" VAE params: {sum(p.numel() for p in vae.parameters()):,}")
|
| 1170 |
+
|
| 1171 |
+
return unet, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2
|
| 1172 |
+
|
| 1173 |
+
|
| 1174 |
+
def load_sdxl_base(device: str = "cuda"):
|
| 1175 |
+
"""Load standard SDXL base model."""
|
| 1176 |
+
print("📥 Loading SDXL Base 1.0...")
|
| 1177 |
+
|
| 1178 |
+
unet = UNet2DConditionModel.from_pretrained(
|
| 1179 |
+
"stabilityai/stable-diffusion-xl-base-1.0",
|
| 1180 |
+
subfolder="unet",
|
| 1181 |
+
torch_dtype=torch.float16
|
| 1182 |
+
).to(device)
|
| 1183 |
+
|
| 1184 |
+
vae = AutoencoderKL.from_pretrained(
|
| 1185 |
+
"stabilityai/stable-diffusion-xl-base-1.0",
|
| 1186 |
+
subfolder="vae",
|
| 1187 |
+
torch_dtype=torch.float16
|
| 1188 |
+
).to(device)
|
| 1189 |
+
|
| 1190 |
+
text_encoder = CLIPTextModel.from_pretrained(
|
| 1191 |
+
"stabilityai/stable-diffusion-xl-base-1.0",
|
| 1192 |
+
subfolder="text_encoder",
|
| 1193 |
+
torch_dtype=torch.float16
|
| 1194 |
+
).to(device)
|
| 1195 |
+
|
| 1196 |
+
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
|
| 1197 |
+
"stabilityai/stable-diffusion-xl-base-1.0",
|
| 1198 |
+
subfolder="text_encoder_2",
|
| 1199 |
+
torch_dtype=torch.float16
|
| 1200 |
+
).to(device)
|
| 1201 |
+
|
| 1202 |
+
tokenizer = CLIPTokenizer.from_pretrained(
|
| 1203 |
+
"stabilityai/stable-diffusion-xl-base-1.0",
|
| 1204 |
+
subfolder="tokenizer"
|
| 1205 |
+
)
|
| 1206 |
+
|
| 1207 |
+
tokenizer_2 = CLIPTokenizer.from_pretrained(
|
| 1208 |
+
"stabilityai/stable-diffusion-xl-base-1.0",
|
| 1209 |
+
subfolder="tokenizer_2"
|
| 1210 |
+
)
|
| 1211 |
+
|
| 1212 |
+
print("✅ SDXL Base loaded!")
|
| 1213 |
|
| 1214 |
return unet, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2
|
| 1215 |
|
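# The six separate from_pretrained calls above could also be collapsed into a
# single pipeline load and unpacked; a hedged alternative sketch (same repo
# and dtype, components then moved to the device as needed):
import torch
from diffusers import StableDiffusionXLPipeline
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
)
unet, vae = pipe.unet, pipe.vae
text_encoder, text_encoder_2 = pipe.text_encoder, pipe.text_encoder_2
tokenizer, tokenizer_2 = pipe.tokenizer, pipe.tokenizer_2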
|
|
|
| 1218 |
# PIPELINE INITIALIZATION
|
| 1219 |
# ============================================================================
|
| 1220 |
|
| 1221 |
+
def initialize_pipeline(model_choice: str, device: str = "cuda"):
|
| 1222 |
+
"""Initialize the complete pipeline based on model choice.
|
| 1223 |
+
|
| 1224 |
+
Uses lazy loading for T5 and Lyra; they are not downloaded until first use.
|
| 1225 |
+
"""
|
|
|
|
| 1226 |
|
| 1227 |
print(f"🚀 Initializing {model_choice} pipeline...")
|
| 1228 |
|
| 1229 |
+
is_sdxl = "Illustrious" in model_choice or "SDXL" in model_choice
|
| 1230 |
+
is_lune = "Lune" in model_choice
|
| 1231 |
+
|
| 1232 |
+
if is_sdxl:
|
| 1233 |
+
# SDXL-based models
|
| 1234 |
+
if "Illustrious" in model_choice:
|
| 1235 |
+
unet, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2 = load_illustrious_xl(device=device)
|
| 1236 |
+
else:
|
| 1237 |
+
unet, vae, text_encoder, text_encoder_2, tokenizer, tokenizer_2 = load_sdxl_base(device=device)
|
| 1238 |
+
|
| 1239 |
+
# Create LAZY loaders for T5 and Lyra (no download yet!)
|
| 1240 |
+
print("📋 Configuring lazy loaders for T5-XL and Lyra VAE (will download on first use)")
|
| 1241 |
+
t5_loader = LazyT5Encoder(
|
| 1242 |
+
model_name=T5_XL_MODEL, # google/flan-t5-xl
|
| 1243 |
+
device=device,
|
| 1244 |
+
dtype=torch.float16
|
| 1245 |
+
)
|
| 1246 |
+
lyra_loader = LazyLyraModel(
|
| 1247 |
+
repo_id=LYRA_ILLUSTRIOUS_REPO,
|
| 1248 |
+
device=device
|
| 1249 |
+
)
|
| 1250 |
+
|
| 1251 |
+
# Default scheduler: Euler Ancestral
|
| 1252 |
+
scheduler = get_scheduler(SCHEDULER_EULER_A, is_sdxl=True)
|
| 1253 |
+
|
| 1254 |
+
pipeline = SDXLFlowMatchingPipeline(
|
| 1255 |
+
vae=vae,
|
| 1256 |
+
text_encoder=text_encoder,
|
| 1257 |
+
text_encoder_2=text_encoder_2,
|
| 1258 |
+
tokenizer=tokenizer,
|
| 1259 |
+
tokenizer_2=tokenizer_2,
|
| 1260 |
+
unet=unet,
|
| 1261 |
+
scheduler=scheduler,
|
| 1262 |
+
device=device,
|
| 1263 |
+
t5_loader=t5_loader,
|
| 1264 |
+
lyra_loader=lyra_loader,
|
| 1265 |
+
clip_skip=1
|
| 1266 |
+
)
|
| 1267 |
+
|
| 1268 |
else:
|
| 1269 |
+
# SD1.5-based models
|
| 1270 |
+
vae = AutoencoderKL.from_pretrained(
|
| 1271 |
+
"runwayml/stable-diffusion-v1-5",
|
| 1272 |
+
subfolder="vae",
|
| 1273 |
+
torch_dtype=torch.float32
|
| 1274 |
+
).to(device)
|
| 1275 |
+
|
| 1276 |
+
text_encoder = CLIPTextModel.from_pretrained(
|
| 1277 |
+
"openai/clip-vit-large-patch14",
|
| 1278 |
+
torch_dtype=torch.float32
|
| 1279 |
+
).to(device)
|
| 1280 |
+
|
| 1281 |
+
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
|
| 1282 |
+
|
| 1283 |
+
# Lazy loaders for SD1.5 Lyra (T5-base)
|
| 1284 |
+
print("📋 Configuring lazy loaders for T5-base and Lyra VAE v1 (will download on first use)")
|
| 1285 |
+
t5_loader = LazyT5Encoder(
|
| 1286 |
+
model_name=T5_BASE_MODEL, # google/flan-t5-base
|
| 1287 |
+
device=device,
|
| 1288 |
+
dtype=torch.float32
|
| 1289 |
)
|
| 1290 |
+
lyra_loader = LazyLyraModel(
|
| 1291 |
+
repo_id=LYRA_SD15_REPO,
|
| 1292 |
+
device=device
|
| 1293 |
+
)
|
| 1294 |
+
|
| 1295 |
+
# Load UNet
|
| 1296 |
+
if is_lune:
|
| 1297 |
+
repo_id = "AbstractPhil/sd15-flow-lune"
|
| 1298 |
+
filename = "sd15_flow_lune_e34_s34000.pt"
|
| 1299 |
+
unet = load_lune_checkpoint(repo_id, filename, device)
|
| 1300 |
+
else:
|
| 1301 |
+
unet = UNet2DConditionModel.from_pretrained(
|
| 1302 |
+
"runwayml/stable-diffusion-v1-5",
|
| 1303 |
+
subfolder="unet",
|
| 1304 |
+
torch_dtype=torch.float32
|
| 1305 |
+
).to(device)
|
| 1306 |
+
|
| 1307 |
+
scheduler = EulerDiscreteScheduler.from_pretrained(
|
| 1308 |
+
"runwayml/stable-diffusion-v1-5",
|
| 1309 |
+
subfolder="scheduler"
|
| 1310 |
+
)
|
| 1311 |
+
|
| 1312 |
+
pipeline = SD15FlowMatchingPipeline(
|
| 1313 |
+
vae=vae,
|
| 1314 |
+
text_encoder=text_encoder,
|
| 1315 |
+
tokenizer=tokenizer,
|
| 1316 |
+
unet=unet,
|
| 1317 |
+
scheduler=scheduler,
|
| 1318 |
+
device=device,
|
| 1319 |
+
t5_loader=t5_loader,
|
| 1320 |
+
lyra_loader=lyra_loader,
|
| 1321 |
+
)
|
| 1322 |
+
|
| 1323 |
+
pipeline.is_lune_model = is_lune
|
| 1324 |
|
| 1325 |
+
print("✅ Pipeline initialized! (T5 and Lyra will load on first use)")
|
| 1326 |
return pipeline
|
| 1327 |
|
| 1328 |
|
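# Typical call pattern for the factory above (model names match the Gradio
# dropdown defined later in this file; illustrative values only):
# pipe = initialize_pipeline("Illustrious XL", device="cuda")
# image = pipe(prompt="1girl, blue hair", negative_prompt="lowres",
#              num_inference_steps=25, guidance_scale=7.0, seed=42)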
|
|
|
| 1332 |
|
| 1333 |
CURRENT_PIPELINE = None
|
| 1334 |
CURRENT_MODEL = None
|
|
|
|
| 1335 |
|
| 1336 |
|
| 1337 |
+
def get_pipeline(model_choice: str):
|
| 1338 |
"""Get or create pipeline for selected model."""
|
| 1339 |
+
global CURRENT_PIPELINE, CURRENT_MODEL
|
| 1340 |
|
| 1341 |
if CURRENT_PIPELINE is None or CURRENT_MODEL != model_choice:
|
| 1342 |
+
CURRENT_PIPELINE = initialize_pipeline(model_choice, device="cuda")
|
| 1343 |
CURRENT_MODEL = model_choice
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1344 |
|
| 1345 |
return CURRENT_PIPELINE
|
| 1346 |
|
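# The module-level cache above holds exactly one pipeline; picking a different
# model rebuilds everything without explicitly freeing the old one. If VRAM
# pressure becomes an issue, dropping the stale reference first is the usual
# fix (a sketch, not present in this diff):
# if CURRENT_PIPELINE is not None and CURRENT_MODEL != model_choice:
#     CURRENT_PIPELINE = None
#     torch.cuda.empty_cache()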
|
|
|
| 1349 |
# INFERENCE
|
| 1350 |
# ============================================================================
|
| 1351 |
|
| 1352 |
+
def estimate_duration(num_steps: int, width: int, height: int, use_lyra: bool = False, is_sdxl: bool = False) -> int:
|
| 1353 |
+
"""Estimate GPU duration."""
|
| 1354 |
+
base_time_per_step = 0.5 if is_sdxl else 0.3
|
| 1355 |
+
resolution_factor = (width * height) / (512 * 512)
|
| 1356 |
+
estimated = num_steps * base_time_per_step * resolution_factor
|
| 1357 |
+
|
| 1358 |
+
if use_lyra:
|
| 1359 |
+
estimated *= 2
|
| 1360 |
+
estimated += 10 # Extra time for lazy loading on first use
|
| 1361 |
+
|
| 1362 |
+
return int(estimated + 20)
|
| 1363 |
+
|
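# Worked example of estimate_duration above: SDXL at 1024x1024, 25 steps,
# Lyra enabled -> 25 * 0.5 s/step * 4.0 resolution factor = 50 s, doubled for
# the Lyra comparison pass = 100 s, + 10 s lazy-load allowance, + 20 s buffer:
assert estimate_duration(25, 1024, 1024, use_lyra=True, is_sdxl=True) == 130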
| 1364 |
+
|
| 1365 |
+
@spaces.GPU(duration=lambda *args: estimate_duration(
|
| 1366 |
+
args[6], args[8], args[9], args[12],  # num_steps, width, height, use_lyra
|
| 1367 |
+
"SDXL" in args[3] or "Illustrious" in args[3]
|
| 1368 |
+
))
|
| 1369 |
def generate_image(
|
| 1370 |
prompt: str,
|
| 1371 |
t5_summary: str,
|
| 1372 |
negative_prompt: str,
|
| 1373 |
model_choice: str,
|
| 1374 |
+
scheduler_choice: str,
|
| 1375 |
clip_skip: int,
|
| 1376 |
num_steps: int,
|
| 1377 |
cfg_scale: float,
|
| 1378 |
width: int,
|
| 1379 |
height: int,
|
| 1380 |
+
shift: float,
|
| 1381 |
+
use_flow_matching: bool,
|
| 1382 |
use_lyra: bool,
|
| 1383 |
lyra_strength: float,
|
| 1384 |
seed: int,
|
|
|
|
| 1394 |
progress((step + 1) / total, desc=desc)
|
| 1395 |
|
| 1396 |
try:
|
| 1397 |
+
pipeline = get_pipeline(model_choice)
|
| 1398 |
+
|
| 1399 |
+
# Update scheduler if needed (SDXL only)
|
| 1400 |
+
is_sdxl = "SDXL" in model_choice or "Illustrious" in model_choice
|
| 1401 |
+
if is_sdxl and hasattr(pipeline, 'set_scheduler'):
|
| 1402 |
+
pipeline.set_scheduler(scheduler_choice)
|
| 1403 |
|
| 1404 |
+
prediction_type = "epsilon"
|
| 1405 |
+
if not is_sdxl and "Lune" in model_choice:
|
| 1406 |
+
prediction_type = "v_prediction"
|
| 1407 |
+
|
| 1408 |
+
if not use_lyra or not pipeline.lyra_available:
|
| 1409 |
progress(0.05, desc="Generating...")
|
| 1410 |
|
| 1411 |
image = pipeline(
|
|
|
|
| 1415 |
width=width,
|
| 1416 |
num_inference_steps=num_steps,
|
| 1417 |
guidance_scale=cfg_scale,
|
| 1418 |
+
shift=shift,
|
| 1419 |
+
use_flow_matching=use_flow_matching,
|
| 1420 |
+
prediction_type=prediction_type,
|
| 1421 |
seed=seed,
|
| 1422 |
use_lyra=False,
|
| 1423 |
clip_skip=clip_skip,
|
|
|
|
| 1428 |
return image, None, seed
|
| 1429 |
|
| 1430 |
else:
|
| 1431 |
+
# Side-by-side comparison
|
| 1432 |
progress(0.05, desc="Generating standard...")
|
| 1433 |
|
| 1434 |
image_standard = pipeline(
|
|
|
|
| 1438 |
width=width,
|
| 1439 |
num_inference_steps=num_steps,
|
| 1440 |
guidance_scale=cfg_scale,
|
| 1441 |
+
shift=shift,
|
| 1442 |
+
use_flow_matching=use_flow_matching,
|
| 1443 |
+
prediction_type=prediction_type,
|
| 1444 |
seed=seed,
|
| 1445 |
use_lyra=False,
|
| 1446 |
clip_skip=clip_skip,
|
| 1447 |
progress_callback=lambda s, t, d: progress(0.05 + (s/t) * 0.45, desc=d)
|
| 1448 |
)
|
| 1449 |
|
| 1450 |
+
progress(0.5, desc="Generating Lyra fusion (loading T5 + Lyra if needed)...")
|
| 1451 |
|
| 1452 |
image_lyra = pipeline(
|
| 1453 |
prompt=prompt,
|
|
|
|
| 1456 |
width=width,
|
| 1457 |
num_inference_steps=num_steps,
|
| 1458 |
guidance_scale=cfg_scale,
|
| 1459 |
+
shift=shift,
|
| 1460 |
+
use_flow_matching=use_flow_matching,
|
| 1461 |
+
prediction_type=prediction_type,
|
| 1462 |
seed=seed,
|
| 1463 |
use_lyra=True,
|
| 1464 |
clip_skip=clip_skip,
|
|
|
|
| 1486 |
|
| 1487 |
with gr.Blocks() as demo:
|
| 1488 |
gr.Markdown("""
|
| 1489 |
+
# 🌙 Lyra/Lune Flow-Matching Image Generation
|
| 1490 |
|
| 1491 |
**Geometric crystalline diffusion** by [AbstractPhil](https://huggingface.co/AbstractPhil)
|
| 1492 |
|
| 1493 |
+
Generate images using SD1.5- and SDXL-based models with geometric deep learning:
|
| 1494 |
+
|
| 1495 |
| Model | Architecture | Lyra Version | Best For |
|
| 1496 |
|-------|-------------|--------------|----------|
|
| 1497 |
| **Illustrious XL** | SDXL | v2 (T5-XL) | Anime/illustration, high detail |
|
| 1498 |
| **SDXL Base** | SDXL | v2 (T5-XL) | Photorealistic, general purpose |
|
| 1499 |
+
| **Flow-Lune** | SD1.5 | v1 (T5-base) | Fast flow matching (15-25 steps) |
|
| 1500 |
+
| **SD1.5 Base** | SD1.5 | v1 (T5-base) | Baseline comparison |
|
| 1501 |
|
| 1502 |
+
**Lazy Loading**: T5 and Lyra VAE are only downloaded when you enable Lyra fusion!
|
|
|
|
| 1503 |
""")
|
| 1504 |
|
| 1505 |
with gr.Row():
|
| 1506 |
with gr.Column(scale=1):
|
| 1507 |
prompt = gr.TextArea(
|
| 1508 |
+
label="Prompt (Tags for CLIP)",
|
| 1509 |
value="masterpiece, best quality, 1girl, blue hair, school uniform, cherry blossoms, detailed background",
|
| 1510 |
lines=3
|
| 1511 |
)
|
| 1512 |
|
| 1513 |
t5_summary = gr.TextArea(
|
| 1514 |
+
label="T5 Summary (Natural Language for Lyra)",
|
| 1515 |
+
value="A beautiful anime girl with flowing blue hair wearing a school uniform, surrounded by delicate pink cherry blossoms against a bright sky",
|
| 1516 |
lines=2,
|
| 1517 |
+
info="Used after ¶ separator for T5. Leave empty to use tags only."
|
| 1518 |
)
|
| 1519 |
|
| 1520 |
negative_prompt = gr.TextArea(
|
| 1521 |
label="Negative Prompt",
|
| 1522 |
+
value="lowres, bad anatomy, bad hands, text, error, cropped, worst quality, low quality",
|
| 1523 |
lines=2
|
| 1524 |
)
|
| 1525 |
|
| 1526 |
+
model_choice = gr.Dropdown(
|
| 1527 |
+
label="Model",
|
| 1528 |
+
choices=[
|
| 1529 |
+
"Illustrious XL",
|
| 1530 |
+
"SDXL Base",
|
| 1531 |
+
"Flow-Lune (SD1.5)",
|
| 1532 |
+
"SD1.5 Base"
|
| 1533 |
+
],
|
| 1534 |
+
value="Illustrious XL"
|
| 1535 |
+
)
|
| 1536 |
+
|
| 1537 |
+
scheduler_choice = gr.Dropdown(
|
| 1538 |
+
label="Scheduler (SDXL only)",
|
| 1539 |
+
choices=SCHEDULER_CHOICES,
|
| 1540 |
+
value=SCHEDULER_EULER_A,
|
| 1541 |
+
info="Euler Ancestral recommended for Illustrious"
|
| 1542 |
+
)
|
| 1543 |
|
| 1544 |
clip_skip = gr.Slider(
|
| 1545 |
label="CLIP Skip",
|
| 1546 |
+
minimum=1,
|
| 1547 |
+
maximum=4,
|
| 1548 |
+
value=2,
|
| 1549 |
+
step=1,
|
| 1550 |
+
info="2 recommended for Illustrious, 1 for others"
|
| 1551 |
)
|
| 1552 |
|
| 1553 |
use_lyra = gr.Checkbox(
|
| 1554 |
+
label="Enable Lyra VAE (CLIP+T5 Fusion)",
|
| 1555 |
value=False,
|
| 1556 |
+
info="Enables lazy loading of T5 and Lyra on first use"
|
| 1557 |
)
|
| 1558 |
|
| 1559 |
lyra_strength = gr.Slider(
|
| 1560 |
label="Lyra Blend Strength",
|
| 1561 |
+
minimum=0.0,
|
| 1562 |
+
maximum=3.0,
|
| 1563 |
+
value=1.0,
|
| 1564 |
+
step=0.05,
|
| 1565 |
+
info="0.0 = pure CLIP, 1.0 = pure Lyra reconstruction"
|
| 1566 |
)
|
| 1567 |
|
| 1568 |
with gr.Accordion("Generation Settings", open=True):
|
| 1569 |
+
num_steps = gr.Slider(
|
| 1570 |
+
label="Steps",
|
| 1571 |
+
minimum=1,
|
| 1572 |
+
maximum=50,
|
| 1573 |
+
value=25,
|
| 1574 |
+
step=1
|
| 1575 |
+
)
|
| 1576 |
+
|
| 1577 |
+
cfg_scale = gr.Slider(
|
| 1578 |
+
label="CFG Scale",
|
| 1579 |
+
minimum=1.0,
|
| 1580 |
+
maximum=20.0,
|
| 1581 |
+
value=7.0,
|
| 1582 |
+
step=0.5
|
| 1583 |
+
)
|
| 1584 |
|
| 1585 |
with gr.Row():
|
| 1586 |
+
width = gr.Slider(
|
| 1587 |
+
label="Width",
|
| 1588 |
+
minimum=512,
|
| 1589 |
+
maximum=1536,
|
| 1590 |
+
value=1024,
|
| 1591 |
+
step=64
|
| 1592 |
+
)
|
| 1593 |
+
height = gr.Slider(
|
| 1594 |
+
label="Height",
|
| 1595 |
+
minimum=512,
|
| 1596 |
+
maximum=1536,
|
| 1597 |
+
value=1024,
|
| 1598 |
+
step=64
|
| 1599 |
+
)
|
| 1600 |
|
| 1601 |
+
seed = gr.Slider(
|
| 1602 |
+
label="Seed",
|
| 1603 |
+
minimum=0,
|
| 1604 |
+
maximum=2**32 - 1,
|
| 1605 |
+
value=42,
|
| 1606 |
+
step=1
|
| 1607 |
+
)
|
| 1608 |
+
|
| 1609 |
+
randomize_seed = gr.Checkbox(
|
| 1610 |
+
label="Randomize Seed",
|
| 1611 |
+
value=True
|
| 1612 |
+
)
|
| 1613 |
+
|
| 1614 |
+
with gr.Accordion("Advanced (Flow Matching)", open=False):
|
| 1615 |
+
use_flow_matching = gr.Checkbox(
|
| 1616 |
+
label="Enable Flow Matching",
|
| 1617 |
+
value=False,
|
| 1618 |
+
info="Use flow matching ODE (for Lune only)"
|
| 1619 |
+
)
|
| 1620 |
+
|
| 1621 |
+
shift = gr.Slider(
|
| 1622 |
+
label="Shift",
|
| 1623 |
+
minimum=0.0,
|
| 1624 |
+
maximum=5.0,
|
| 1625 |
+
value=0.0,
|
| 1626 |
+
step=0.1,
|
| 1627 |
+
info="Flow matching shift (0=disabled)"
|
| 1628 |
+
)
|
| 1629 |
|
| 1630 |
generate_btn = gr.Button("🎨 Generate", variant="primary", size="lg")
|
| 1631 |
|
| 1632 |
with gr.Column(scale=1):
|
| 1633 |
with gr.Row():
|
| 1634 |
+
output_image_standard = gr.Image(
|
| 1635 |
+
label="Generated Image",
|
| 1636 |
+
type="pil"
|
| 1637 |
+
)
|
| 1638 |
+
output_image_lyra = gr.Image(
|
| 1639 |
+
label="Lyra Fusion 🎵",
|
| 1640 |
+
type="pil",
|
| 1641 |
+
visible=False
|
| 1642 |
+
)
|
| 1643 |
|
| 1644 |
output_seed = gr.Number(label="Seed", precision=0)
|
| 1645 |
+
|
| 1646 |
+
gr.Markdown("""
|
| 1647 |
+
### Tips
|
| 1648 |
+
- **Lazy Loading**: T5-XL (~3GB) and Lyra VAE only download when you enable Lyra
|
| 1649 |
+
- **Illustrious XL**: Use CLIP skip 2, Euler Ancestral scheduler
|
| 1650 |
+
- **Schedulers**: DPM++ 2M SDE for detail, Euler A for speed
|
| 1651 |
+
- **Lyra v2**: Uses `google/flan-t5-xl` for richer semantics
|
| 1652 |
+
""")
|
| 1653 |
|
| 1654 |
# Event handlers
|
| 1655 |
+
def on_model_change(model_name):
|
| 1656 |
+
"""Update defaults based on model."""
|
| 1657 |
+
if "Illustrious" in model_name:
|
| 1658 |
+
return {
|
| 1659 |
+
clip_skip: gr.update(value=2),
|
| 1660 |
+
width: gr.update(value=1024),
|
| 1661 |
+
height: gr.update(value=1024),
|
| 1662 |
+
num_steps: gr.update(value=25),
|
| 1663 |
+
use_flow_matching: gr.update(value=False),
|
| 1664 |
+
shift: gr.update(value=0.0),
|
| 1665 |
+
scheduler_choice: gr.update(visible=True, value=SCHEDULER_EULER_A)
|
| 1666 |
+
}
|
| 1667 |
+
elif "SDXL" in model_name:
|
| 1668 |
+
return {
|
| 1669 |
+
clip_skip: gr.update(value=1),
|
| 1670 |
+
width: gr.update(value=1024),
|
| 1671 |
+
height: gr.update(value=1024),
|
| 1672 |
+
num_steps: gr.update(value=30),
|
| 1673 |
+
use_flow_matching: gr.update(value=False),
|
| 1674 |
+
shift: gr.update(value=0.0),
|
| 1675 |
+
scheduler_choice: gr.update(visible=True, value=SCHEDULER_EULER_A)
|
| 1676 |
+
}
|
| 1677 |
+
elif "Lune" in model_name:
|
| 1678 |
+
return {
|
| 1679 |
+
clip_skip: gr.update(value=1),
|
| 1680 |
+
width: gr.update(value=512),
|
| 1681 |
+
height: gr.update(value=512),
|
| 1682 |
+
num_steps: gr.update(value=20),
|
| 1683 |
+
use_flow_matching: gr.update(value=True),
|
| 1684 |
+
shift: gr.update(value=2.5),
|
| 1685 |
+
scheduler_choice: gr.update(visible=False)
|
| 1686 |
+
}
|
| 1687 |
+
else: # SD1.5 Base
|
| 1688 |
+
return {
|
| 1689 |
+
clip_skip: gr.update(value=1),
|
| 1690 |
+
width: gr.update(value=512),
|
| 1691 |
+
height: gr.update(value=512),
|
| 1692 |
+
num_steps: gr.update(value=30),
|
| 1693 |
+
use_flow_matching: gr.update(value=False),
|
| 1694 |
+
shift: gr.update(value=0.0),
|
| 1695 |
+
scheduler_choice: gr.update(visible=False)
|
| 1696 |
+
}
|
| 1697 |
+
|
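# on_model_change above returns a dict keyed by component objects; Gradio
# applies such a dict only to components listed in the `outputs=` of the
# .change() binding below, so every key must appear there or its update is
# silently dropped.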
| 1698 |
def on_lyra_toggle(enabled):
|
| 1699 |
+
"""Show/hide Lyra comparison."""
|
| 1700 |
if enabled:
|
| 1701 |
return {
|
| 1702 |
output_image_standard: gr.update(visible=True, label="Standard"),
|
|
|
|
| 1708 |
output_image_lyra: gr.update(visible=False)
|
| 1709 |
}
|
| 1710 |
|
| 1711 |
+
model_choice.change(
|
| 1712 |
+
fn=on_model_change,
|
| 1713 |
+
inputs=[model_choice],
|
| 1714 |
+
outputs=[clip_skip, width, height, num_steps, use_flow_matching, shift, scheduler_choice]
|
| 1715 |
+
)
|
| 1716 |
+
|
| 1717 |
use_lyra.change(
|
| 1718 |
fn=on_lyra_toggle,
|
| 1719 |
inputs=[use_lyra],
|
|
|
|
| 1723 |
generate_btn.click(
|
| 1724 |
fn=generate_image,
|
| 1725 |
inputs=[
|
| 1726 |
+
prompt, t5_summary, negative_prompt, model_choice, scheduler_choice, clip_skip,
|
| 1727 |
+
num_steps, cfg_scale, width, height, shift,
|
| 1728 |
+
use_flow_matching, use_lyra, lyra_strength, seed, randomize_seed
|
| 1729 |
],
|
| 1730 |
outputs=[output_image_standard, output_image_lyra, output_seed]
|
| 1731 |
)
|
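# The positional order of `inputs` above must match both the generate_image
# signature and the args[...] indices in the @spaces.GPU duration lambda
# (args[3]=model_choice, args[6]=num_steps, args[8]=width, args[9]=height,
# args[12]=use_lyra); reordering any one of the three silently breaks the
# duration estimate.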