Spaces:

AbstractPhil
/

lyra-xl-playground

Running on Zero

App Files Files Community

AbstractPhil committed 17 days ago

Commit

e5ffd07

verified ·

1 Parent(s): 0f1416b

Update app.py

Browse files

Files changed (1) hide show

app.py +76 -23

app.py CHANGED Viewed

@@ -1027,6 +1027,7 @@ def load_lyra_vae(repo_id: str = "AbstractPhil/vae-lyra", device: str = "cuda"):
 def load_lyra_vae_xl(
     repo_id: str = "AbstractPhil/vae-lyra-xl-adaptive-cantor-illustrious",
     device: str = "cuda"
 ):
     """Load Lyra VAE v2 (SDXL/Illustrious version) from HuggingFace."""
@@ -1037,7 +1038,9 @@ def load_lyra_vae_xl(
     print(f"🎵 Loading Lyra VAE v2 from {repo_id}...")
     try:
-        # Download config.json first to get model architecture
         print("  📥 Downloading config.json...")
         config_path = hf_hub_download(
             repo_id=repo_id,
@@ -1048,23 +1051,53 @@ def load_lyra_vae_xl(
         with open(config_path, 'r') as f:
             config_dict = json.load(f)
-        print(f"  ✓ Config loaded: {config_dict.get('fusion_strategy', 'unknown')} fusion")
-        # Download model weights
-        print("  📥 Downloading model.pt...")
         checkpoint_path = hf_hub_download(
             repo_id=repo_id,
-            filename="checkpoint_lyra_illustrious_37000.pt",
             repo_type="model"
         )
         checkpoint = torch.load(checkpoint_path, map_location="cpu")
-        # Build config from repo's config.json
         vae_config = LyraV2Config(
-            modality_dims=config_dict.get('modality_dims', {"clip_l": 768, "clip_g": 1280, "t5_xl_l": 2048, "t5_xl_g": 2048}),
-            modality_seq_lens=config_dict.get('modality_seq_lens', {"clip_l": 77, "clip_g": 77, "t5_xl_l": 512, "t5_xl_g": 512}),
-            binding_config=config_dict.get('binding_config'),
             latent_dim=config_dict.get('latent_dim', 2048),
             seq_len=config_dict.get('seq_len', 77),
             encoder_layers=config_dict.get('encoder_layers', 3),
@@ -1078,28 +1111,48 @@ def load_lyra_vae_xl(
             cantor_local_window=config_dict.get('cantor_local_window', 3),
             alpha_init=config_dict.get('alpha_init', 1.0),
             beta_init=config_dict.get('beta_init', 0.3),
         )
         lyra_model = LyraV2(vae_config)
-        # Load weights from checkpoint
-        if 'model_state_dict' in checkpoint:
-            lyra_model.load_state_dict(checkpoint['model_state_dict'])
-        else:
-            lyra_model.load_state_dict(checkpoint)
-        # Keep Lyra in float32 for stability - inputs will be upcast
         lyra_model.to(device)
         lyra_model.eval()
-        print(f"✅ Lyra VAE v2 loaded")
-        print(f"   Fusion: {config_dict.get('fusion_strategy')}")
-        print(f"   Latent dim: {config_dict.get('latent_dim')}")
-        print(f"   Hidden dim: {config_dict.get('hidden_dim')}")
         if 'global_step' in checkpoint:
-            print(f"   Step: {checkpoint['global_step']:,}")
         if 'best_loss' in checkpoint:
-            print(f"   Loss: {checkpoint['best_loss']:.4f}")
         return lyra_model
@@ -1132,9 +1185,9 @@ def initialize_pipeline(model_choice: str, device: str = "cuda"):
         # T5-XL for Lyra
         print("Loading T5-XL encoder...")
-        t5_tokenizer = T5Tokenizer.from_pretrained("google/t5-v1_1-xl")
         t5_encoder = T5EncoderModel.from_pretrained(
-            "google/t5-v1_1-xl",
             torch_dtype=torch.float16
         ).to(device)
         t5_encoder.eval()

 def load_lyra_vae_xl(
     repo_id: str = "AbstractPhil/vae-lyra-xl-adaptive-cantor-illustrious",
+    checkpoint_filename: str = None,  # Auto-detect if None
     device: str = "cuda"
 ):
     """Load Lyra VAE v2 (SDXL/Illustrious version) from HuggingFace."""
     print(f"🎵 Loading Lyra VAE v2 from {repo_id}...")
     try:
+        from huggingface_hub import list_repo_files
+        # Download config.json
         print("  📥 Downloading config.json...")
         config_path = hf_hub_download(
             repo_id=repo_id,
         with open(config_path, 'r') as f:
             config_dict = json.load(f)
+        print(f"  ✓ Config: {config_dict.get('fusion_strategy', 'unknown')} fusion, latent_dim={config_dict.get('latent_dim')}")
+        # Auto-detect checkpoint if not specified
+        if checkpoint_filename is None:
+            repo_files = list_repo_files(repo_id, repo_type="model")
+            checkpoint_files = [f for f in repo_files if f.endswith('.pt') or f.endswith('.safetensors')]
+            checkpoint_files = [f for f in checkpoint_files if 'checkpoint' in f.lower() or 'model' in f.lower()]
+            if not checkpoint_files:
+                raise FileNotFoundError(f"No checkpoint found in {repo_id}")
+            # Prefer newest checkpoint (highest step number)
+            def extract_step(name):
+                import re
+                match = re.search(r'(\d+)\.pt', name)
+                return int(match.group(1)) if match else 0
+            checkpoint_files.sort(key=extract_step, reverse=True)
+            checkpoint_filename = checkpoint_files[0]
+            print(f"  ✓ Auto-selected checkpoint: {checkpoint_filename}")
+        # Download checkpoint
+        print(f"  📥 Downloading {checkpoint_filename}...")
         checkpoint_path = hf_hub_download(
             repo_id=repo_id,
+            filename=checkpoint_filename,
             repo_type="model"
         )
         checkpoint = torch.load(checkpoint_path, map_location="cpu")
+        # Build config with all v2 fields
         vae_config = LyraV2Config(
+            modality_dims=config_dict.get('modality_dims', {
+                "clip_l": 768, "clip_g": 1280,
+                "t5_xl_l": 2048, "t5_xl_g": 2048
+            }),
+            modality_seq_lens=config_dict.get('modality_seq_lens', {
+                "clip_l": 77, "clip_g": 77,
+                "t5_xl_l": 512, "t5_xl_g": 512
+            }),
+            binding_config=config_dict.get('binding_config', {
+                "clip_l": {"t5_xl_l": 0.3},
+                "clip_g": {"t5_xl_g": 0.3},
+                "t5_xl_l": {},
+                "t5_xl_g": {}
+            }),
             latent_dim=config_dict.get('latent_dim', 2048),
             seq_len=config_dict.get('seq_len', 77),
             encoder_layers=config_dict.get('encoder_layers', 3),
             cantor_local_window=config_dict.get('cantor_local_window', 3),
             alpha_init=config_dict.get('alpha_init', 1.0),
             beta_init=config_dict.get('beta_init', 0.3),
+            alpha_lr_scale=config_dict.get('alpha_lr_scale', 0.1),
+            beta_lr_scale=config_dict.get('beta_lr_scale', 1.0),
+            beta_kl=config_dict.get('beta_kl', 0.1),
+            beta_reconstruction=config_dict.get('beta_reconstruction', 1.0),
+            beta_cross_modal=config_dict.get('beta_cross_modal', 0.0),
+            beta_alpha_regularization=config_dict.get('beta_alpha_regularization', 0.01),
+            kl_clamp_max=config_dict.get('kl_clamp_max', 1.0),
+            logvar_clamp_min=config_dict.get('logvar_clamp_min', -10.0),
+            logvar_clamp_max=config_dict.get('logvar_clamp_max', 10.0),
         )
+        # Initialize model
         lyra_model = LyraV2(vae_config)
+        # Load weights
+        state_dict = checkpoint.get('model_state_dict', checkpoint)
+        missing, unexpected = lyra_model.load_state_dict(state_dict, strict=False)
+        if missing:
+            print(f"  ⚠️ Missing keys: {len(missing)} (using initialized weights)")
+        if unexpected:
+            print(f"  ⚠️ Unexpected keys: {len(unexpected)} (ignored)")
         lyra_model.to(device)
         lyra_model.eval()
+        # Print summary
+        total_params = sum(p.numel() for p in lyra_model.parameters())
+        print(f"✅ Lyra VAE v2 loaded ({total_params/1e6:.1f}M params)")
+        print(f"   Fusion: {vae_config.fusion_strategy}")
+        print(f"   Latent: {vae_config.latent_dim}, Hidden: {vae_config.hidden_dim}")
         if 'global_step' in checkpoint:
+            print(f"   Trained steps: {checkpoint['global_step']:,}")
         if 'best_loss' in checkpoint:
+            print(f"   Best loss: {checkpoint['best_loss']:.4f}")
+        # Print binding info
+        fusion_params = lyra_model.get_fusion_params()
+        if fusion_params.get('alphas'):
+            alpha_vals = {k: torch.sigmoid(v).item() for k, v in fusion_params['alphas'].items()}
+            print(f"   Alphas: {alpha_vals}")
         return lyra_model
         # T5-XL for Lyra
         print("Loading T5-XL encoder...")
+        t5_tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
         t5_encoder = T5EncoderModel.from_pretrained(
+            "google/flan-t5-xl",
             torch_dtype=torch.float16
         ).to(device)
         t5_encoder.eval()