Amshaker
/

Mobile-O-1.5B

+from dataclasses import dataclass
+import torch
+from PIL import Image
+from transformers import AutoTokenizer
+from blip3o.model import *
+from peft import PeftModel
+import os
+from safetensors.torch import load_file
+import argparse
+from pathlib import Path
+import re
@dataclass
class T2IConfig:
    """Settings used to load the model and to export / sample from it.

    ``main()`` overwrites ``base_model_path`` and ``lora_checkpoint_path`` with
    the ``--checkpoint_dir`` command line argument.
    """

    # Directory of the base model (the model before LoRA training).
    base_model_path: str = "/proj/cvl/users/x_fahkh2/BLIP3o_SANA/fastvlm-o/blip3o_fast_vlm_unified_v6_60k_blip3o_45k_sharegpt_quad_all_learnable_dynamic_lr_v5_20e_lora16_after_sft_pretrain_v6_ve_learnable_v7_with_edit_1_5B"
    # Passed as ``torch_dtype`` when the model and the adapters are loaded.
    dtype: torch.dtype = torch.bfloat16
    # Generation config.
    # NOTE(review): these four values are not read anywhere in this file;
    # presumably they are consumed elsewhere -- confirm before removing.
    scale: int = 0
    seq_len: int = 729
    top_p: float = 0.95
    top_k: int = 1200
    # Set to True to use LoRA checkpoint, False to use base model only.
    use_lora_checkpoint: bool = True
    # Training output directory holding the LoRA adapter and the checkpoint-*
    # folders. Declared here (default None) so that reading it on a freshly
    # constructed config no longer raises AttributeError.
    lora_checkpoint_path: "str | None" = None
def find_latest_checkpoint(checkpoint_dir):
    """
    Find the latest checkpoint in the given directory.

    Sub-directories named ``checkpoint-<step>`` are considered; the one with
    the highest step number wins and its ``global_step<step>`` folder is
    returned.

    Args:
        checkpoint_dir: Path to the directory containing checkpoints
    Returns:
        Path (as ``str``) to the latest checkpoint's global_step directory,
        or None if the directory, the checkpoints or the global_step folder
        cannot be found
    """
    root = Path(checkpoint_dir)
    if not root.is_dir():
        print(f"⚠️  Warning: Checkpoint directory does not exist: {checkpoint_dir}")
        return None

    # Collect (step, path) for every checkpoint-XXXXX directory.
    candidates = []
    for item in root.iterdir():
        match = re.match(r"checkpoint-(\d+)", item.name)
        if match and item.is_dir():
            candidates.append((int(match.group(1)), item))

    if not candidates:
        print(f"⚠️  Warning: No checkpoint directories found in {checkpoint_dir}")
        return None

    # The step number comes from the directory name itself; it must not be
    # overridden, otherwise only one specific training run can be resolved.
    latest_step, latest_dir = max(candidates, key=lambda pair: pair[0])

    # Look for global_step directory inside
    global_step_dir = latest_dir / f"global_step{latest_step}"
    if not global_step_dir.exists():
        print(f"⚠️  Warning: global_step directory not found at {global_step_dir}")
        return None

    print(f"✓ Found latest checkpoint: {latest_dir.name} (step {latest_step})")
    return str(global_step_dir)
class TextToImageInference:
    """Load a BLIP3o model (optionally with LoRA adapters), export it and sample images.

    Constructing an instance loads the model and tokenizer immediately.
    """

    # Upper bound for a single exported safetensors shard (5GB in bytes).
    _MAX_SHARD_BYTES = 5 * 1024 * 1024 * 1024

    def __init__(self, config: "T2IConfig"):
        self.config = config
        # The device is fixed to the first CUDA GPU; the config has no device field.
        self.device = 'cuda:0'
        self._load_models()

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _strip_module_prefix(key):
        """Drop one leading ``module.`` wrapper prefix from a state-dict key.

        Only the prefix is removed; a ``module.`` substring in the middle of a
        key (e.g. ``model.gen_module.weight``) is left intact.
        """
        prefix = "module."
        return key[len(prefix):] if key.startswith(prefix) else key

    @staticmethod
    def _extract_state_dict(checkpoint):
        """Unwrap a loaded checkpoint to the mapping that holds the weights.

        Looks for the wrapper keys ``module``, ``model_state_dict`` and
        ``state_dict`` (in that order) and otherwise returns the object as-is.
        """
        if isinstance(checkpoint, dict):
            for wrapper_key in ("module", "model_state_dict", "state_dict"):
                if wrapper_key in checkpoint:
                    return checkpoint[wrapper_key]
        return checkpoint

    @staticmethod
    def _plan_shards(tensor_sizes, max_shard_size):
        """Group keys into shards whose byte size stays within ``max_shard_size``.

        Args:
            tensor_sizes: Mapping of key -> size in bytes; its order is kept.
            max_shard_size: Byte budget of one shard.
        Returns:
            List of key lists. No list is empty; a single entry larger than
            the budget gets a shard of its own.
        """
        shards = []
        current_keys, current_bytes = [], 0
        for key, nbytes in tensor_sizes.items():
            if current_keys and current_bytes + nbytes > max_shard_size:
                shards.append(current_keys)
                current_keys, current_bytes = [], 0
            current_keys.append(key)
            current_bytes += nbytes
        if current_keys:
            shards.append(current_keys)
        return shards

    @staticmethod
    def _latest_global_step_dir(checkpoint_dir):
        """Return ``<checkpoint_dir>/checkpoint-N/global_stepN`` for the highest N.

        Returns None when ``checkpoint_dir`` is not a directory or holds no
        ``checkpoint-<digits>`` sub-directory. The returned path is not
        checked for existence.
        """
        if not os.path.isdir(checkpoint_dir):
            return None
        best = None
        for name in os.listdir(checkpoint_dir):
            match = re.match(r"checkpoint-(\d+)", name)
            if not match or not os.path.isdir(os.path.join(checkpoint_dir, name)):
                continue
            step = int(match.group(1))
            if best is None or step > best[0]:
                best = (step, name)
        if best is None:
            return None
        return os.path.join(checkpoint_dir, best[1], f"global_step{best[0]}")

    # ------------------------------------------------------------------
    # Export
    # ------------------------------------------------------------------
    def save_merged_model(self, output_path: str, deepspeed_checkpoint_path: str = None):
        """
        Merge LoRA weights with base model and save as a standalone model.
        Handles DeepSpeed checkpoints if provided.

        The model is moved to CPU while the weights are written and is put
        back on ``self.device`` afterwards, so the instance can still be used
        for generation. When the loaded model is a PEFT model, ``self.model``
        is replaced by the merged model.

        Args:
            output_path: Directory where the merged model will be saved
            deepspeed_checkpoint_path: Path to DeepSpeed checkpoint directory (e.g., checkpoint-5719/global_step5719)
        """
        print(f"\n{'='*80}")
        print("SAVING MERGED MODEL")
        print(f"{'='*80}\n")
        output_path = Path(output_path)
        output_path.mkdir(parents=True, exist_ok=True)

        # Step 0: Load DeepSpeed checkpoint if provided
        deepspeed_state_dict = None
        if deepspeed_checkpoint_path is not None:
            print("[0/5] Loading DeepSpeed checkpoint...")
            deepspeed_state_dict = self._read_deepspeed_state_dict(Path(deepspeed_checkpoint_path))

        # Step 1: fold the LoRA deltas into the base weights (if any)
        if isinstance(self.model, PeftModel):
            print("[1/5] Merging LoRA weights into base model...")
            merged_model = self.model.merge_and_unload()
            print("  ✓ LoRA weights merged")
            # Move to CPU for saving to avoid CUDA memory issues
            print("  Moving model to CPU for saving...")
            merged_model = merged_model.cpu()
        else:
            print("[1/5] Model has no LoRA adapters, saving as-is...")
            merged_model = self.model.cpu()

        # Step 2: build the state dict that will be written
        print(f"\n[2/5] Preparing model state dict...")
        if hasattr(merged_model, 'config'):
            config = merged_model.config
        else:
            from transformers import AutoConfig
            config = AutoConfig.from_pretrained(self.config.base_model_path, trust_remote_code=True)
        state_dict = merged_model.state_dict()

        if deepspeed_state_dict is not None:
            print("  Merging with DeepSpeed checkpoint...")
            cleaned_deepspeed_dict = {
                self._strip_module_prefix(key): value
                for key, value in deepspeed_state_dict.items()
            }
            # DeepSpeed weights win over the LoRA-merged ones; keys unknown to
            # the model are added as well.
            state_dict.update(cleaned_deepspeed_dict)
            print(f"  ✓ Merged {len(cleaned_deepspeed_dict)} parameters from DeepSpeed")

        # Remove any PEFT-related keys that might remain
        peft_markers = ('lora_', 'adapter_', 'peft_')
        keys_to_remove = [key for key in state_dict if any(m in key for m in peft_markers)]
        if keys_to_remove:
            print(f"  Removing {len(keys_to_remove)} PEFT-related keys...")
            for key in keys_to_remove:
                del state_dict[key]
        print(f"  ✓ Final state dict has {len(state_dict)} parameters")

        # Step 3: config
        print(f"\n[3/5] Saving config to: {output_path}")
        config.save_pretrained(str(output_path))
        print("  ✓ Config saved")

        # Step 4: weights
        print(f"\n[4/5] Saving model weights...")
        total_size = self._write_weights(state_dict, output_path)

        # Step 5: tokenizer and side files
        print("\n[5/5] Saving tokenizer and additional files...")
        self._copy_auxiliary_files(output_path, state_dict)

        # merge_and_unload() and .cpu() act on the live modules, so without
        # this the instance would be left on CPU while generate_image() still
        # sends its inputs to self.device.
        self.model = merged_model.to(self.device)
        self.model.eval()

        print(f"\n{'='*80}")
        print("✅ MODEL SAVED SUCCESSFULLY!")
        print(f"{'='*80}")
        print(f"\nMerged model saved to: {output_path}")
        print(f"Total parameters: {len(state_dict):,}")
        print(f"Model size: {total_size / 1024**3:.2f}GB")
        if deepspeed_state_dict is not None:
            print("\n⚠️  Note: This model includes weights from DeepSpeed checkpoint")
        print("\nYou can now load it with:")
        print(f"  from transformers import AutoModelForCausalLM")
        print(f"  model = AutoModelForCausalLM.from_pretrained('{output_path}', trust_remote_code=True)")
        print(f"\nOr with your custom class:")
        print(f"  model = blip3oFastForInferenceLM.from_pretrained('{output_path}')")
        print(f"\n{'='*80}\n")

    def _read_deepspeed_state_dict(self, deepspeed_dir):
        """Load the weights stored next to a DeepSpeed ``global_step`` folder.

        ``mp_rank_00_model_states.pt`` is preferred; a consolidated
        ``pytorch_model.bin`` (inside the folder or in its parent, which is
        where the printed ``zero_to_fp32.py`` command writes it) is used as a
        fallback. Returns None, after printing instructions, when none exists.
        """
        candidates = [
            deepspeed_dir / "mp_rank_00_model_states.pt",
            deepspeed_dir / "pytorch_model.bin",
            deepspeed_dir.parent / "pytorch_model.bin",
        ]
        for candidate in candidates:
            if not candidate.is_file():
                continue
            print(f"  Loading model states from {candidate}")
            # NOTE: torch.load unpickles the file -- only use trusted checkpoints.
            checkpoint = torch.load(candidate, map_location='cpu')
            state_dict = self._extract_state_dict(checkpoint)
            print(f"  ✓ Loaded {len(state_dict)} parameters from DeepSpeed checkpoint")
            return state_dict

        zero_script = deepspeed_dir.parent / "zero_to_fp32.py"
        if not zero_script.exists():
            print(f"  ⚠️  zero_to_fp32.py not found at {zero_script}")
        print(f"  ⚠️  No consolidated checkpoint found. Please run:")
        print(f"     cd {deepspeed_dir.parent}")
        print(f"     python zero_to_fp32.py {deepspeed_dir.name} pytorch_model.bin")
        return None

    def _write_weights(self, state_dict, output_path):
        """Write ``state_dict`` as safetensors (sharded above 5GB) and return its byte size."""
        import json
        from safetensors.torch import save_file

        # Same marker that save_pretrained() writes; some transformers
        # versions refuse to load safetensors files that lack it.
        metadata = {"format": "pt"}
        sizes = {key: value.numel() * value.element_size() for key, value in state_dict.items()}
        total_size = sum(sizes.values())

        if total_size <= self._MAX_SHARD_BYTES:
            print(f"  Model size: {total_size / 1024**3:.2f}GB, saving in single file...")
            save_file(state_dict, str(output_path / "model.safetensors"), metadata=metadata)
            print("  ✓ Model weights saved")
            return total_size

        print(f"  Model size: {total_size / 1024**3:.2f}GB, splitting into shards...")
        # Plan by bytes (not by key count) so the shard count in the file
        # names is known up front and no shard ends up empty.
        shards = self._plan_shards(sizes, self._MAX_SHARD_BYTES)
        num_shards = len(shards)
        weight_map = {}
        for number, shard_keys in enumerate(shards, start=1):
            shard_filename = f"model-{number:05d}-of-{num_shards:05d}.safetensors"
            shard_dict = {key: state_dict[key] for key in shard_keys}
            save_file(shard_dict, str(output_path / shard_filename), metadata=metadata)
            for key in shard_keys:
                weight_map[key] = shard_filename
            print(f"  ✓ Saved shard {number}/{num_shards}: {shard_filename}")

        index = {
            "metadata": {"total_size": total_size},
            "weight_map": weight_map,
        }
        with open(output_path / "model.safetensors.index.json", "w") as f:
            json.dump(index, f, indent=2)
        print("  ✓ Saved model index")
        return total_size

    def _copy_auxiliary_files(self, output_path, state_dict):
        """Save the tokenizer and copy code, projector and config files next to the weights."""
        import shutil

        tokenizer = AutoTokenizer.from_pretrained(self.config.base_model_path)
        tokenizer.save_pretrained(str(output_path))
        print("  ✓ Tokenizer saved")

        base_path = Path(self.config.base_model_path)

        # Copy Python files (modeling, configuration, processing)
        print("  Copying Python files...")
        wanted = ("modeling", "configuration", "processing", "image")
        for py_file in base_path.glob("*.py"):
            if not any(tag in py_file.name.lower() for tag in wanted):
                continue
            try:
                shutil.copy2(py_file, output_path / py_file.name)
                print(f"    - {py_file.name}")
            except OSError as e:
                # Best effort: a file that cannot be copied must not abort the export.
                print(f"    ⚠️  Failed to copy {py_file.name}: {e}")

        # Copy projector files if they exist
        print("  Checking for projector files...")
        search_paths = [base_path, base_path / "merged_model"]
        lora_path = getattr(self.config, 'lora_checkpoint_path', None)
        if lora_path:
            search_paths.append(Path(lora_path))
        for bin_file in ["mm_projector.bin", "gen_projector.bin"]:
            source = next(
                (folder / bin_file for folder in search_paths if (folder / bin_file).exists()),
                None,
            )
            if source is not None:
                shutil.copy2(source, output_path / bin_file)
                print(f"    - {bin_file}")
            elif any(bin_file.replace('.bin', '') in key for key in state_dict):
                print(f"    ℹ️  {bin_file} weights are in model state dict")
            else:
                print(f"    ⚠️  {bin_file} not found (may not be needed)")

        # Copy config files
        print("  Copying additional config files...")
        for json_file in ["generation_config.json", "preprocessor_config.json"]:
            src = base_path / json_file
            if src.exists():
                shutil.copy2(src, output_path / json_file)
                print(f"    - {json_file}")

    # ------------------------------------------------------------------
    # Loading
    # ------------------------------------------------------------------
    def _load_deepspeed_checkpoint(self, model, checkpoint_dir):
        """Load the full model states of the newest DeepSpeed checkpoint into ``model``.

        Args:
            model: Module that receives the weights (``strict=False``).
            checkpoint_dir: Training output directory containing the
                ``checkpoint-<step>`` folders.
        Returns:
            ``model``; it is returned unchanged when no model states exist
            and no ``zero_to_fp32.py`` is available to create them.
        Raises:
            subprocess.CalledProcessError: if ``zero_to_fp32.py`` exits with
                a non-zero status.
        """
        print(f"Loading DeepSpeed checkpoint from: {checkpoint_dir}")
        # Resolve the newest checkpoint instead of one fixed step number.
        global_step_dir = self._latest_global_step_dir(checkpoint_dir)
        model_state_path = None
        if global_step_dir is not None:
            model_state_path = os.path.join(global_step_dir, "mp_rank_00_model_states.pt")

        if model_state_path is None or not os.path.exists(model_state_path):
            print(f"⚠️  Warning: Model states not found at {model_state_path}")
            print("    Using zero_to_fp32.py to consolidate checkpoint...")
            model_state_path = self._consolidate_checkpoint(checkpoint_dir, global_step_dir)
            if model_state_path is None:
                print("    zero_to_fp32.py not found, skipping full checkpoint loading")
                return model

        print(f"Loading model states from: {model_state_path}")
        # NOTE: torch.load unpickles the file -- only use trusted checkpoints.
        checkpoint = torch.load(model_state_path, map_location="cpu")
        state_dict = self._extract_state_dict(checkpoint)

        # Load non-LoRA weights (DiT, projectors, vision tower, etc.)
        # These go into the base model before LoRA is applied.
        missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
        print(f"✓ Loaded checkpoint successfully")
        if missing_keys:
            print(f"  Missing keys (expected for LoRA): {len(missing_keys)}")
            for key in missing_keys[:5]:
                print(f"    - {key}")
            if len(missing_keys) > 5:
                print(f"    ... and {len(missing_keys) - 5} more")
        if unexpected_keys:
            print(f"  Unexpected keys: {len(unexpected_keys)}")
            for key in unexpected_keys[:5]:
                print(f"    - {key}")
        return model

    def _consolidate_checkpoint(self, checkpoint_dir, global_step_dir):
        """Run ``zero_to_fp32.py`` and return the path of the file it was asked to write.

        The script is looked up in the ``checkpoint-<step>`` folder first and
        in ``checkpoint_dir`` second. Returns None when it is in neither.
        """
        import subprocess
        import sys

        script_dirs = []
        if global_step_dir is not None:
            script_dirs.append(os.path.dirname(global_step_dir))
        script_dirs.append(checkpoint_dir)

        for script_dir in script_dirs:
            zero_script = os.path.join(script_dir, "zero_to_fp32.py")
            if not os.path.exists(zero_script):
                continue
            output_path = os.path.join(script_dir, "pytorch_model.bin")
            # Use the running interpreter (it has torch installed) and fail
            # loudly instead of loading a file that was never written.
            # NOTE(review): assumes the script writes a single file at
            # output_path -- confirm for the DeepSpeed version in use.
            subprocess.run(
                [sys.executable, zero_script, script_dir, output_path],
                check=True,
            )
            return output_path
        return None

    def _load_models(self):
        """Load model with LoRA adapters and full checkpoint weights.

        Raises:
            ValueError: if ``use_lora_checkpoint`` is set but the config has
                no ``lora_checkpoint_path``.
        """
        use_lora = self.config.use_lora_checkpoint
        lora_path = getattr(self.config, "lora_checkpoint_path", None)
        if use_lora and not lora_path:
            raise ValueError(
                "use_lora_checkpoint is True but config.lora_checkpoint_path is not set"
            )

        print("=" * 80)
        if use_lora:
            print(f"Loading base model from: {self.config.base_model_path}")
            print(f"Loading LoRA checkpoint from: {lora_path}")
        else:
            print(f"Loading model without LoRA from: {self.config.base_model_path}")
        print("=" * 80)

        # Step 1: Load base model architecture
        print("\n[1/4] Loading base model architecture...")
        base_model = blip3oFastForInferenceLM.from_pretrained(
            self.config.base_model_path,
            torch_dtype=self.config.dtype,
            device_map="cpu",  # Load to CPU first for checkpoint loading
        )
        print("✓ Base model loaded")

        if use_lora:
            # Step 2: Load full checkpoint weights (DiT, projectors, etc.)
            print("\n[2/4] Loading full checkpoint weights (DiT, projectors, etc.)...")
            base_model = self._load_deepspeed_checkpoint(base_model, lora_path)
            # Step 3: Apply LoRA adapters on top
            print("\n[3/4] Applying LoRA adapters...")
            self.model = PeftModel.from_pretrained(
                base_model,
                lora_path,
                torch_dtype=self.config.dtype,
            )
            print("✓ LoRA adapters applied successfully!")
            lora_params = sum(
                p.numel() for n, p in self.model.named_parameters() if "lora" in n.lower()
            )
            total_params = sum(p.numel() for p in self.model.parameters())
            print(f"  LoRA parameters: {lora_params:,} ({100 * lora_params / total_params:.2f}%)")
        else:
            self.model = base_model

        # Step 4: Move to device and set to eval mode
        print("\n[4/4] Moving model to device and setting eval mode...")
        self.model = self.model.to(self.device)
        self.model.eval()
        print(f"✓ Model on {self.device}")

        # Load tokenizer from checkpoint (has all the special tokens)
        tokenizer_path = lora_path if use_lora else self.config.base_model_path
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
        print(f"✓ Tokenizer loaded from: {tokenizer_path}")
        print("=" * 80)
        print("\n✅ Model loading complete! Ready for inference.\n")

    # ------------------------------------------------------------------
    # Inference
    # ------------------------------------------------------------------
    def generate_image(self, prompt, steps=30) -> "Image.Image":
        """Generate image from text prompt.

        Args:
            prompt: Caption to render.
            steps: Accepted for interface compatibility; it is currently not
                forwarded to the model.
        Returns:
            The first image returned by the model's ``generate_image``.
        """
        messages = [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": f"Please generate image based on the following caption: {prompt}"}
        ]
        input_text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        inputs = self.tokenizer(
            [input_text],
            return_tensors="pt",
            padding=True,
            truncation=True,
            padding_side="left"
        )
        with torch.no_grad():
            output_image = self.model.generate_image(
                inputs.input_ids.to(self.device),
                inputs.attention_mask.to(self.device),
            )
        return output_image[0]
def consolidate_checkpoint_first(checkpoint_dir):
    """
    Consolidate DeepSpeed checkpoint before loading.
    Run this once if you get errors loading the checkpoint.

    Runs ``<checkpoint_dir>/zero_to_fp32.py`` with ``checkpoint_dir`` as input
    and ``<checkpoint_dir>/pytorch_model.bin`` as output.

    Args:
        checkpoint_dir: Directory that contains ``zero_to_fp32.py``.
    Returns:
        True when the script exits with status 0; False when the script is
        missing or exits with a non-zero status.
    """
    import subprocess
    import sys

    print("=" * 80)
    print("Consolidating DeepSpeed checkpoint...")
    print("=" * 80)
    zero_script = os.path.join(checkpoint_dir, "zero_to_fp32.py")
    output_path = os.path.join(checkpoint_dir, "pytorch_model.bin")
    if not os.path.exists(zero_script):
        print(f"❌ zero_to_fp32.py not found at {zero_script}")
        return False

    print(f"Input: {checkpoint_dir}")
    print(f"Output: {output_path}")
    # Launch with the interpreter running this script: a bare "python" may be
    # missing from PATH or point at an environment without the dependencies.
    result = subprocess.run(
        [sys.executable, zero_script, checkpoint_dir, output_path],
        capture_output=True,
        text=True,
    )
    if result.returncode != 0:
        print(f"❌ Error consolidating checkpoint:")
        print(result.stderr)
        return False

    print(f"✓ Checkpoint consolidated successfully to {output_path}")
    return True
def main():
    """Merge the newest checkpoint into a standalone model, then optionally render sample images.

    Command line options: ``--checkpoint_dir`` (required), ``--output_dir``
    and ``--skip_inference``. Sample images and a grid overview are written
    to a ``Fast-SANA-LoRA-Full-<step>`` folder in the working directory.
    """
    rule = "=" * 80

    # ---- command line -------------------------------------------------
    cli = argparse.ArgumentParser(description="Merge BLIP3o LoRA model with base model")
    cli.add_argument(
        "--checkpoint_dir",
        type=str,
        required=True,
        help="Path to the checkpoint directory (e.g., blip3o_fast_vlm_unified_v6_60k_...)",
    )
    cli.add_argument(
        "--output_dir",
        type=str,
        default=None,
        help="Output directory for merged model (default: {checkpoint_dir}/final_merged_model_{step})",
    )
    cli.add_argument(
        "--skip_inference",
        action="store_true",
        help="Skip image generation and only save merged model",
    )
    args = cli.parse_args()
    checkpoint_dir = args.checkpoint_dir

    # ---- locate the newest checkpoint -----------------------------------
    print(f"\n{rule}")
    print(f"Searching for latest checkpoint in: {checkpoint_dir}")
    print(f"{rule}\n")
    latest_checkpoint = find_latest_checkpoint(checkpoint_dir)
    if latest_checkpoint is None:
        print("❌ Error: Could not find any valid checkpoints")
        return

    # The step number is only used to label output folders.
    found_step = re.search(r"global_step(\d+)", latest_checkpoint)
    step_num = found_step.group(1) if found_step else "unknown"

    output_dir = args.output_dir
    if output_dir is None:
        output_dir = f"{checkpoint_dir}/final_merged_model_{step_num}"
    print(f"Output directory: {output_dir}\n")

    # ---- load and export ----------------------------------------------------
    # The same directory serves as base model and as LoRA checkpoint location.
    config = T2IConfig()
    config.base_model_path = checkpoint_dir
    config.lora_checkpoint_path = checkpoint_dir
    inference = TextToImageInference(config)
    inference.save_merged_model(output_dir, deepspeed_checkpoint_path=latest_checkpoint)

    if args.skip_inference:
        print("\n✅ Merged model saved. Skipping inference as requested.")
        return

    # ---- sample generation ----------------------------------------------
    prompts = [
        'A surreal scene on a lunar-like surface, where a brown horse is standing on the back of an astronaut...',
        "a photo of four cute cats",
        "a photo of five cute dogs",
        "a photo of a horse",
        "a photo of a tiger",
        "a photo of a wolf",
        "a beautiful mountain landscape"
    ]
    inference_steps = [20]
    image_output_dir = f"Fast-SANA-LoRA-Full-{step_num}"
    os.makedirs(image_output_dir, exist_ok=True)

    print("\n" + rule)
    print("Starting image generation...")
    print(rule)
    all_images = []
    prompt_count = len(prompts)
    for idx, prompt in enumerate(prompts):
        print(f"\n[Prompt {idx+1}/{prompt_count}] {prompt[:60]}...")
        generated = []
        for inf in inference_steps:
            print(f"  Generating with {inf} steps...", end=" ")
            picture = inference.generate_image(prompt, steps=inf)
            picture.save(os.path.join(image_output_dir, f"prompt{idx:02d}_steps{inf}.png"))
            print("✓ Saved")
            generated.append(picture)
        all_images.append(generated)

    # ---- grid overview: one row per prompt, one column per step setting ----
    print("\n" + rule)
    print("Creating grid visualization...")
    print(rule)
    import matplotlib.pyplot as plt
    fig, axes = plt.subplots(len(prompts), len(inference_steps), figsize=(15, 10))
    # With a single column, subplots() hands back a 1-D array of axes.
    single_column = len(inference_steps) == 1
    for i, generated in enumerate(all_images):
        for j, img in enumerate(generated):
            ax = axes[i] if single_column else axes[i, j]
            ax.imshow(img)
            ax.axis("off")
            if i == 0:
                ax.set_title(f"{inference_steps[j]} steps", fontsize=10)
    plt.tight_layout()
    grid_path = os.path.join(image_output_dir, "grid_results.png")
    plt.savefig(grid_path, dpi=150, bbox_inches='tight')
    print(f"✓ Grid saved: {grid_path}")
    plt.close()
    print("\n✅ All done! Check the '{}' folder for results.".format(image_output_dir))
def compare_base_vs_lora(lora_checkpoint_path=None):
    """Compare base model vs LoRA-trained model outputs.

    Generates the same prompts once with the base model and once with the
    LoRA model and writes the images to ``comparison_base`` and
    ``comparison_lora`` in the working directory.

    Args:
        lora_checkpoint_path: Directory with the LoRA checkpoint. When it is
            None and the default config carries no path either, the LoRA run
            is skipped with a warning.
    """
    test_prompts = [
        "a photo of a cute cat",
        "a beautiful mountain landscape",
        "a tiger in the forest"
    ]
    num_inference_steps = 20
    for model_type in ["base", "lora"]:
        config = T2IConfig()
        config.use_lora_checkpoint = (model_type == "lora")
        if lora_checkpoint_path is not None:
            config.lora_checkpoint_path = lora_checkpoint_path
        if config.use_lora_checkpoint and not getattr(config, "lora_checkpoint_path", None):
            # The loader needs this path; without it the LoRA run cannot start.
            print("⚠️  Skipping LORA model: no lora_checkpoint_path was provided")
            continue

        output_dir = f"comparison_{model_type}"
        os.makedirs(output_dir, exist_ok=True)
        print(f"\n{'='*80}")
        print(f"Generating with {model_type.upper()} model")
        print(f"{'='*80}")
        inference = TextToImageInference(config)
        for idx, prompt in enumerate(test_prompts):
            print(f"\n[{idx+1}/{len(test_prompts)}] {prompt}")
            # generate_image() names this parameter ``steps``.
            image = inference.generate_image(prompt, steps=num_inference_steps)
            save_path = os.path.join(output_dir, f"{model_type}_prompt{idx:02d}.png")
            image.save(save_path)
            print(f"✓ Saved: {save_path}")
        # Clean up to free memory before the next model is loaded
        del inference
        torch.cuda.empty_cache()
    print("\n✅ Comparison complete!")
    print("Check 'comparison_base' and 'comparison_lora' folders")
# Script entry point: run the merge / sampling pipeline only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    main()