Spaces:

alpercagann
/

SonicDiffusionClean

Runtime error

App Files Files Community

alpercagann commited on Apr 10, 2025

Commit

dc72d06

1 Parent(s): 3747436

Add torch to requirements

Browse files

Files changed (13) hide show

DIFFUSERS_COMPATIBILITY.md +65 -0
Dockerfile +27 -0
app.py +8 -1
attention_custom.py +129 -4
compatibility_patches.py +56 -0
patch_diffusers.sh +13 -0
pipeline_stable_diffusion_custom.py +96 -14
requirements.txt +3 -3
test_imports.py +29 -0
test_pipeline.py +45 -0
transformer_2d_custom.py +48 -2
unet2d_custom.py +78 -16
unet_2d_blocks_custom.py +127 -15

DIFFUSERS_COMPATIBILITY.md ADDED Viewed

	@@ -0,0 +1,65 @@

+# Diffusers Compatibility Issues
+## Overview
+This document outlines compatibility issues with the SonicDiffusion project and the diffusers library version 0.21.4.
+## Identified Issues
+The project requires components from newer versions of diffusers that are not available in 0.21.4, including:
+1. `IPAdapterMixin` in `diffusers.loaders`
+2. `FromSingleFileMixin` in `diffusers.loaders`
+3. `PeftAdapterMixin` in `diffusers.loaders`
+4. `USE_PEFT_BACKEND` in `diffusers.utils`
+5. `apply_freeu` in `diffusers.utils.torch_utils`
+6. `AdaGroupNorm` in `diffusers.models.normalization`
+7. `ResnetBlockCondNorm2D` in `diffusers.models.resnet`
+8. `DualTransformer2DModel` in `diffusers.models.transformers.dual_transformer_2d`
+9. `GEGLU`, `GELU`, `ApproximateGELU` in `diffusers.models.activations`
+10. `ImagePositionalEmbeddings`, `PatchEmbed`, `PixArtAlphaTextProjection` in `diffusers.models.embeddings`
+11. `AdaLayerNormSingle` in `diffusers.models.normalization`
+12. `StableDiffusionMixin` in `diffusers.pipelines.pipeline_utils`
+## Solutions
+We've implemented several fixes for compatibility:
+1. Added dummy implementations for missing classes
+2. Added fallback imports with try/except blocks
+3. Simplified implementations of complex components
+4. Worked around limitations in the older diffusers API
+## Recommended Approach
+For a more reliable fix, you should:
+1. **Update diffusers**: Upgrade to a newer version (we recommend at least 0.25.0)
+    ```bash
+    pip install 'diffusers>=0.25.0'
+    ```
+2. **Update related packages**: Ensure complementary packages are also updated
+    ```bash
+    pip install 'transformers>=4.36.0' 'accelerate>=0.25.0'
+    ```
+3. **Alternative approach**: If you cannot update diffusers, try using a standalone version without integration with HuggingFace:
+    - Modify the controller.py to use explicit PyTorch components without requiring diffusers for direct audio-to-image conversion
+    - Use a pre-trained model with your own implementation of the pipeline
+## Error Handling for Gradio
+There are also issues with Gradio compatibility. The simplest solution is:
+```bash
+pip install 'gradio>=4.19.0,<4.27.0'
+```
+When running the app, use:
+```python
+demo.launch(server_name="0.0.0.0", share=True)
+```
+This helps prevent the localhost access error and creates a shareable link.

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+FROM python:3.10-slim
+WORKDIR /app
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    git \
+    ffmpeg \
+    libsndfile1 \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies with pinned versions
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY . .
+# Create necessary directories
+RUN mkdir -p assets ckpts outputs
+# Expose port for Gradio
+EXPOSE 7860
+# Command to run the application
+CMD ["python", "app.py"]

app.py CHANGED Viewed

@@ -1,6 +1,12 @@
 import os
 import sys
 # Print environment information
 print("==== Environment Information ====")
 print(f"Python version: {sys.version}")
@@ -181,4 +187,5 @@ with gr.Blocks(title="SonicDiffusion") as demo:
                 )
 if __name__ == "__main__":
-    demo.launch()

 import os
 import sys
+# Apply compatibility patches first
+try:
+    import compatibility_patches
+except ImportError:
+    print("Warning: compatibility_patches not found")
 # Print environment information
 print("==== Environment Information ====")
 print(f"Python version: {sys.version}")
                 )
 if __name__ == "__main__":
+    # Change the server parameters
+    demo.launch(server_name="0.0.0.0", share=True)

attention_custom.py CHANGED Viewed

@@ -1,17 +1,142 @@
 # Adapted from https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention.py
 from typing import Any, Dict, Optional
 import torch
 import torch.nn.functional as F
 from torch import nn
 from diffusers.utils import deprecate, logging
-from diffusers.utils.torch_utils import maybe_allow_in_graph
-from diffusers.models.activations import GEGLU, GELU, ApproximateGELU
 from diffusers.models.attention_processor import Attention
-from diffusers.models.embeddings import SinusoidalPositionalEmbedding
-from diffusers.models.normalization import AdaLayerNorm, AdaLayerNormContinuous, AdaLayerNormZero, RMSNorm
 logger = logging.get_logger(__name__)

 # Adapted from https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention.py
 from typing import Any, Dict, Optional
+import math
 import torch
 import torch.nn.functional as F
 from torch import nn
 from diffusers.utils import deprecate, logging
+# Import maybe_allow_in_graph or define if not available
+try:
+    from diffusers.utils.torch_utils import maybe_allow_in_graph
+except ImportError:
+    def maybe_allow_in_graph(fn):
+        """Dummy decorator for compatibility with older diffusers versions"""
+        return fn
+# Define activation functions since they're not available in this version of diffusers
+# GELU activation
+class GELU(nn.Module):
+    """
+    Custom implementation of GELU activation for compatibility with older diffusers versions.
+    See https://arxiv.org/abs/1606.08415 for details.
+    """
+    def forward(self, input):
+        return F.gelu(input)
+# Approximate GELU
+class ApproximateGELU(nn.Module):
+    """
+    Custom implementation of Approximate GELU activation for compatibility with older diffusers versions.
+    """
+    def forward(self, input):
+        return 0.5 * input * (1 + torch.tanh(math.sqrt(2 / math.pi) * (input + 0.044715 * torch.pow(input, 3))))
+# GEGLU activation
+class GEGLU(nn.Module):
+    """
+    Custom implementation of GEGLU activation for compatibility with older diffusers versions.
+    See https://arxiv.org/abs/2002.05202 for more details.
+    """
+    def __init__(self, dim_in, dim_out):
+        super().__init__()
+        self.proj = nn.Linear(dim_in, dim_out * 2)
+        self.dim_out = dim_out
+    def forward(self, hidden_states):
+        hidden_states, gate = self.proj(hidden_states).chunk(2, dim=-1)
+        return hidden_states * F.gelu(gate)
 from diffusers.models.attention_processor import Attention
+# Import embeddings with fallbacks
+try:
+    from diffusers.models.embeddings import SinusoidalPositionalEmbedding
+except ImportError:
+    # Define a simple SinusoidalPositionalEmbedding
+    class SinusoidalPositionalEmbedding(nn.Module):
+        """
+        Custom implementation of SinusoidalPositionalEmbedding for compatibility with older diffusers versions.
+        """
+        def __init__(self, dim, max_seq_length=5000):
+            super().__init__()
+            self.dim = dim
+            self.max_seq_length = max_seq_length
+        def forward(self, seq_length):
+            position = torch.arange(seq_length, device=seq_length.device)
+            dim_t = torch.arange(self.dim // 2, device=seq_length.device)
+            dim_t = 10000 ** (2 * (dim_t) / self.dim)
+            x = position[:, None] / dim_t[None, :]
+            embeddings = torch.cat((torch.sin(x), torch.cos(x)), dim=1)
+            if self.dim % 2 == 1:  # if odd, add zero padding
+                embeddings = torch.cat((embeddings, torch.zeros_like(embeddings[:, :1])), dim=1)
+            return embeddings.to(seq_length.device)
+# Import normalization layers with fallbacks
+try:
+    from diffusers.models.normalization import AdaLayerNorm, AdaLayerNormContinuous, AdaLayerNormZero, RMSNorm
+except ImportError:
+    # Define simple versions for compatibility
+    class AdaLayerNorm(nn.Module):
+        """
+        Custom implementation of AdaLayerNorm for compatibility with older diffusers versions.
+        """
+        def __init__(self, embedding_dim, num_embeddings=None):
+            super().__init__()
+            self.emb = nn.Linear(embedding_dim, embedding_dim * 2)
+            self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False)
+        def forward(self, x, emb):
+            shift, scale = self.emb(emb).chunk(2, dim=1)
+            x = self.norm(x)
+            return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)
+    class AdaLayerNormContinuous(nn.Module):
+        """
+        Custom implementation of AdaLayerNormContinuous for compatibility with older diffusers versions.
+        """
+        def __init__(self, embedding_dim):
+            super().__init__()
+            self.emb = nn.Linear(embedding_dim, embedding_dim * 2)
+            self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False)
+        def forward(self, x, emb):
+            shift, scale = self.emb(emb).chunk(2, dim=1)
+            x = self.norm(x)
+            return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)
+    class AdaLayerNormZero(nn.Module):
+        """
+        Custom implementation of AdaLayerNormZero for compatibility with older diffusers versions.
+        """
+        def __init__(self, embedding_dim):
+            super().__init__()
+            self.emb = nn.Linear(embedding_dim, embedding_dim * 2)
+            self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False)
+        def forward(self, x, emb):
+            shift, scale = self.emb(emb).chunk(2, dim=1)
+            x = self.norm(x)
+            return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1)
+    class RMSNorm(nn.Module):
+        """
+        Custom implementation of RMSNorm for compatibility with older diffusers versions.
+        """
+        def __init__(self, dim, eps=1e-6):
+            super().__init__()
+            self.scale = dim ** 0.5
+            self.eps = eps
+            self.g = nn.Parameter(torch.ones(dim))
+        def forward(self, x):
+            return x * self.g / torch.norm(x, dim=-1, keepdim=True).clamp(min=self.eps) * self.scale
 logger = logging.get_logger(__name__)

compatibility_patches.py ADDED Viewed

	@@ -0,0 +1,56 @@

+"""
+Compatibility patches for huggingface_hub and diffusers
+"""
+import sys
+import importlib
+from functools import wraps
+# Check if huggingface_hub is installed
+try:
+    import huggingface_hub
+    # Add the cached_download function if it doesn't exist
+    if not hasattr(huggingface_hub, 'cached_download'):
+        def cached_download(*args, **kwargs):
+            """Compatibility function to replace cached_download"""
+            # Use the newer hf_hub_download function
+            return huggingface_hub.hf_hub_download(*args, **kwargs)
+        # Add the missing function to the module
+        huggingface_hub.cached_download = cached_download
+except ImportError:
+    print("huggingface_hub not found, skipping patch")
+# Patch for diffusers dynamic_modules_utils.py
+try:
+    import diffusers.utils.dynamic_modules_utils as dmu
+    # Store the original import statement
+    original_import = dmu.__import__
+    # Define a wrapper for __import__
+    @wraps(original_import)
+    def patched_import(name, *args, **kwargs):
+        try:
+            return original_import(name, *args, **kwargs)
+        except ImportError as e:
+            if 'cached_download' in str(e) and name == 'huggingface_hub':
+                # Import the module without the missing function
+                mod = importlib.import_module(name)
+                # Add the missing function
+                if not hasattr(mod, 'cached_download'):
+                    def cached_download(*args, **kwargs):
+                        return mod.hf_hub_download(*args, **kwargs)
+                    mod.cached_download = cached_download
+                return mod
+            raise
+    # Apply the patch
+    dmu.__import__ = patched_import
+except ImportError:
+    print("diffusers.utils.dynamic_modules_utils not found, skipping patch")

patch_diffusers.sh ADDED Viewed

	@@ -0,0 +1,13 @@

+#!/bin/bash
+# Run this script to patch the dynamic_modules_utils.py file
+SITE_PACKAGES=$(python -c "import site; print(site.getsitepackages()[0])")
+DMU_FILE="$SITE_PACKAGES/diffusers/utils/dynamic_modules_utils.py"
+# Create a backup
+cp "$DMU_FILE" "${DMU_FILE}.bak"
+# Replace the import statement
+sed -i 's/from huggingface_hub import cached_download, hf_hub_download, model_info/from huggingface_hub import hf_hub_download, model_info\n\ndef cached_download(*args, **kwargs):\n    """Compatibility wrapper for hf_hub_download"""\n    return hf_hub_download(*args, **kwargs)/g' "$DMU_FILE"
+echo "Patched $DMU_FILE"

pipeline_stable_diffusion_custom.py CHANGED Viewed

@@ -4,29 +4,110 @@ import inspect
 from typing import Any, Callable, Dict, List, Optional, Union
 import torch
 from packaging import version
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
 from diffusers.configuration_utils import FrozenDict
 from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
-from diffusers.loaders import  IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin, FromSingleFileMixin
-from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel
 from diffusers.models.lora import adjust_lora_scale_text_encoder
 from diffusers.schedulers import KarrasDiffusionSchedulers
-from diffusers.utils import (
-    USE_PEFT_BACKEND,
-    deprecate,
-    logging,
-    replace_example_docstring,
-    scale_lora_layers,
-    unscale_lora_layers,
-)
 from diffusers.utils.torch_utils import randn_tensor
-from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
-from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
-from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
@@ -104,6 +185,7 @@ def retrieve_timesteps(
     return timesteps, num_inference_steps
 class StableDiffusionPipeline(
     DiffusionPipeline,
     StableDiffusionMixin,

 from typing import Any, Callable, Dict, List, Optional, Union
 import torch
+import torch.nn as nn
 from packaging import version
 from transformers import CLIPImageProcessor, CLIPTextModel, CLIPTokenizer, CLIPVisionModelWithProjection
+# Import ModelMixin and ConfigMixin for our custom classes
+from diffusers.configuration_utils import ConfigMixin
+from diffusers.models.modeling_utils import ModelMixin
+from diffusers.utils import BaseOutput
 from diffusers.configuration_utils import FrozenDict
 from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
+# Modified to handle older diffusers versions (0.21.4)
+try:
+    from diffusers.loaders import IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin, FromSingleFileMixin
+except ImportError:
+    # Create dummy classes for missing imports
+    from diffusers.loaders import LoraLoaderMixin, TextualInversionLoaderMixin
+    # Define dummy mixins for backward compatibility
+    class IPAdapterMixin:
+        """Dummy IPAdapterMixin for compatibility with older diffusers versions."""
+        pass
+    class FromSingleFileMixin:
+        """Dummy FromSingleFileMixin for compatibility with older diffusers versions."""
+        pass
+# Import models with fallback for older diffusers versions
+try:
+    from diffusers.models import AutoencoderKL, ImageProjection, UNet2DConditionModel
+except ImportError:
+    from diffusers.models import AutoencoderKL, UNet2DConditionModel
+    # Define dummy class for compatibility
+    class ImageProjection(nn.Module):
+        """Dummy ImageProjection for compatibility with older diffusers versions."""
+        def __init__(self, image_embed_dim=None, cross_attention_dim=None):
+            super().__init__()
+            self.image_embed_dim = image_embed_dim
+            self.cross_attention_dim = cross_attention_dim
 from diffusers.models.lora import adjust_lora_scale_text_encoder
 from diffusers.schedulers import KarrasDiffusionSchedulers
+# Check if USE_PEFT_BACKEND is available in diffusers
+try:
+    from diffusers.utils import (
+        USE_PEFT_BACKEND,
+        deprecate,
+        logging,
+        replace_example_docstring,
+        scale_lora_layers,
+        unscale_lora_layers,
+    )
+except ImportError:
+    from diffusers.utils import deprecate, logging
+    # Define placeholders for missing utilities
+    USE_PEFT_BACKEND = False
+    def replace_example_docstring(example_docstring):
+        """Dummy function for compatibility with older diffusers versions."""
+        def decorator(fn):
+            return fn
+        return decorator
+    def scale_lora_layers(model, weight):
+        """Dummy function for compatibility with older diffusers versions."""
+        pass
+    def unscale_lora_layers(model, weight):
+        """Dummy function for compatibility with older diffusers versions."""
+        pass
 from diffusers.utils.torch_utils import randn_tensor
+# Import pipeline utils with fallbacks
+try:
+    from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+except ImportError:
+    from diffusers.pipelines.pipeline_utils import DiffusionPipeline
+    # Create a minimal StableDiffusionMixin for compatibility
+    class StableDiffusionMixin:
+        """Custom implementation of StableDiffusionMixin for compatibility with older diffusers versions."""
+        pass
+# Import pipeline output and safety checker
+try:
+    from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
+    from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker
+except ImportError:
+    # Define custom StableDiffusionPipelineOutput for compatibility
+    class StableDiffusionPipelineOutput(BaseOutput):
+        """Custom implementation for compatibility with older diffusers versions."""
+        images: torch.FloatTensor
+        nsfw_content_detected: Optional[List[bool]]
+    # Define custom StableDiffusionSafetyChecker for compatibility
+    class StableDiffusionSafetyChecker(ModelMixin, ConfigMixin):
+        """Custom implementation for compatibility with older diffusers versions."""
+        def __init__(self, *args, **kwargs):
+            super().__init__()
+        def forward(self, images, clip_input):
+            return images, [False] * len(images)
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
     return timesteps, num_inference_steps
+# Try to determine what mixins are available in the installed diffusers version
 class StableDiffusionPipeline(
     DiffusionPipeline,
     StableDiffusionMixin,

requirements.txt CHANGED Viewed

@@ -1,12 +1,12 @@
-gradio>=4.0.0
 requests>=2.30.0
 tqdm>=4.66.0
 torch==2.0.1
 transformers>=4.30.0,<4.36.0
 diffusers==0.21.4
-huggingface_hub==0.23.1
 accelerate>=0.24.0
 einops>=0.7.0
 omegaconf>=2.0.0
 librosa>=0.9.0
-soundfile>=0.12.0

+gradio>=4.0.0,<5.0.0
 requests>=2.30.0
 tqdm>=4.66.0
 torch==2.0.1
 transformers>=4.30.0,<4.36.0
 diffusers==0.21.4
+huggingface_hub==0.16.4
 accelerate>=0.24.0
 einops>=0.7.0
 omegaconf>=2.0.0
 librosa>=0.9.0
+soundfile>=0.12.0

test_imports.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import sys
+print("Python version:", sys.version)
+print("Python path:", sys.path)
+try:
+    import diffusers
+    print("Diffusers version:", diffusers.__version__)
+    # Try importing specific classes from diffusers
+    from diffusers.configuration_utils import FrozenDict
+    print("Successfully imported FrozenDict")
+    from diffusers.loaders import IPAdapterMixin, LoraLoaderMixin, TextualInversionLoaderMixin
+    print("Successfully imported mixins")
+    from diffusers.models import AutoencoderKL, UNet2DConditionModel
+    print("Successfully imported models")
+    # Try pipeline-specific imports
+    from diffusers.pipelines.pipeline_utils import DiffusionPipeline, StableDiffusionMixin
+    print("Successfully imported pipeline utils")
+    from diffusers.pipelines.stable_diffusion.pipeline_output import StableDiffusionPipelineOutput
+    print("Successfully imported pipeline output")
+except ImportError as e:
+    print("Import error:", e)
+    import traceback
+    traceback.print_exc()

test_pipeline.py ADDED Viewed

	@@ -0,0 +1,45 @@

+"""
+Simple script to test if our fixes for diffusers compatibility are working.
+This script doesn't use Gradio or the full web interface.
+"""
+import os
+import torch
+import numpy as np
+from PIL import Image
+# Import our custom components
+from unet2d_custom import UNet2DConditionModel
+from pipeline_stable_diffusion_custom import StableDiffusionPipeline
+def main():
+    print("Testing SonicDiffusion pipeline components...")
+    # Check imports
+    print("Imports successful!")
+    # Check if CUDA is available
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    print(f"Using device: {device}")
+    # Try to initialize a pipeline (without loading weights, just to test the class structure)
+    try:
+        # This will just test if the pipeline can be initialized, not if it works correctly
+        print("Testing pipeline initialization...")
+        pipeline = StableDiffusionPipeline(
+            vae=None,
+            text_encoder=None,
+            tokenizer=None,
+            unet=None,
+            scheduler=None,
+            safety_checker=None,
+            feature_extractor=None,
+        )
+        print("Pipeline initialization successful!")
+    except Exception as e:
+        print(f"Error initializing pipeline: {e}")
+    print("Tests completed.")
+if __name__ == "__main__":
+    main()

transformer_2d_custom.py CHANGED Viewed

@@ -11,9 +11,55 @@ from diffusers.configuration_utils import ConfigMixin, register_to_config
 from diffusers.utils import BaseOutput, deprecate, is_torch_version, logging
 from attention_custom import BasicTransformerBlock
-from diffusers.models.embeddings import ImagePositionalEmbeddings, PatchEmbed, PixArtAlphaTextProjection
 from diffusers.models.modeling_utils import ModelMixin
-from diffusers.models.normalization import AdaLayerNormSingle
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

 from diffusers.utils import BaseOutput, deprecate, is_torch_version, logging
 from attention_custom import BasicTransformerBlock
+# Import embeddings with fallbacks
+try:
+    from diffusers.models.embeddings import ImagePositionalEmbeddings, PatchEmbed, PixArtAlphaTextProjection
+except ImportError:
+    # Define custom classes for compatibility
+    class ImagePositionalEmbeddings(nn.Module):
+        """Custom implementation for compatibility with older diffusers versions."""
+        def __init__(self, *args, **kwargs):
+            super().__init__()
+            self.position_embeddings = nn.Parameter(torch.zeros(1, 1, 1, 1))
+        def forward(self, x):
+            return x + self.position_embeddings
+    class PatchEmbed(nn.Module):
+        """Custom implementation for compatibility with older diffusers versions."""
+        def __init__(self, *args, **kwargs):
+            super().__init__()
+            self.proj = nn.Conv2d(3, 1024, kernel_size=1)
+        def forward(self, x):
+            return self.proj(x).flatten(2).transpose(1, 2)
+    class PixArtAlphaTextProjection(nn.Module):
+        """Custom implementation for compatibility with older diffusers versions."""
+        def __init__(self, *args, **kwargs):
+            super().__init__()
+        def forward(self, x):
+            return x
 from diffusers.models.modeling_utils import ModelMixin
+# Import normalization with fallbacks
+try:
+    from diffusers.models.normalization import AdaLayerNormSingle
+except ImportError:
+    # Define a custom AdaLayerNormSingle
+    class AdaLayerNormSingle(nn.Module):
+        """Custom implementation for compatibility with older diffusers versions."""
+        def __init__(self, embedding_dim, emb_dim=None):
+            super().__init__()
+            self.emb_layer = nn.Linear(emb_dim or embedding_dim, embedding_dim)
+            self.norm = nn.LayerNorm(embedding_dim, elementwise_affine=False)
+        def forward(self, x, emb):
+            shift = self.emb_layer(emb).unsqueeze(1)
+            x = self.norm(x)
+            return x + shift
 logger = logging.get_logger(__name__)  # pylint: disable=invalid-name

unet2d_custom.py CHANGED Viewed

@@ -8,10 +8,32 @@ import torch.nn as nn
 import torch.utils.checkpoint
 from diffusers.configuration_utils import ConfigMixin, register_to_config
-from diffusers.loaders import PeftAdapterMixin, UNet2DConditionLoadersMixin
-#from diffusers.loaders import UNet2DConditionLoadersMixin
-from diffusers.utils import USE_PEFT_BACKEND, BaseOutput, deprecate, logging, scale_lora_layers, unscale_lora_layers
 from diffusers.models.activations import get_activation
 from diffusers.models.attention_processor import (
@@ -22,18 +44,57 @@ from diffusers.models.attention_processor import (
     AttnAddedKVProcessor,
     AttnProcessor,
 )
-from diffusers.models.embeddings import (
-    GaussianFourierProjection,
-    #GLIGENTextBoundingboxProjection,
-    ImageHintTimeEmbedding,
-    ImageProjection,
-    ImageTimeEmbedding,
-    TextImageProjection,
-    TextImageTimeEmbedding,
-    TextTimeEmbedding,
-    TimestepEmbedding,
-    Timesteps,
-)
 from diffusers.models.modeling_utils import ModelMixin
 from unet_2d_blocks_custom import (
@@ -60,6 +121,7 @@ class UNet2DConditionOutput(BaseOutput):
     sample: torch.FloatTensor = None
 class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin, PeftAdapterMixin):
     r"""
     A conditional 2D UNet model that takes a noisy sample, conditional state, and a timestep and returns a sample

 import torch.utils.checkpoint
 from diffusers.configuration_utils import ConfigMixin, register_to_config
+# Modified to handle older diffusers versions (0.21.4)
+try:
+    from diffusers.loaders import PeftAdapterMixin, UNet2DConditionLoadersMixin
+except ImportError:
+    from diffusers.loaders import UNet2DConditionLoadersMixin
+    # Define dummy mixin for backward compatibility
+    class PeftAdapterMixin:
+        """Dummy PeftAdapterMixin for compatibility with older diffusers versions."""
+        pass
+# Check if USE_PEFT_BACKEND is available in diffusers
+try:
+    from diffusers.utils import USE_PEFT_BACKEND, BaseOutput, deprecate, logging, scale_lora_layers, unscale_lora_layers
+except ImportError:
+    from diffusers.utils import BaseOutput, deprecate, logging
+    # Define placeholders for missing utilities
+    USE_PEFT_BACKEND = False
+    def scale_lora_layers(model, weight):
+        """Dummy function for compatibility with older diffusers versions."""
+        pass
+    def unscale_lora_layers(model, weight):
+        """Dummy function for compatibility with older diffusers versions."""
+        pass
 from diffusers.models.activations import get_activation
 from diffusers.models.attention_processor import (
     AttnAddedKVProcessor,
     AttnProcessor,
 )
+try:
+    from diffusers.models.embeddings import (
+        GaussianFourierProjection,
+        GLIGENTextBoundingboxProjection,
+        ImageHintTimeEmbedding,
+        ImageProjection,
+        ImageTimeEmbedding,
+        TextImageProjection,
+        TextImageTimeEmbedding,
+        TextTimeEmbedding,
+        TimestepEmbedding,
+        Timesteps,
+    )
+except ImportError:
+    # For older diffusers versions
+    from diffusers.models.embeddings import (
+        GaussianFourierProjection,
+        ImageProjection,
+        TextTimeEmbedding,
+        TimestepEmbedding,
+        Timesteps,
+    )
+    # Define missing classes for compatibility
+    class GLIGENTextBoundingboxProjection(nn.Module):
+        """Dummy class for compatibility with older diffusers versions."""
+        def __init__(self, positive_len=None, out_dim=None, feature_type=None):
+            super().__init__()
+            self.positive_len = positive_len
+            self.out_dim = out_dim
+            self.feature_type = feature_type
+    class ImageHintTimeEmbedding(nn.Module):
+        """Dummy class for compatibility with older diffusers versions."""
+        def __init__(self, image_embed_dim=None, time_embed_dim=None):
+            super().__init__()
+    class ImageTimeEmbedding(nn.Module):
+        """Dummy class for compatibility with older diffusers versions."""
+        def __init__(self, image_embed_dim=None, time_embed_dim=None):
+            super().__init__()
+    class TextImageProjection(nn.Module):
+        """Dummy class for compatibility with older diffusers versions."""
+        def __init__(self, text_embed_dim=None, image_embed_dim=None, cross_attention_dim=None):
+            super().__init__()
+    class TextImageTimeEmbedding(nn.Module):
+        """Dummy class for compatibility with older diffusers versions."""
+        def __init__(self, text_embed_dim=None, image_embed_dim=None, time_embed_dim=None):
+            super().__init__()
 from diffusers.models.modeling_utils import ModelMixin
 from unet_2d_blocks_custom import (
     sample: torch.FloatTensor = None
+# Modified for compatibility with older diffusers
 class UNet2DConditionModel(ModelMixin, ConfigMixin, UNet2DConditionLoadersMixin, PeftAdapterMixin):
     r"""
     A conditional 2D UNet model that takes a noisy sample, conditional state, and a timestep and returns a sample

unet_2d_blocks_custom.py CHANGED Viewed

@@ -8,24 +8,136 @@ import torch.nn.functional as F
 from torch import nn
 from diffusers.utils import deprecate, is_torch_version, logging
-from diffusers.utils.torch_utils import apply_freeu
 from diffusers.models.activations import get_activation
 from diffusers.models.attention_processor import Attention, AttnAddedKVProcessor, AttnAddedKVProcessor2_0
-from diffusers.models.normalization import AdaGroupNorm
-from diffusers.models.resnet import (
-    Downsample2D,
-    FirDownsample2D,
-    FirUpsample2D,
-    KDownsample2D,
-    KUpsample2D,
-    ResnetBlock2D,
-    ResnetBlockCondNorm2D,
-    Upsample2D,
-)
-from diffusers.models.transformers.dual_transformer_2d import DualTransformer2DModel
 from transformer_2d_custom import Transformer2DModel
 #from diffusers.models.transformers.transformer_2d import Transformer2DModel

 from torch import nn
 from diffusers.utils import deprecate, is_torch_version, logging
+# Import apply_freeu or define it if not available
+try:
+    from diffusers.utils.torch_utils import apply_freeu
+except ImportError:
+    # Define a custom apply_freeu function for compatibility
+    def apply_freeu(
+        feats: torch.Tensor,
+        hidden_states: torch.Tensor,
+        res_hidden_states: torch.Tensor,
+        s1: float,
+        s2: float,
+        b1: float,
+        b2: float,
+    ) -> torch.Tensor:
+        """
+        Custom implementation of FreeU for older diffusers versions.
+        See https://github.com/ChenyangSi/FreeU for more details.
+        Args:
+            feats: Features at the current layer
+            hidden_states: Hidden states from the previous layer
+            res_hidden_states: Residual hidden states from the previous layer
+            s1: Scaling factor for frequency components
+            s2: Scaling factor for frequency components
+            b1: Scaling factor for original hidden states
+            b2: Scaling factor for original hidden states
+        Returns:
+            The processed feature map
+        """
+        if all(param is None for param in [s1, s2, b1, b2]):
+            return hidden_states
+        # Simple implementation that just passes through the hidden states unchanged
+        # This maintains compatibility without the actual FreeU feature
+        return hidden_states
 from diffusers.models.activations import get_activation
 from diffusers.models.attention_processor import Attention, AttnAddedKVProcessor, AttnAddedKVProcessor2_0
+# Handle missing AdaGroupNorm
+try:
+    from diffusers.models.normalization import AdaGroupNorm
+except ImportError:
+    # Define a custom AdaGroupNorm class if it's not available
+    class AdaGroupNorm(nn.Module):
+        """Custom implementation of AdaGroupNorm for compatibility with older diffusers versions."""
+        def __init__(self, embedding_dim, num_groups=32, eps=1e-5):
+            super().__init__()
+            self.num_groups = num_groups
+            self.eps = eps
+            self.embedding_dim = embedding_dim
+            self.linear = nn.Linear(embedding_dim, embedding_dim * 2)
+        def forward(self, x, emb):
+            # Simple implementation that falls back to GroupNorm
+            emb = self.linear(emb)
+            emb = emb[:, :, None, None]
+            scale, shift = emb.chunk(2, dim=1)
+            # Use standard GroupNorm
+            x = nn.functional.group_norm(x, self.num_groups, eps=self.eps)
+            # Apply scale and shift
+            return x * (1 + scale) + shift
+# Import resnet components with fallbacks for older diffusers versions
+try:
+    from diffusers.models.resnet import (
+        Downsample2D,
+        FirDownsample2D,
+        FirUpsample2D,
+        KDownsample2D,
+        KUpsample2D,
+        ResnetBlock2D,
+        ResnetBlockCondNorm2D,
+        Upsample2D,
+    )
+except ImportError:
+    # Import what's available
+    from diffusers.models.resnet import (
+        Downsample2D,
+        FirDownsample2D,
+        FirUpsample2D,
+        KDownsample2D,
+        KUpsample2D,
+        ResnetBlock2D,
+        Upsample2D,
+    )
+    # Define a custom ResnetBlockCondNorm2D class
+    class ResnetBlockCondNorm2D(nn.Module):
+        """
+        Resnet block with conditional normalization for compatibility with older diffusers versions.
+        Args:
+            in_channels (int): Number of input channels.
+            out_channels (int): Number of output channels.
+            temb_channels (int): Number of timestep embedding channels.
+            groups (int, optional): Number of groups for GroupNorm. Defaults to 32.
+            eps (float, optional): Epsilon for GroupNorm. Defaults to 1e-5.
+        """
+        def __init__(
+            self,
+            *args,
+            **kwargs
+        ):
+            super().__init__()
+            # Use ResnetBlock2D as fallback
+            self.block = ResnetBlock2D(*args, **kwargs)
+        def forward(self, hidden_states, temb=None, scale=None):
+            return self.block(hidden_states, temb)
+# Import transformer models
+try:
+    from diffusers.models.transformers.dual_transformer_2d import DualTransformer2DModel
+except ImportError:
+    # Define a custom DualTransformer2DModel for older diffusers versions
+    class DualTransformer2DModel(nn.Module):
+        """Dummy implementation for older diffusers versions"""
+        def __init__(self, *args, **kwargs):
+            super().__init__()
+        def forward(self, *args, **kwargs):
+            raise NotImplementedError("DualTransformer2DModel is not available in this version of diffusers")
+# Use our custom Transformer2DModel
 from transformer_2d_custom import Transformer2DModel
 #from diffusers.models.transformers.transformer_2d import Transformer2DModel